# Code for Ice-Cube 3D CNN

- Oct 29, 2018: This code just makes plots for previously trained CNNs

In [1]:
import sys
import os

import matplotlib.pyplot as plt
import numpy as np
import glob
import pickle
import time

In [2]:
%matplotlib widget
# %matplotlib inline

Useful blog for keras conv3D: http://learnandshare645.blogspot.com/2016/06/3d-cnn-in-keras-action-recognition.html

In [3]:
# keras modules
import tensorflow.keras as keras
from tensorflow.keras import layers, models, optimizers, callbacks  # or tensorflow.keras as keras
import tensorflow as tf
from sklearn.utils import shuffle
from sklearn.metrics import roc_curve, auc, roc_auc_score
from tensorflow.keras.models import load_model



In [4]:
# Record the TensorFlow / Keras versions this notebook is running with
print(tf.__version__,keras.__version__,sep='\n')

1.12.0
2.1.6-tf


## Modules

In [42]:
def f_get_ydata_and_wts(data_dir,f1,f2):
    ''' Load extracted data from files. Just extracting ydata and weights.

    Both files are parsed with np.loadtxt.

    Arguments:
        data_dir : directory holding the two files, given with or
                   without a trailing path separator
        f1       : name of the file with the y-data, relative to data_dir
        f2       : name of the file with the weights, relative to data_dir

    returns : inpy,weights as arrays
    '''

    # os.path.join instead of data_dir+f1: a directory passed without a
    # trailing separator would otherwise be glued to the file name.
    inpy=np.loadtxt(os.path.join(data_dir,f1))
    wts=np.loadtxt(os.path.join(data_dir,f2))

    return inpy,wts
    
def _f_first_metric(history,names):
    ''' Return history[name] for the first entry of names present in history. '''
    for name in names:
        if name in history:
            return history[name]
    raise KeyError("None of the keys %s found in history (available: %s)"%(list(names),list(history)))


def f_plot_learning(history):
    ''' Plot the learning curves of a training run.

    Draws one new figure with two stacked panels: accuracy on top and
    loss below, each with a 'Train' and a 'Validation' curve.

    Arguments:
        history : dict of per-epoch metric values. Needs 'loss' and
                  'val_loss', and the accuracy under either
                  'acc'/'val_acc' or 'accuracy'/'val_accuracy'.

    returns : None

    Raises KeyError when a required metric is missing.
    '''

    # Depending on the Keras version and on how the metric was named at
    # compile time, accuracy is stored as 'acc' or as 'accuracy'.
    # Everything is looked up before the figure is created, so a missing
    # key does not leave an empty figure behind.
    train_acc=_f_first_metric(history,('acc','accuracy'))
    val_acc=_f_first_metric(history,('val_acc','val_accuracy'))
    train_loss=history['loss']
    val_loss=history['val_loss']

    fig=plt.figure()

    # Top panel: training & validation accuracy
    ax_acc=fig.add_subplot(2,1,1)
    ax_acc.plot(train_acc,label='Train')
    ax_acc.plot(val_acc,label='Validation')
    ax_acc.set_ylabel('Accuracy')

    # Bottom panel: training & validation loss
    ax_loss=fig.add_subplot(2,1,2)
    ax_loss.plot(train_loss,label='Train')
    ax_loss.plot(val_loss,label='Validation')
    ax_loss.set_ylabel('Loss')
    ax_loss.set_xlabel('Epoch')
    # Single legend, as before; curves are drawn in the same order in both panels.
    ax_loss.legend(loc='best')


def f_plot_roc_curve(fpr,tpr):
    '''
    Draw the ROC curve on a logarithmic x-axis and print the AUC.

    Arguments:
        fpr : false positive rates
        tpr : true positive rates, one per entry of fpr
    '''
    plt.figure()

    # Individual points plus a connecting line; semilogx also switches
    # the x-axis to log scale.
    plt.scatter(fpr,tpr)
    plt.semilogx(fpr,tpr)

    # Zooms
    plt.xlim([10**-7,1.0])
    plt.ylim([0,1.0])

    # y=x line for comparison
    diagonal=np.linspace(0,1,num=500)
    plt.plot(diagonal,diagonal)
    plt.show()

    # AUC
    print("AUC: ",auc(fpr,tpr))


def f_plot_fit(inpy,wts,model_dict,model_loc):
    '''
    Plot fit results of a previously trained and tested model.

    Reads the saved model, its training history and the saved test
    predictions, labels and weights from model_loc, prints the model
    summary, plots the learning curves and plots the ROC curve.

    Arguments:
        inpy       : not used; the labels are read from
                     'y-test_model-<name>.test' in model_loc.
        wts        : not used; the weights are read from
                     'wts-test_model-<name>.test' in model_loc.
                     (both are kept so that existing calls keep working)
        model_dict : dict with at least the key 'name', which identifies
                     the model in all file names.
        model_loc  : directory holding the model, history and test files.

    returns : model_dict (the same object, modified in place) with the
              entries 'model' and 'history' filled in.

    Raises AssertionError when the model, history or prediction file is
    missing, or when labels and predictions differ in length.
    '''

    model_save_dir=model_loc
    model_name=str(model_dict['name']) # string for the model
    fname_model=os.path.join(model_save_dir,'model_{0}.h5'.format(model_name))
    fname_history=os.path.join(model_save_dir,'history_{0}.pickle'.format(model_name))

    ########################
    ### Read model and history

    ### Check if files exist
    assert os.path.exists(fname_model),"Model not saved"
    assert os.path.exists(fname_history),"History not saved"

    model=load_model(fname_model)
    # NOTE: pickle.load can execute arbitrary code stored in the file;
    # only read history files that come from a trusted training run.
    with open(fname_history,'rb') as f:
        history=pickle.load(f)

    ########################
    model.summary()
    # Plot tested model
    f_plot_learning(history)

    ########################
    # Get test predictions

    test_file_name=os.path.join(model_save_dir,'y-predict_model-'+model_name+'.pred')
    test_y_file_name=os.path.join(model_save_dir,'y-test_model-'+model_name+'.test')
    test_weights_file_name=os.path.join(model_save_dir,'wts-test_model-'+model_name+'.test')
    print("Using test prediction from previous test",test_file_name)

    assert os.path.exists(test_file_name),"y-predictions not saved"
    y_pred=np.loadtxt(test_file_name)
    ydata=np.loadtxt(test_y_file_name)
    # Separate name so the (unused) wts argument is not silently shadowed.
    test_wts=np.loadtxt(test_weights_file_name)
    # Compare against the labels loaded here, not against a notebook global.
    assert(ydata.shape[0]==y_pred.shape[0]),"Data %s and prediction arrays %s are not of the same size"%(ydata.shape,y_pred.shape)

    # Two-column predictions: keep the second column as the score.
    # Single-column files load as 1-D arrays and are used as they are.
    if y_pred.ndim==2 and y_pred.shape[1]==2:
        y_pred=y_pred[:,1]

    fpr,tpr,threshold=roc_curve(ydata,y_pred,sample_weight=test_wts)
    print(fpr.shape,tpr.shape,threshold.shape)
    f_plot_roc_curve(fpr,tpr)

    model_dict['model'],model_dict['history']=model,history

    return model_dict



## Read part of test data

In [10]:
if __name__=='__main__':

    # Directory holding the saved model together with its test labels,
    # weights and predictions.
    model_loc='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/resnet50_test_Jan22/'

    ### Extract data : only the y-data and the weights of the test set are needed; they were saved along with the model.
    ### Note!: the test file data is the same for all models, so just pick the first one. ###
    f1='y-test_model-15.test'
    f2='wts-test_model-15.test'
    inpy,wts=f_get_ydata_and_wts(model_loc,f1,f2)

    # Full-range slices of the loaded arrays, used as the test set below
    test_y=inpy[:]
    test_wts=wts[:]
    

In [11]:
# Shapes of the loaded labels / weights and of the test arrays derived from them
print(*(arr.shape for arr in (inpy,wts,test_y,test_wts)))

(376302,) (376302,) (376302,) (376302,)


### View predicted data

In [16]:
# Plot 2D data for resnet50

pred_data=np.loadtxt(model_loc+'y-predict_model-15.pred')
print(pred_data.shape)

# One figure per prediction column
for col_index,col_title in enumerate(("col1","col2")):
    plt.figure()
    plt.plot(pred_data[:,col_index])
    plt.title(col_title)
    plt.show()

(376302, 2)


FigureCanvasNbAgg()

FigureCanvasNbAgg()

In [48]:
def f_plt_hist(ypred):
    ''' Print the shape of the predictions and show their histogram.

    The histogram uses 300 bins and the x-axis is limited to [0,1].
    '''
    print(ypred.shape)

    plt.figure()
    plt.hist(ypred,bins=300,density=None)
    plt.xlim(0,1)
    plt.show()

def f_get_prediction_info(arr,plot=False):
    ''' Print a short summary of an array of predictions.

    Reports how many entries are exactly 0.0, how many are exactly 1.0
    and the length of the array. With plot=True the values are also
    drawn as a line plot, followed by their histogram (f_plt_hist).
    '''
    num_total=arr.shape[0]

    # Count the saturated predictions via boolean masks
    num_zeros=np.count_nonzero(arr==0.0)
    num_ones=np.count_nonzero(arr==1.0)
    print("Pred 0's:\t%s,\tPred 1's:\t%s,Total:\t %s" %(num_zeros,num_ones,num_total))

    if not plot:
        return

    ### Plot data ###
    plt.figure()
    plt.plot(arr)
    plt.title("Prediction")
    plt.show()

    ### Plot histogram ###
    f_plt_hist(arr)
# Summarise and plot the second prediction column
f_get_prediction_info(arr=pred_data[:,1],plot=True)


Pred 0's:	376302,	Pred 1's:	0,Total:	 376302


FigureCanvasNbAgg()

(376302,)


FigureCanvasNbAgg()

## Plot fits

In [43]:
# Collect one record per model; f_plot_fit fills in 'model' and 'history'.
dict_list=[]
# Alternative model selection: for i in range(1,14):
for i in [15]:
    model_dict=dict(name=str(i),description=None,model=None,history=None)
    print(i,model_dict)
    model_dict=f_plot_fit(test_y,test_wts,model_dict,model_loc)
    dict_list+=[model_dict]

15 {'name': '15', 'description': None, 'model': None, 'history': None}
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 10, 20, 60, 1 0                                            
__________________________________________________________________________________________________
zero_padding3d (ZeroPadding3D)  (None, 16, 26, 66, 1 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv3D)                  (None, 5, 10, 30, 64 22016       zero_padding3d[0][0]             
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 5, 10, 30, 64 120         conv1[0][0]                      
______________________________________

FigureCanvasNbAgg()

Using test prediction from previous test /global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/resnet50_test_Jan22/y-predict_model-15.pred
(2,) (2,) (2,)


FigureCanvasNbAgg()

AUC:  0.5


### Comparing models

In [10]:
## Comparing different models:

# For every loaded model, print its name followed by each recorded
# history entry (metric name and its per-epoch values).
for md in dict_list:
    hist=md['history']
    print('Model %s'%(md['name']))
    for key in hist:
        print(key,hist[key])
        

Model 1
val_loss [0.3134767834455058, 0.25843456030571615, 0.23645994517857458, 0.21849576347822347, 0.2215722617574813, 0.2238299879161458, 0.2122572806424582, 0.20176069722927564, 0.19799405467398187, 0.19925194125436504, 0.20060270049501264, 0.19450749855844127, 0.19126327425502976, 0.1932224254755022, 0.19459983884151952, 0.19130340609275537, 0.19098688857982082, 0.19417386197745415, 0.18784048417064161, 0.18797720276331362]
val_acc [0.8894926729321634, 0.9099149258384928, 0.9145249654777247, 0.920025834035918, 0.9189345686134424, 0.9202262705447419, 0.9244799786174511, 0.9270856532003029, 0.9289786646447996, 0.9271747360892787, 0.9281323771743083, 0.9291345597051532, 0.9308494053690435, 0.9303817201879825, 0.9296913277778449, 0.9277537748875329, 0.9296245156091218, 0.9309384882606742, 0.9315620685047437, 0.9331210191082803]
loss [0.46066368408025493, 0.29600052525007436, 0.2521211558078528, 0.2246521741366262, 0.21325004046990356, 0.20517727015812565, 0.20065882695044993, 0.195246

## -----------------------------------------------

#### Notes on the ROC curve:
- We know the true y-value of each event (0 or 1), depending on whether it is signal or background.
- The 3D-CNN gives us a prediction for y, as a float between 0 and 1.
- We must use a cut (threshold) to determine what constitutes 0 / 1, e.g. 0.5.
- For a given threshold this gives us a false positive rate and a true positive rate (fpr and tpr).
- The ROC curve plots tpr against fpr as the threshold is varied.
- AUC gives the area under this curve.