# Code for Ice-Cube 3D CNN

- Oct 29, 2018: This code just makes plots for previously trained CNNs

In [2]:
import sys
import os

import matplotlib.pyplot as plt
import numpy as np
import glob
import pickle
import time

In [2]:
%matplotlib widget
# %matplotlib inline

Useful blog for keras conv3D: http://learnandshare645.blogspot.com/2016/06/3d-cnn-in-keras-action-recognition.html

In [3]:
# keras modules
import keras
from keras import layers, models, optimizers, callbacks  # or tensorflow.keras as keras
import tensorflow as tf
from sklearn.utils import shuffle
from sklearn.metrics import roc_curve, auc, roc_auc_score
from keras.models import load_model



Using TensorFlow backend.


## Modules

In [4]:
def f_get_ydata_and_wts(data_dir,f1,f2):
    ''' Load extracted data from files. Just extracting ydata and weights
    returns : inpy,weights as arrays
    '''

    inpy=np.loadtxt(data_dir+f1)
    wts=np.loadtxt(data_dir+f2)
    
    return inpy,wts
    
    
def f_plot_learning(history):
    
    fig=plt.figure()
    # Plot training & validation accuracy values
    fig.add_subplot(2,1,1)
    plt.plot(history['acc'],label='Train')
    plt.plot(history['val_acc'],label='Validation')
#     plt.title('Model accuracy')
    plt.ylabel('Accuracy')

    # Plot loss values
    fig.add_subplot(2,1,2)
    plt.plot(history['loss'],label='Train')
    plt.plot(history['val_loss'],label='Validation')
#     plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='best')


def f_plot_roc_curve(fpr,tpr):
    '''
    Module for roc plot and printing AUC
    '''
    plt.figure()
    # plt.plot(fpr,tpr)
    plt.scatter(fpr,tpr)
    plt.semilogx(fpr, tpr)
  # Zooms
    plt.xlim([10**-7,1.0])
    plt.ylim([0,1.0])
#     plt.xscale('log')
#     plt.xlim(1e-10,1e-5)
    plt.show()

    # AUC 
    auc_val = auc(fpr, tpr)
    print("AUC: ",auc_val)


def f_plot_fit(inpy,wts,model_dict,model_loc):
    '''
    Plot fit results.
    '''
    
    model_save_dir=model_loc
    model_name=model_dict['name'] # string for the model
    fname_model,fname_history='model_{0}.h5'.format(model_name),'history_{0}.pickle'.format(model_name)
    
        
    ########################
    ### Read model and history
    
    ### Check if files exist
    assert os.path.exists(model_save_dir+fname_model),"Model not saved"
    assert os.path.exists(model_save_dir+fname_history),"History not saved"
    
    model=load_model(model_save_dir+fname_model)
    with open(model_save_dir+fname_history,'rb') as f:
        history= pickle.load(f)
    
    ########################
    model.summary()
    # Plot tested model
    f_plot_learning(history)
    
    ########################
    # Get test predictions
    
    test_file_name=model_save_dir+'y-predict_model-'+str(model_name)+'.pred'
    test_y_file_name=model_save_dir+'y-test_model-'+str(model_name)+'.test'
    test_weights_file_name=model_save_dir+'wts-test_model-'+str(model_name)+'.test'    
    print("Using test prediction from previous test",test_file_name)

    assert os.path.exists(test_file_name),"y-predictions not saved"
    y_pred=np.loadtxt(test_file_name)
    ydata=np.loadtxt(test_y_file_name)
    wts=np.loadtxt(test_weights_file_name)
    assert(test_y.shape==y_pred.shape),"Data %s and prediction arrays %s are not of the same size"%(test_y.shape,y_pred.shape)
    
    fpr,tpr,threshold=roc_curve(ydata,y_pred,sample_weight=wts)
    print(fpr.shape,tpr.shape,threshold.shape)
    f_plot_roc_curve(fpr,tpr)
    
    model_dict['model'],model_dict['history']=model,history
    
    return model_dict



## Read part of test data

In [5]:
if __name__=='__main__':
    
    model_loc='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/Dec2_no-cut_saved_models/'

    ###Extract data : Only extract y-data and weights for tests, which has been saved already along with the model.
    ### Note!: the test file data is the same for all models, so just pick the first one. ###
    f1,f2='y-test_model-1.test','wts-test_model-1.test'
    inpy,wts=f_get_ydata_and_wts(model_loc,f1,f2)
    test_y,test_wts=inpy[:],wts[:]   
    



In [6]:
print(inpy.shape,wts.shape,test_y.shape,test_wts.shape)

(752604,) (752604,) (752604,) (752604,)


In [7]:
### Plot test data

# Plot y
plt.figure()
plt.plot(test_y)
plt.title("test data weights")
plt.show()

# Plot wts
plt.figure()
plt.plot(test_wts)
plt.title("test data weights")
plt.show()


FigureCanvasNbAgg()

FigureCanvasNbAgg()

## Plot fits

In [8]:
dict_list=[]
for i in range(1,6):
# for i in range(1,2):
    model_dict={'name':str(i),'description':None,'model':None,'history':None}
    print(i,model_dict)
    model_dict=f_plot_fit(test_y,test_wts,model_dict,model_loc)
    dict_list.append(model_dict)

1 {'name': '1', 'description': None, 'model': None, 'history': None}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 10, 20, 60, 1)     0         
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 10, 20, 60, 10)    280       
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 5, 10, 30, 10)     0         
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 5, 10, 30, 10)     2710      
_________________________________________________________________
max_pooling3d_2 (MaxPooling3 (None, 2, 5, 15, 10)      0         
_________________________________________________________________
conv3d_3 (Conv3D)            (None, 2, 5, 15, 10)      2710      
_________________________________________________________________
max_poo

FigureCanvasNbAgg()

Using test prediction from previous test /global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/Dec2_no-cut_saved_models/y-predict_model-1.pred
(720880,) (720880,) (720880,)


FigureCanvasNbAgg()

AUC:  0.9450023200792073
2 {'name': '2', 'description': None, 'model': None, 'history': None}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 10, 20, 60, 1)     0         
_________________________________________________________________
conv3d_4 (Conv3D)            (None, 10, 20, 60, 10)    280       
_________________________________________________________________
max_pooling3d_4 (MaxPooling3 (None, 5, 10, 30, 10)     0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 10, 30, 10)     0         
_________________________________________________________________
conv3d_5 (Conv3D)            (None, 5, 10, 30, 10)     2710      
_________________________________________________________________
max_pooling3d_5 (MaxPooling3 (None, 2, 5, 15, 10)      0         
________________________________________________

FigureCanvasNbAgg()

Using test prediction from previous test /global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/Dec2_no-cut_saved_models/y-predict_model-2.pred
(716798,) (716798,) (716798,)


FigureCanvasNbAgg()

AUC:  0.9293040136246258
3 {'name': '3', 'description': None, 'model': None, 'history': None}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 10, 20, 60, 1)     0         
_________________________________________________________________
conv3d_7 (Conv3D)            (None, 10, 20, 60, 6)     168       
_________________________________________________________________
max_pooling3d_7 (MaxPooling3 (None, 5, 10, 30, 6)      0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 5, 10, 30, 6)      0         
_________________________________________________________________
conv3d_8 (Conv3D)            (None, 5, 10, 30, 6)      978       
_________________________________________________________________
max_pooling3d_8 (MaxPooling3 (None, 2, 5, 15, 6)       0         
________________________________________________

FigureCanvasNbAgg()

Using test prediction from previous test /global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/Dec2_no-cut_saved_models/y-predict_model-3.pred
(700371,) (700371,) (700371,)


FigureCanvasNbAgg()

AUC:  0.9074157527957809
4 {'name': '4', 'description': None, 'model': None, 'history': None}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 10, 20, 60, 1)     0         
_________________________________________________________________
conv3d_10 (Conv3D)           (None, 10, 20, 60, 6)     168       
_________________________________________________________________
max_pooling3d_10 (MaxPooling (None, 5, 10, 30, 6)      0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 5, 10, 30, 6)      0         
_________________________________________________________________
conv3d_11 (Conv3D)           (None, 5, 10, 30, 6)      978       
_________________________________________________________________
max_pooling3d_11 (MaxPooling (None, 2, 5, 15, 6)       0         
________________________________________________

FigureCanvasNbAgg()

Using test prediction from previous test /global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/Dec2_no-cut_saved_models/y-predict_model-4.pred
(698928,) (698928,) (698928,)


FigureCanvasNbAgg()

AUC:  0.9082798107316151
5 {'name': '5', 'description': None, 'model': None, 'history': None}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 10, 20, 60, 1)     0         
_________________________________________________________________
conv3d_13 (Conv3D)           (None, 10, 20, 60, 6)     726       
_________________________________________________________________
max_pooling3d_13 (MaxPooling (None, 3, 6, 20, 6)       0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 3, 6, 20, 6)       0         
_________________________________________________________________
conv3d_14 (Conv3D)           (None, 3, 6, 20, 6)       4326      
_________________________________________________________________
max_pooling3d_14 (MaxPooling (None, 1, 2, 6, 6)        0         
________________________________________________

FigureCanvasNbAgg()

Using test prediction from previous test /global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/Dec2_no-cut_saved_models/y-predict_model-5.pred
(2,) (2,) (2,)


FigureCanvasNbAgg()

AUC:  0.5


### Comparing models

In [9]:
# dict_list[0]

In [10]:
## Comparing different models:

for md in dict_list:
#     print(md)
    hist=md['history']
#     print(hist)
#     print(md)
    print('Model %s'%(md['name']))
    for key in hist.keys():
        print(key,hist[key])
#     print(md[''])
        

Model 1
val_loss [0.3182131493045388, 0.28443256651377935, 0.24086589907892064, 0.21594179985525985, 0.21354722345885782, 0.20398141269038492, 0.20050183623733142, 0.20205124440033778, 0.20962622090042485, 0.19240169106923702, 0.20524825179164305, 0.18539710079321145, 0.1883549608755131, 0.19149937261760297, 0.19653342411696612, 0.19596384648549084, 0.18685260002111084, 0.1951380814973838, 0.19027778037361331, 0.18527841486090207]
val_acc [0.889180882811456, 0.889982628836132, 0.9130328270455659, 0.9225646964473919, 0.9228542158451916, 0.9251926417504964, 0.9267961337998484, 0.9298917642840139, 0.9200481047588257, 0.9283996258492003, 0.9228542158451916, 0.9314507148875505, 0.9308048639232281, 0.9307380517545052, 0.9315175270589283, 0.9282660015117543, 0.932942853322364, 0.9314284441646428, 0.9326533339245644, 0.9325197095871184]
loss [0.42471213855000045, 0.29682878761742165, 0.2591850371571779, 0.226465350202881, 0.21716913239568894, 0.2079125521610977, 0.2009007216224308, 0.193871315

## -----------------------------------------------

### Questions:


#### Notes:
- model.fit 
    - batch_size= sample of data used for training (subset of full training set). 
    - epoch= number of runs over training data
    - callbacks=
    
- for layers.Input need size (x,y,z,1) in channels_last mode.

#### Roc curve notes:
- We know y-value depending on signal or background (0 or 1).
- The 3D-Cnn gives us a prediction for y, as a float between 0 or 1.
- We must use a cut (threshold) to determine what constitues 0 / 1. Eg. 0.5
- This gives us a false +ve rate a, true +ve .(fpr and tpr)
- Roc curve plots this when varying the threshold
- AUC gives area under this curve.

In [11]:
# Plotting weights
# print(train_wts.shape,test_wts.shape)

# # Train data 
# plt.figure()
# plt.plot(train_wts)
# plt.title("train + cv data weigts ")
# plt.show()

plt.figure()
plt.plot(test_wts)
plt.title("test data weights")
plt.show()



FigureCanvasNbAgg()

## View created figures

In [12]:
# from IPython.display import IFrame


# model_loc='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/Nov5_saved_models/'
# fle='learning_model1.'
# fname=model_loc+"%s.pdf"%(fle)
# fname='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/Nov5_saved_models/learning_model1.eps'
# print(fname)
# print(os.path.exists(fname))
# IFrame(fname, width=600, height=300)

In [13]:
# from IPython.display import Image
# Image(fname)

## To do
- pick the best model
- test on reserve data set
- running with multiple cores on a batch node.
- using multiple nodes
- using GPU nodes
- Test a host of models using ipyparallel
- make changes to incorporate regular data in training and reserved data in testing
- way to store tested values for easy plotting


Nov 21, 2018
- Increase complications in model
- Try to reproduce Zahra' resnet-18 plot
- Get threshold from roc-curve to get the events that give 10^-7
- Get results for hesse-cut data

In [8]:
a=np.arange(10)
print(a)
shuffle_idx=np.random.randint(0,10,size=10)
print(shuffle_idx)

[0 1 2 3 4 5 6 7 8 9]
[6 9 2 2 3 8 9 8 2 8]


In [9]:
? np.random.

[0;31mDocstring:[0m
random_sample(size=None)

Return random floats in the half-open interval [0.0, 1.0).

Results are from the "continuous uniform" distribution over the
stated interval.  To sample :math:`Unif[a, b), b > a` multiply
the output of `random_sample` by `(b-a)` and add `a`::

  (b - a) * random_sample() + a

Parameters
----------
size : int or tuple of ints, optional
    Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
    ``m * n * k`` samples are drawn.  Default is None, in which case a
    single value is returned.

Returns
-------
out : float or ndarray of floats
    Array of random floats of shape `size` (unless ``size=None``, in which
    case a single float is returned).

Examples
--------
>>> np.random.random_sample()
0.47108547995356098
>>> type(np.random.random_sample())
<type 'float'>
>>> np.random.random_sample((5,))
array([ 0.30220482,  0.86820401,  0.1654503 ,  0.11659149,  0.54323428])

Three-by-two array of random numbers from [-5, 0):

>>> 

In [14]:
(52/230 * 24/32.0)**(-1)

5.897435897435898