# Code for Ice-Cube 3D CNN

- Oct 29, 2018: This code just makes plots for previously trained CNNs

In [1]:
import sys
import os

import matplotlib.pyplot as plt
import numpy as np
import glob
import pickle
import time

In [2]:
%matplotlib widget
# %matplotlib inline

Useful blog for keras conv3D: http://learnandshare645.blogspot.com/2016/06/3d-cnn-in-keras-action-recognition.html

## Modules

In [3]:
def f_load_data(data_dir,f1,f2,f3):
    ''' Load extracted data from files. Three files for xdata,ydata,weights.
    arguments: data directory, f1,f2,f3 
    returns : inpx,inpy,weights as arrays
    '''

    inpx=np.load(data_dir+f1+'.npy')
    inpy=np.load(data_dir+f2+'.npy')
    wts=np.load(data_dir+f3+'.npy')
    print(inpx.shape,inpy.shape)
    
    return inpx,inpy,wts


def f_get_ydata_and_wts(data_dir,f1,f2):
    ''' Load extracted data from files. Just extracting ydata and weights
    returns : inpy,weights as arrays
    '''
    inpy=np.load(data_dir+f1+'.npy')
    wts=np.load(data_dir+f2+'.npy')
    
    return inpy,wts

## Read train and test data

In [4]:
def f_get_data(dir_name):    
    ### Extract regular data
    f1,f2='processed_input_regular_y','processed_input_regular_wts'
    i1y,i1wts=f_get_ydata_and_wts(data_dir,f1,f2)
    print("Num samples in regular data",i1y.shape[0])
    ### Extract reserved data
    f1,f2='processed_input_reserved_y','processed_input_reserved_wts'
    i2y,i2wts=f_get_ydata_and_wts(data_dir,f1,f2)
    print("Num samples in reserved data",i2y.shape[0])

    train_y,train_wts=i1y,i1wts
    test_y,test_wts=i2y,i2wts
    del(i1y,i1wts,i2y,i2wts)
    
    print("Number of signal events in train",train_y[train_y>0].shape[0])
    print("Number of signal events in test",test_y[test_y>0].shape[0])
    return(train_y,train_wts,test_y,test_wts)

def f_plot_data(title,y,wts):
    # Plot y
    fig=plt.figure()
    ax1=fig.add_subplot(211)
    plt.title("%s"%(title))
    ax1.plot(y)
    ax1.set_ylabel('y-data')
    # Plot wts
    ax2=fig.add_subplot(212)
    ax2.plot(wts)
    plt.setp(ax1.get_xticklabels(), visible=False)
    ax2.set_ylabel('weights')
#     plt.show()



## Regular data

In [5]:

data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/temp_data/'
train_y,train_wts,test_y,test_wts=f_get_data(data_dir)
f_plot_data('train_data',train_y,train_wts)
f_plot_data('test_data',test_y,test_wts)

Num samples in regular data 136066
Num samples in reserved data 752604
Number of signal events in train 26237
Number of signal events in test 17617


FigureCanvasNbAgg()

FigureCanvasNbAgg()

### Checking shuffled data


In [6]:

data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/temp_data/'
f1,f2,f3='shuffled_input_regular_x','shuffled_input_regular_y','shuffled_input_regular_wts'
i1x,i1y,i1wts=f_load_data(data_dir,f1,f2,f3)
f1,f2,f3='shuffled_input_reserved_x','shuffled_input_reserved_y','shuffled_input_reserved_wts'
i2x,i2y,i2wts=f_load_data(data_dir,f1,f2,f3)

f_plot_data('shuffled_data_regular',i1y,i1wts)
f_plot_data('shuffled_data_reserved',i2y,i2wts)



(136066, 10, 20, 60, 1) (136066,)
(752604, 10, 20, 60, 1) (752604,)


FigureCanvasNbAgg()

FigureCanvasNbAgg()

## Hesse cuts

In [7]:
data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/data_hesse_cuts/'

train_y,train_wts,test_y,test_wts=f_get_data(data_dir)
f_plot_data('train_data',train_y[:],train_wts[:])
f_plot_data('test_data',test_y[:],test_wts[:])


Num samples in regular data 136066
Num samples in reserved data 752604
Number of signal events in train 26237
Number of signal events in test 17617


FigureCanvasNbAgg()

FigureCanvasNbAgg()

### Checking shuffled data


In [8]:

data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/data_hesse_cuts_dec_25/'
f1,f2,f3='shuffled_input_regular_x','shuffled_input_regular_y','shuffled_input_regular_wts'
i1x,i1y,i1wts=f_load_data(data_dir,f1,f2,f3)
f1,f2,f3='shuffled_input_reserved_x','shuffled_input_reserved_y','shuffled_input_reserved_wts'
i2x,i2y,i2wts=f_load_data(data_dir,f1,f2,f3)

f_plot_data('shuffled_data_regular',i1y,i1wts)
f_plot_data('shuffled_data_reserved',i2y,i2wts)

FileNotFoundError: [Errno 2] No such file or directory: '/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/data_hesse_cuts_dec_25/shuffled_input_regular_x.npy'

### Checking raw data with shuffled data 

In [None]:
data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/temp_data/'
f1,f2,f3='shuffled_input_regular_x','shuffled_input_regular_y','shuffled_input_regular_wts'
i1x,i1y,i1wts=f_load_data(data_dir,f1,f2,f3)

f1,f2,f3='processed_input_regular_x','processed_input_regular_y','processed_input_regular_wts'
i2x,i2y,i2wts=f_load_data(data_dir,f1,f2,f3)


print(np.mean(i1x),np.mean(i2x),'\n',np.mean(i1y),np.mean(i2y),'\n',np.mean(i1wts),np.mean(i2wts))

The means agree to a good extent. I'm convinced the shuffling works fine.

## Checking shuffled data for two different runs

In [11]:
data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/data_regular/'
f1,f2,f3='shuffled_input_regular_x','shuffled_input_regular_y','shuffled_input_regular_wts'
i1x,i1y,i1wts=f_load_data(data_dir,f1,f2,f3)


data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/new_data_regular/'
f1,f2,f3='shuffled_input_regular_x','shuffled_input_regular_y','shuffled_input_regular_wts'
i2x,i2y,i2wts=f_load_data(data_dir,f1,f2,f3)


print(np.mean(i1x),np.mean(i2x),'\n',np.mean(i1y),np.mean(i2y),'\n',np.mean(i1wts),np.mean(i2wts))

(136066, 10, 20, 60, 1) (136066,)
(136066, 10, 20, 60, 1) (136066,)
1.3285437059286471 1.3285437059286471 
 0.1928255405464995 0.1928255405464995 
 0.0012851776562493081 0.0012851776562493081


In [12]:
data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/data_hesse_cuts/'
f1,f2,f3='shuffled_input_regular_x','shuffled_input_regular_y','shuffled_input_regular_wts'
i1x,i1y,i1wts=f_load_data(data_dir,f1,f2,f3)


data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/new_data_hesse_cuts/'
f1,f2,f3='shuffled_input_regular_x','shuffled_input_regular_y','shuffled_input_regular_wts'
i2x,i2y,i2wts=f_load_data(data_dir,f1,f2,f3)


print(np.mean(i1x),np.mean(i2x),'\n',np.mean(i1y),np.mean(i2y),'\n',np.mean(i1wts),np.mean(i2wts))

(136066, 10, 20, 60, 1) (136066,)
(136066, 10, 20, 60, 1) (136066,)
1.3285437059286471 1.3285437059286471 
 0.1928255405464995 0.1928255405464995 
 0.0012851776562493081 0.0012851776562493081
