# Code for Ice-Cube 3D CNN

- Oct 29, 2018: This code just makes plots for previously trained CNNs

In [14]:
import sys
import os

import matplotlib.pyplot as plt
import numpy as np
import glob
import pickle
import time

In [15]:
%matplotlib widget
# %matplotlib inline

Useful blog for keras conv3D: http://learnandshare645.blogspot.com/2016/06/3d-cnn-in-keras-action-recognition.html

## Modules

In [16]:
def f_load_data(data_dir,f1,f2,f3,mode=False):
    ''' Load extracted data from files. Three files for xdata,ydata,weights.
    arguments: data directory, f1,f2,f3 
    returns : inpx,inpy,weights as arrays
    '''
    m='r' if mode else None
    inpx=np.load(data_dir+f1+'.npy',mmap_mode=m)
    inpy=np.load(data_dir+f2+'.npy',mmap_mode=m)
    wts=np.load(data_dir+f3+'.npy',mmap_mode=m)
    print(inpx.shape,inpy.shape)
    
    return inpx,inpy,wts


## Read train and test data

In [17]:

def f_plot_data(title,y,wts):
    '''
    Plot function for ydata and weights
    '''
    # Plot y
    fig=plt.figure()
    ax1=fig.add_subplot(211)
    plt.title("%s"%(title))
    ax1.plot(y)
    ax1.set_ylabel('y-data')
    # Plot wts
    ax2=fig.add_subplot(212)
    ax2.plot(wts)
    plt.setp(ax1.get_xticklabels(), visible=False)
    ax2.set_ylabel('weights')
#     plt.show()

def f_sig_bg_summary(y_arr):
    
    num=y_arr.shape[0]
    sig=np.count_nonzero(y_arr) # Signal corresponds to y==1
    print("Signal %s, %s %s"%(sig,sig/num,'%'))

## Extract data

In [18]:
data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/data_regular/'


### Extract regular data
f1,f2,f3='shuffled_input_regular_x','shuffled_input_regular_y','shuffled_input_regular_wts'
f1,f2,f3='processed_input_regular_x','processed_input_regular_y','processed_input_regular_wts'
i1x,i1y,i1wts=f_load_data(data_dir,f1,f2,f3,mode=True)

### Extract reserved data
f1,f2,f3='shuffled_input_reserved_x','shuffled_input_reserved_y','shuffled_input_reserved_wts'
f1,f2,f3='processed_input_reserved_x','processed_input_reserved_y','processed_input_reserved_wts'
i2x,i2y,i2wts=f_load_data(data_dir,f1,f2,f3,mode=True)


(136066, 10, 20, 60, 1) (136066,)
(752604, 10, 20, 60, 1) (752604,)


## Data overview

In [19]:
print("Regular data")
print("Num samples in regular data",i1y.shape[0])
f_sig_bg_summary(i1y)
print("Reserved")
print("Num samples in reserved data",i2y.shape[0])
f_sig_bg_summary(i2y)

Regular data
Num samples in regular data 136066
Signal 26237, 0.1928255405464995 %
Reserved
Num samples in reserved data 752604
Signal 17617, 0.023408060547113755 %


## View Y data and weights

In [20]:

data_dir='/global/project/projectdirs/dasrepo/vpa/ice_cube/data_for_cnn/extracted_data_v/data/temp_data/'
f_plot_data('regular_data',i1y,i1wts)
f_plot_data('reserved_data',i2y,i2wts)

FigureCanvasNbAgg()

FigureCanvasNbAgg()

## X data summary

In [21]:
def f_gist_xdata(x_arr):
    
    arr=x_arr[:,:,:,:,0]
    num_samples=arr.shape[0]
    print(num_samples)

    # Find number of non-zero 3D images
    non_zero_count_3d=[np.count_nonzero(i) for i in arr]
    print("Number of 3d images that are zero:\t",np.count_nonzero(non_zero_count_3d)-num_samples)

    # Find number of non-zero 2D images
    img_arr=arr.reshape(num_samples*10,20,60)
    num_2d_img=num_samples*10
    # print(img_arr.shape)
    ## First get the True-False value of whether a 2d array has non-zero values. Then pick the True values. 
    ## This gives the number of 2d images that have atleast one non-zero values.
    non_zero_count_2d=np.count_nonzero(np.array([np.any(i) for i in img_arr]))
    zero_count=num_2d_img-non_zero_count_2d
    print("2d images that are zero:\t%s\t%s%s"%(zero_count,zero_count*100/num_2d_img,'%'))



In [22]:
f_gist_xdata(i1x)
f_gist_xdata(i2x)

136066
Number of 3d images that are zero:	 0
2d images that are zero:	369847	27.18144135934032%
752604
Number of 3d images that are zero:	 0
2d images that are zero:	2063841	27.42266849498541%


## Detailed image analysis

In [23]:
# Extract images for a few signal and background cases, for further analysis
y1_idx_list=np.where(i1y==1)[0]
idx1=y1_idx_list[:10]
y0_idx_list=np.where(i1y==0)[0]
idx0=y0_idx_list[:10]
# Xarrays for signal and background
x_0=i1x[idx0][:,:,:,:,0]
x_1=i1x[idx1][:,:,:,:,0]

In [24]:
### Code to analyze a specific sample in detail

def f_analyze_sample(x):
    
    def f_get_sample_image_info(arr):
        ''' Module to find the number of non-zero images in a sample and number of non-zero spots in each image of the sample.
        Sample array size is (10,20,60)'''

        img_count=[np.count_nonzero(i) for i in arr]
        non_zero_count=np.count_nonzero(img_count)

        dict_keys=['img_count','non_zero_count']
        image_dict={key:val for (key,val) in zip(dict_keys,[img_count,non_zero_count])}

        return(image_dict)


    def f_plot_images(arr):
        ''' Plots 2D images for each of the 10 in the 3rd dimension'''

        print(arr.shape)

        rows,cols=2,5
        fig,axarr=plt.subplots(rows,cols,figsize=(10,2))
        for i in range(rows*cols):
            row,col=int(i/cols),i%cols
        #     print(i,'\t',row,col)
            axarr[row,col].imshow(arr[i,:,:],origin='lower',alpha=0.9)
            axarr[row,col].set_xticks(np.arange(0,62,10))
            axarr[row,col].set_yticks(np.arange(0,22,10))

        fig.subplots_adjust(hspace=0.0)
        # Drop axis labels
        temp=plt.setp([a.get_xticklabels() for a in axarr[:-1, :].flatten()], visible=False)
        temp=plt.setp([a.get_yticklabels() for a in axarr[:,1:].flatten()], visible=False)

        plt.close()

        
    ### Function begins ###
    print(f_get_sample_image_info(x))
    f_plot_images(x)


In [25]:
for sample_no,i in enumerate(x_0):
    print("Sample",sample_no,'\t',f_analyze_sample(i))

{'img_count': [0, 0, 0, 1, 10, 49, 97, 139, 100, 42], 'non_zero_count': 7}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 0 	 None
{'img_count': [0, 0, 1, 28, 87, 213, 249, 231, 188, 112], 'non_zero_count': 8}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 1 	 None
{'img_count': [2, 22, 57, 109, 154, 102, 26, 0, 0, 0], 'non_zero_count': 7}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 2 	 None
{'img_count': [1, 8, 20, 31, 34, 30, 18, 14, 5, 0], 'non_zero_count': 9}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 3 	 None
{'img_count': [0, 9, 25, 41, 63, 70, 39, 11, 1, 0], 'non_zero_count': 8}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 4 	 None
{'img_count': [0, 0, 0, 0, 0, 2, 12, 51, 83, 127], 'non_zero_count': 5}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 5 	 None
{'img_count': [0, 0, 0, 0, 0, 0, 1, 45, 110, 162], 'non_zero_count': 4}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 6 	 None
{'img_count': [0, 8, 44, 99, 122, 73, 14, 0, 0, 0], 'non_zero_count': 6}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 7 	 None
{'img_count': [64, 68, 39, 7, 0, 0, 0, 0, 0, 0], 'non_zero_count': 4}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 8 	 None
{'img_count': [49, 125, 149, 130, 72, 14, 0, 0, 0, 0], 'non_zero_count': 6}
(10, 20, 60)


FigureCanvasNbAgg()

Sample 9 	 None


In [26]:
def f_view_non_zero(x):
    ''' View parts of the data that are non-zero'''
    
    flt_x=x.flatten()
    x_non_zero=x.flatten()[np.where(x.flatten()>0.0)[0]]

    print("non_zero size %s, full size %s, "%(x_non_zero.shape,flt_x.shape))
    plt.figure()
    plt.plot(x_non_zero)
    plt.show()
f_view_non_zero(x_0)

non_zero size (4109,), full size (120000,), 


FigureCanvasNbAgg()

Parameters to incorporate
- tpr, fpr
- y==1, y==0 for predictions