<a href="https://colab.research.google.com/github/anamika-yadav99/Screening-task/blob/main/E2E_E_P_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import h5py

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import tensorflow as tf 
from tensorflow import keras 
import matplotlib.pyplot as plt 
import sys 
from tensorflow.keras.optimizers import Adam, SGD, RMSprop

from sklearn.model_selection import train_test_split
from keras.models import Sequential 
from keras.layers import Dense, Dropout, Flatten 
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import gradient_descent_v2
from keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import roc_curve, auc


In [4]:
def load_dataset():
  #load dataset
    f_electron = h5py.File('/content/drive/MyDrive/Colab Notebooks/task1/SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5', 'r')
    f_photon = h5py.File('/content/drive/MyDrive/Colab Notebooks/task1/SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5', 'r')

    f_electron.keys()
    f_photon.keys()

    X_electron = f_electron.get('X')
    y_electron = f_electron.get('y')
    X_photon = f_photon.get('X')
    y_photon = f_photon.get('y')
    

    X_electron = np.array(X_electron)
    y_electron = np.array(y_electron)
    X_photon = np.array(X_photon)
    y_photon = np.array(y_photon)

    f_electron.close()
    f_photon.close

    X = np.concatenate([X_electron, X_photon])
    y = np.concatenate([y_electron, y_photon])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state= 0, shuffle = True)

    

    return X_train, y_train, X_test, y_test

In [5]:
def validation_split(X_test, y_test, valid_X, valid_Y, v_split):

    index_of_validation = int(v_split * len(X_test))
    valid_X.extend(X_test[-index_of_validation:])
    valid_Y.extend(y_test[-index_of_validation:])
    X_test = X_test[:-index_of_validation]
    y_test = y_test[:-index_of_validation]
    return X_test, y_test, np.asarray(valid_X), np.asarray(valid_Y)

In [6]:
def model_VGG():
  model = Sequential()

  #vgg block_1
  model.add(Conv2D(filters = 32, activation='relu', kernel_size=(3,3), padding='same', kernel_initializer='TruncatedNormal', input_shape=(32, 32, 2)))
  model.add(Conv2D(filters= 32, activation='relu', kernel_size=(3,3), padding='same', kernel_initializer='TruncatedNormal'))
  model.add(MaxPooling2D(pool_size=(2, 2)))

  #vgg block_2
  model.add(Conv2D(filters = 64, activation='relu', kernel_size=(3,3), padding='same', kernel_initializer='TruncatedNormal'))
  model.add(Conv2D(filters = 64, activation='relu', kernel_size=(3,3), padding='same', kernel_initializer='TruncatedNormal'))
  model.add(MaxPooling2D(pool_size=(2, 2)))

  #Fully connected layer
  model.add(Flatten())
  model.add(Dense(256, activation='relu', kernel_initializer='TruncatedNormal'))
  model.add(Dropout(0.2))
  model.add(Dense(128, activation='relu', kernel_initializer='TruncatedNormal'))
  model.add(Dropout(0.2))
  model.add(Dense(1, activation='sigmoid', kernel_initializer='TruncatedNormal'))
  
  #compile model
  opt = Adam(lr= 1.e-3)
  model.compile(loss='binary_crossentropy',optimizer = opt , metrics=['accuracy'])
  model.summary()

  return model


In [7]:
def summarize_diagnostics(history):
    plt.subplots(figsize = (7,7))
    # plot loss
    plt.subplot(211)
    plt.title('Cross Entropy Loss')
    plt.plot(history.history['loss'], color='blue', label='train')
    plt.plot(history.history['val_loss'], color='orange', label='test')

    # plot accuracy
    plt.subplot(212)
    plt.title('Classification Accuracy')
    plt.plot(history.history['accuracy'], color='blue', label='train')
    plt.plot(history.history['val_acc'], color='orange', label='test')
    plt.show()
    # save plot to file
    filename = sys.argv[0].split('/')[-1]
    plt.savefig(filename + '_plot.png')
    plt.close()

In [8]:
def test_model():
    X_train, y_train, X_test, y_test = load_dataset()

    #get validation set 
    valid_X = []
    valid_Y = []
    X_test, y_test, validX, validY = validation_split(X_test, y_test, valid_X, valid_Y,v_split=0.5)

    model = model_VGG()
    

    ##create data generator 
    #datagen = ImageDataGenerator(width_shift_range = 0.1, height_shift_range = 0.1, horizontal_flip = True)
    #iterator 
    #train = datagen.flow(X_train, y_train, batch_size = 64)
    

    #checkpoint for early stopping 
    checkpoint = ModelCheckpoint("vgg16_1.h5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
    early = EarlyStopping(monitor='val_acc', min_delta=0, patience=3, verbose=1, mode='auto')
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1.e-6)

    # fit model
    steps = int(X_train.shape[0]/ 64)
    history = model.fit(X_train, y_train,
                        batch_size = 64, 
                        steps_per_epoch = steps, 
                        epochs=50, 
                        validation_data=(validX, validY),
                        verbose=1, shuffle = True ,
                        callbacks=[reduce_lr])
    
    #evaluate on validation dataset
    score = model.evaluate(validX, validY , verbose=1)
    print(f'Test loss: {score[0]} / Test accuracy: {score[1]}') 
    y_pred = model.predict(validX)
    fpr, tpr, _ = roc_curve(validY, y_pred)
    ROC = auc(fpr, tpr)
    print('Validation ROC AUC: ',ROC)

    #evaluate on test dataset
    score = model.evaluate(X_test, y_test, verbose=1)
    print(f'Test loss: {score[0]} / Test accuracy: {score[1]}') 
    y_pred = model.predict(X_test)
    fpr, tpr, _ = roc_curve(y_test, y_pred)
    ROC = auc(fpr, tpr)
    print('Test ROC AUC: ',ROC)
    #summarize_diagnostics(history)
    return history

def main():
	test_model()

if __name__ == "__main__":
    main()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        608       
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 32)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 64)        18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 16, 16, 64)        36928     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 8, 8, 64)         0         
 2D)                                                    

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 0.5291250944137573 / Test accuracy: 0.7429718971252441
Validation ROC AUC:  0.8128957743884982
Test loss: 0.5336227416992188 / Test accuracy: 0.7382597327232361
Test ROC AUC:  0.8091968447503954
