<a href="https://colab.research.google.com/github/gnye8/RBP_Prediction/blob/main/Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
#import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten

In [2]:
X_train = np.load('/content/X_train2.npy')
X_test = np.load('/content/X_test2.npy')
y_train = np.load('/content/y_train2.npy')
y_test = np.load('/content/y_test2.npy')

X_extval = np.load('/content/X_extval.npy')
y_extval = np.load('/content/y_extval.npy')

LeNet Architecture



source: https://www.kaggle.com/code/blurredmachine/lenet-architecture-a-complete-guide

In [3]:
LeNet = Sequential()
LeNet.add(Conv2D(128, kernel_size=(9,3), activation='relu', input_shape=(406,4,1), padding='same'))
LeNet.add(MaxPooling2D(pool_size=(3,1)))
LeNet.add(Conv2D(128, kernel_size=(9,3), activation='relu', padding='same'))
LeNet.add(MaxPooling2D(pool_size=(3,1)))
LeNet.add(Flatten())
LeNet.add(Dense(128, activation='relu'))
LeNet.add(Dense(64, activation='relu'))
LeNet.add(Dense(1, activation='sigmoid'))

LeNet.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 406, 4, 128)       3584      
                                                                 
 max_pooling2d (MaxPooling2  (None, 135, 4, 128)       0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 135, 4, 128)       442496    
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 45, 4, 128)        0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 23040)             0         
                                                                 
 dense (Dense)               (None, 128)               2

In [4]:
LeNet.compile(loss=keras.losses.binary_crossentropy, optimizer=keras.optimizers.legacy.Adam(), metrics=['accuracy'])

In [None]:
LeNet.fit(X_train, y_train, batch_size=256, epochs=50, verbose=1, validation_data=(X_test, y_test))

VGG Model Architecture


source: https://builtin.com/machine-learning/vgg16

In [5]:
vgg = Sequential()
vgg.add(Conv2D(64, kernel_size=(9,3), activation='relu', input_shape=[406,4,1], padding='same'))
vgg.add(MaxPooling2D(pool_size=(3,1), strides=(2,2)))
vgg.add(Conv2D(128, kernel_size=(9,3), activation='relu', padding='same'))
vgg.add(Conv2D(128, kernel_size=(9,3), activation='relu', padding='same'))
vgg.add(MaxPooling2D(pool_size=(3,1), strides=(2,2)))
vgg.add(Conv2D(filters=128, kernel_size=(9,3), padding="same", activation="relu"))
vgg.add(Conv2D(filters=128, kernel_size=(9,3), padding="same", activation="relu"))
vgg.add(Conv2D(filters=128, kernel_size=(9,3), padding="same", activation="relu"))
vgg.add(MaxPooling2D(pool_size=(3,1),strides=(2,2)))
vgg.add(Conv2D(filters=128, kernel_size=(9,3), padding="same", activation="relu"))
vgg.add(Conv2D(filters=128, kernel_size=(9,3), padding="same", activation="relu"))
vgg.add(Conv2D(filters=128, kernel_size=(9,3), padding="same", activation="relu"))
vgg.add(MaxPooling2D(pool_size=(3,1),strides=(2,2)))
vgg.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
vgg.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
vgg.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
vgg.add(MaxPooling2D(pool_size=(3,1),strides=(2,2)))
vgg.add(Flatten())
vgg.add(Dense(units=128, activation='relu'))
vgg.add(Dense(units=128, activation='relu'))
vgg.add(Dense(1, activation='sigmoid'))

print(vgg.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 406, 4, 64)        1792      
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 202, 2, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 202, 2, 128)       221312    
                                                                 
 conv2d_4 (Conv2D)           (None, 202, 2, 128)       442496    
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 100, 1, 128)       0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 100, 1, 128)      

In [6]:
vgg.compile(loss=keras.losses.binary_crossentropy, optimizer=keras.optimizers.legacy.Adam(), metrics=['accuracy'])

In [None]:
vgg.fit(X_train, y_train, batch_size=256, epochs=50, verbose=1, validation_data=(X_test, y_test))

LeNet Optimization

I will optimize LeNet over VGG because it is faster to train/fit and has comparable accuracy.

In order to improve accuracy, I will add an additional cycle of convolutions and max pooling, and in order to prevent overfitting, I will insert dropout layers between convolution/max pooling steps.

In [7]:
model = Sequential()
model.add(Conv2D(128, kernel_size=(9,3), activation='relu', input_shape=(406,4,1), padding='same'))
model.add(MaxPooling2D(pool_size=(3,1)))
model.add(Dropout(0.25))
model.add(Conv2D(128, kernel_size=(9,3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(3,1)))
model.add(Dropout(0.25))
model.add(Conv2D(256, kernel_size=(3,3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(3,1)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(1, activation='sigmoid'))

LeNet.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 406, 4, 128)       3584      
                                                                 
 max_pooling2d (MaxPooling2  (None, 135, 4, 128)       0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 135, 4, 128)       442496    
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 45, 4, 128)        0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 23040)             0         
                                                                 
 dense (Dense)               (None, 128)               2

In [8]:
model.compile(loss=keras.losses.binary_crossentropy, optimizer=keras.optimizers.legacy.Adam(), metrics=['accuracy'])

In [None]:
model.fit(X_train, y_train, batch_size=256, epochs=50, verbose=1, validation_data=(X_test, y_test))

### Parameter Tuning

We will now tune the parameters that we are using for model training.

Parameters we will focus on are:

- kernel size
- dropout size
- learning rate
- batch size