## Explore Overfitting and Underfitting

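We'll explore two common regularization techniques—weight regularization and dropout—and compare the training and validation curves of each regularized model against an unregularized model of the same size.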

In [48]:
import matplotlib.pyplot as plt
import numpy
import pandas as pd
import tensorflow as tf
from keras import regularizers
from keras.callbacks import EarlyStopping
from keras.layers import Dense
from keras.layers import Dropout
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow import keras

In [55]:
# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs
seed = 7
numpy.random.seed(seed)

# Read the processed input table: tab-separated, UTF-8, first column used as the index
dataset = pd.read_csv(
    '/home_l/francovm/Projects/SSE/data/processed/input_data.csv',
    sep='\t',
    encoding='utf-8',
    index_col=0,
)

# Features: every column except the 'Events' target
X_data = dataset.drop('Events', axis=1)

# Target: one-hot encoding of the 'Events' column
Y_data = to_categorical(dataset['Events'])

# Number of feature columns; the models below use it as their input width
n_cols = len(X_data.columns)


In [56]:
# Hold out 33% of the rows as the test set; the remaining 67% are used for training.
# random_state reuses the notebook seed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    Y_data,
    test_size=0.33,
    random_state=seed,
)

# Baseline Model

In [57]:
# Baseline network: two 16-unit ReLU hidden layers feeding a 2-unit output layer.
baseline_model = Sequential()

#add layers to model
baseline_model.add(Dense(16, activation='relu', input_shape=(n_cols,)))
baseline_model.add(Dense(16, activation='relu'))
# softmax rather than sigmoid: the loss is categorical_crossentropy, which
# expects the two outputs to form a probability distribution over the classes.
baseline_model.add(Dense(2, activation='softmax'))

# binary_crossentropy is tracked as an extra metric because plot_history
# plots it by default.
baseline_model.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy', 'binary_crossentropy'])

baseline_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_25 (Dense)             (None, 16)                112       
_________________________________________________________________
dense_26 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_27 (Dense)             (None, 2)                 34        
Total params: 418
Trainable params: 418
Non-trainable params: 0
_________________________________________________________________


In [60]:

# Early-stopping callback: ends a run once the callback's monitored quantity
# has gone 5 epochs without improving.
early_stopping_monitor = EarlyStopping(patience=5)

# Train the baseline network: up to 20 epochs, mini-batches of 64, with 30% of
# the training rows held out for validation.
baseline_fit_options = dict(
    epochs=20,
    validation_split=0.3,
    batch_size=64,
    callbacks=[early_stopping_monitor],
    verbose=2,
)
baseline_history = baseline_model.fit(X_train, y_train, **baseline_fit_options)

Train on 106799 samples, validate on 45771 samples
Epoch 1/20


InternalError: CUDA runtime implicit initialization on GPU:0 failed. Status: out of memory

In [35]:
# Smaller network: two 4-unit ReLU hidden layers feeding a 2-unit output layer.
smaller_model = Sequential()

#add layers to model
smaller_model.add(Dense(4, activation='relu', input_shape=(n_cols,)))
smaller_model.add(Dense(4, activation='relu'))
# softmax rather than sigmoid: the loss is categorical_crossentropy, which
# expects the two outputs to form a probability distribution over the classes.
smaller_model.add(Dense(2, activation='softmax'))

smaller_model.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy', 'binary_crossentropy'])

smaller_model.summary()



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 4)                 28        
_________________________________________________________________
dense_14 (Dense)             (None, 4)                 20        
_________________________________________________________________
dense_15 (Dense)             (None, 2)                 10        
Total params: 58
Trainable params: 58
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the smaller network: up to 20 epochs, mini-batches of 64, 30% of the
# training rows held out for validation, early stopping attached.
smaller_history = smaller_model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=20,
    verbose=2,
    callbacks=[early_stopping_monitor],
    validation_split=0.3,
)

In [None]:

# Bigger network: two 512-unit ReLU hidden layers feeding a 2-unit output layer.
bigger_model = Sequential()

#add layers to model
bigger_model.add(Dense(512, activation='relu', input_shape=(n_cols,)))
bigger_model.add(Dense(512,activation='relu'))
# softmax rather than sigmoid: the loss is categorical_crossentropy, which
# expects the two outputs to form a probability distribution over the classes.
bigger_model.add(Dense(2, activation='softmax'))

bigger_model.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy', 'binary_crossentropy'])

bigger_model.summary()


In [None]:
# Train the bigger network: up to 20 epochs, mini-batches of 64, 30% of the
# training rows held out for validation, early stopping attached.
bigger_fit_options = {
    'epochs': 20,
    'validation_split': 0.3,
    'batch_size': 64,
    'callbacks': [early_stopping_monitor],
    'verbose': 2,
}
bigger_history = bigger_model.fit(X_train, y_train, **bigger_fit_options)

In [None]:
def plot_history(histories, key='binary_crossentropy'):
  """Plot training and validation curves of one metric for several runs.

  Args:
    histories: iterable of ``(name, history)`` pairs. Each ``history`` must
      expose ``epoch`` (the sequence of epoch indices) and ``history`` (a dict
      holding the per-epoch values under ``key`` and ``'val_' + key``).
    key: name of the metric to plot.

  Returns:
    None. The curves are drawn on a new 16x10 pyplot figure; validation
    curves are dashed and share their colour with the matching training curve.
  """
  # Materialise once so the runs can be walked twice (plotting, then axis sizing).
  histories = list(histories)
  plt.figure(figsize=(16,10))

  for name, history in histories:
    val = plt.plot(history.epoch, history.history['val_'+key],
                   '--', label=name.title()+' Val')
    plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
             label=name.title()+' Train')

  plt.xlabel('Epochs')
  plt.ylabel(key.replace('_',' ').title())
  plt.legend()

  # Size the x-axis from the longest run. Runs can stop at different epochs
  # (e.g. with early stopping), so using only the last run would clip the others.
  last_epochs = [max(history.epoch) for _, history in histories
                 if len(history.epoch)]
  if last_epochs:
    plt.xlim([-2, max(last_epochs)])


# Overlay the train/validation curves of the three differently sized networks
size_comparison = [
    ('baseline', baseline_history),
    ('smaller', smaller_history),
    ('bigger', bigger_history),
]
plot_history(size_comparison)

# Regularization

## *L2*

In [None]:
# L2-regularised network: two 512-unit ReLU hidden layers, each with an L2
# penalty of 0.001 on its kernel weights, feeding a 2-unit output layer.
l2_model = Sequential()

#add layers to model
# The regularizer comes from the standalone `keras` package, the same package
# that provides Sequential and Dense here, rather than from `tensorflow.keras`.
l2_model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001),input_shape=(n_cols,)))
l2_model.add(Dense(512,activation='relu',kernel_regularizer=regularizers.l2(0.001)))
# softmax rather than sigmoid: the loss is categorical_crossentropy, which
# expects the two outputs to form a probability distribution over the classes.
l2_model.add(Dense(2, activation='softmax'))

l2_model.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy', 'binary_crossentropy'])

l2_model.summary()

In [None]:

# Train the L2-regularised network: up to 20 epochs, mini-batches of 64, 30% of
# the training rows held out for validation, early stopping attached.
l2_model_history = l2_model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=20,
    verbose=2,
    callbacks=[early_stopping_monitor],
    validation_split=0.3,
)

In [None]:
# Compare the L2-regularised network with the unregularised bigger network.
# The reference curve is labelled 'bigger' because it is bigger_history, not
# the history of baseline_model.
plot_history([('bigger', bigger_history),
              ('l2', l2_model_history)])

## *Dropout*

In [None]:
# Dropout network: two 512-unit ReLU hidden layers, each followed by 50%
# dropout, feeding a 2-unit output layer.
dpt_model = Sequential()

#add layers to model
dpt_model.add(Dense(512, activation='relu',input_shape=(n_cols,)))
dpt_model.add(Dropout(0.5))
dpt_model.add(Dense(512,activation='relu'))
dpt_model.add(Dropout(0.5))
# softmax rather than sigmoid: the loss is categorical_crossentropy, which
# expects the two outputs to form a probability distribution over the classes.
dpt_model.add(Dense(2, activation='softmax'))

dpt_model.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy', 'binary_crossentropy'])

dpt_model.summary()


# Train the dropout network: up to 20 epochs, mini-batches of 64, 30% of the
# training rows held out for validation, early stopping attached.
dpt_fit_options = dict(
    epochs=20,
    validation_split=0.3,
    batch_size=64,
    callbacks=[early_stopping_monitor],
    verbose=2,
)
dpt_model_history = dpt_model.fit(X_train, y_train, **dpt_fit_options)

In [None]:
# Compare the dropout network with the unregularised bigger network.
# The reference curve is labelled 'bigger' because it is bigger_history, not
# the history of baseline_model.
plot_history([('bigger', bigger_history),
              ('dropout', dpt_model_history)])

In [None]:
# Score the bigger network on the held-out test split
scores = bigger_model.evaluate(x=X_test, y=y_test)

# Print the second entry of the result as a percentage (presumably the accuracy,
# given the order of the metrics passed to compile; confirm against metrics_names)
test_metric_pct = 100 * scores[1]
print(test_metric_pct)

# Use this function to find a batch size just before starting training or testing

In [None]:
def _gpu_available():
    """Report whether the Keras/TensorFlow backend can see at least one GPU.

    Tries the Keras TensorFlow-backend helper first and falls back to
    TensorFlow's local device listing. Returns False (with a warning) when
    neither probe can be run.
    """
    import warnings

    try:
        from keras import backend as K
        return bool(K.tensorflow_backend._get_available_gpus())
    except Exception:
        # Helper missing or failing in this Keras build: use the fallback below.
        pass

    try:
        from tensorflow.python.client import device_lib
        return any(device.device_type == 'GPU'
                   for device in device_lib.list_local_devices())
    except Exception as exc:
        warnings.warn("FindBatchSize: GPU detection failed (%s); assuming CPU" % (exc,))
        return False


def FindBatchSize(model):
    """Suggest a batch size for ``model`` on the current machine.

    The suggestion is a heuristic built from the model's parameter count, the
    number of CPUs, GPU availability and the percentage of system memory in
    use. The chosen value is printed as ``Batch Size:  <n>`` and returned.

    Args:
        model: model architecture that is yet to be trained; must provide
            ``count_params()``.

    Returns:
        int: the suggested batch size. 16 is the default when no rule applies;
        models with more than 100,000,000 parameters always get 1.
    """
    import gc
    import os
    import warnings

    batch_size = 16  # default when none of the rules below applies

    try:
        total_params = int(model.count_params())
    except Exception as exc:
        warnings.warn("FindBatchSize: could not count model parameters (%s); "
                      "keeping the default batch size" % (exc,))
        total_params = None

    # Step 1: pick a size from model complexity and the available hardware.
    if total_params is not None:
        cpu_count = os.cpu_count()  # may be None when it cannot be determined
        few_cores = cpu_count is not None and cpu_count < 16
        many_cores = cpu_count is not None and cpu_count > 15

        if total_params > 100000000:
            batch_size = 1
        elif few_cores:
            if total_params < 500000:
                batch_size = 64
            elif total_params > 5000000:
                batch_size = 8
        # The GPU is only probed when a rule depends on it, because probing
        # loads the backend.
        elif many_cores and _gpu_available():
            if total_params < 1000000:
                batch_size = 64
            elif total_params < 2000000:
                batch_size = 32
            elif total_params < 10000000:
                batch_size = 16
            else:
                batch_size = 8

    # Step 2: shrink the batch when system memory is already under pressure.
    try:
        import psutil
    except ImportError:
        warnings.warn("FindBatchSize: psutil is not installed; "
                      "skipping the memory-based adjustment")
        psutil = None

    if psutil is not None:
        try:
            memory_percent = float(psutil.virtual_memory().percent)
        except Exception as exc:
            warnings.warn("FindBatchSize: could not read memory usage (%s)" % (exc,))
        else:
            if memory_percent > 90.0:
                batch_size = 2
            elif memory_percent > 85.0:
                batch_size = 4
            elif memory_percent > 75.0:
                batch_size = 8

    # Very large models are capped at 1 regardless of the memory reading.
    if total_params is not None and total_params > 100000000:
        batch_size = 1

    print("Batch Size:  " + str(batch_size))
    gc.collect()
    return batch_size



#####################################################################################################
#####################################################################################################

In [None]:
# Suggested batch size for the baseline network (the return value is the cell output)
FindBatchSize(model=baseline_model)