# <center>Dense networks</center>

In [1]:
import tensorflow as tf
from tensorflow.keras import Sequential, backend as K
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow.keras as keras
import tensorflow_hub as hub
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.model_selection import ParameterGrid, GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import graphviz
%matplotlib inline
sns.set()



We will load data from data.npz file.

In [2]:
# Load all numpy arrays except training images (in order to save memory).
def load_data():
    with np.load('data.npz', allow_pickle=True) as npz_file:
        #X_train = npz_file['X_train']
        X_valid = npz_file['X_valid']
        X_test = npz_file['X_test']
        X_train_features = npz_file['X_train_features']
        X_valid_features = npz_file['X_valid_features']
        X_test_features = npz_file['X_test_features']
        y_train_1h = npz_file['y_train_1h']
        y_valid_1h = npz_file['y_valid_1h']
        y_test_1h = npz_file['y_test_1h']
        y_train = npz_file['y_train']
        y_valid = npz_file['y_valid']
        y_test = npz_file['y_test']
        class_indices = npz_file['class_indices']
        train_filenames = npz_file['train_filenames']
        valid_filenames = npz_file['valid_filenames']
        test_filenames = npz_file['test_filenames']
    return X_train_features, y_train_1h, y_train, train_filenames, X_valid,X_valid_features, y_valid_1h, y_valid, valid_filenames, X_test, X_test_features, y_test_1h, y_test, test_filenames,class_indices

# load training images
def load_images():
    with np.load('data.npz', allow_pickle=True) as npz_file:
        X_train = npz_file['X_train']
    return X_train

# merge two dictionaries
def merge_dict(x,y):
    d = x.copy()
    d.update(y)
    return d

# fit a model
def fit_model(model, grid, X_tr, y_tr, X_va, y_va, X_t, y_t):
    valid_scores = []
    train_scores = []
    params = []
    for params_dict in grid:
        #print (params_dict)
        params.append(params_dict)
        model.set_params(**params_dict)
        model.fit(X_tr, y_tr)
        train_scores.append(model.score(X_tr, y_tr))
        valid_scores.append(model.score(X_va, y_va))
    best_index = np.argmax(valid_scores)
    # refit model with best params
    model.set_params(**params[best_index])
    model.fit(X_tr, y_tr)
    test_score = model.score(X_t, y_t)
    return { 'best_params':params[best_index],
              'params':params,
             'train_scores':train_scores,
             'valid_scores':valid_scores,
             'test_score':test_score,
             'best_index':best_index,
              'best_valid_score':valid_scores[best_index],
              'best_model':model
            }

X_train_features, y_train_1h, y_train, train_filenames, X_valid,X_valid_features, y_valid_1h, y_valid, valid_filenames, X_test, X_test_features, y_test_1h, y_test, test_filenames,class_indices = load_data()


## Dense network
Finally, try with neural networks
- layer dense network i.e. no hidden layer, just the input and output ones

This is an example of 1 layer neural network for our problem.

In [3]:
nn_1l_model = Sequential()
nn_1l_model.add(Dense(6, activation='softmax', input_dim=X_train_features.shape[1],
                     kernel_initializer=keras.initializers.VarianceScaling(scale=1.0, seed=0),
                     kernel_regularizer=keras.regularizers.l2(10**1),
                     activity_regularizer=keras.regularizers.l1(10**1)
                     )
               )
nn_1l_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 6)                 12294     
Total params: 12,294
Trainable params: 12,294
Non-trainable params: 0
_________________________________________________________________


We will try fitting our model with regularization using a grid of C parameters. We will also stop training when there is overfitting by using EarlyStopping callback.
Training will stop when validation accuracy starts to go down.

In [4]:
np.random.seed(0)
tf.set_random_seed(0)
scaler = StandardScaler()
X_train_preprocessed = scaler.fit_transform(X_train_features).astype(float)
X_valid_preprocessed = scaler.fit_transform(X_valid_features).astype(float)
X_test_preprocessed = scaler.transform(X_test_features).astype(float)
nn_test=[]
for C in [10**i for i in range(-3,3)]:
    nn_1l_model = Sequential()
    nn_1l_model.add(Dense(6, activation='softmax', input_dim=X_train_features.shape[1],
                         kernel_initializer=keras.initializers.VarianceScaling(scale=1.0, seed=0),
                         kernel_regularizer=keras.regularizers.l2(C),
                         activity_regularizer=keras.regularizers.l1(C)
                         )
                   )
    nn_1l_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
    nn_1l_fit = nn_1l_model.fit(
                            x=X_train_preprocessed, y=y_train_1h,
                            validation_data=(X_valid_preprocessed, y_valid_1h), batch_size=32, epochs=50,
                            callbacks=[keras.callbacks.EarlyStopping(monitor='val_acc', patience=5)]
                            ,shuffle=True
                            )
    (test_loss, test_accuracy) = nn_1l_model.evaluate(X_test_preprocessed, y_test_1h, batch_size=100)
    nn_test.append((C,test_accuracy))
    print("Test Accuracy :", test_accuracy)

Train on 1400 samples, validate on 139 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Test Accuracy : 0.9599999785423279
Train on 1400 samples, validate on 139 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Test Accuracy : 0.9599999785423279
Train on 1400 samples, validate on 139 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Test Accuracy : 0.9399999976158142
Train on 1400 samples, validate on 139 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Test Accuracy : 0.9399999976158142
Train on 1400 samples, validate on 139 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Test Accuracy : 0.9399999976158142
Train on 1400 samples, validate on 139 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50

In [5]:
nn_test

[(0.001, 0.9599999785423279),
 (0.01, 0.9599999785423279),
 (0.1, 0.9399999976158142),
 (1, 0.9399999976158142),
 (10, 0.9399999976158142),
 (100, 0.8199999928474426)]

The best test accuracy for this model is 96% for C=0.01 and 0.001.

- layer dense network i.e. one hidden layer

For this 2 layers nn, we will do reguralization using dropout instead of l2 regularization.
We will try many number of units.

In [6]:
np.random.seed(0)
tf.set_random_seed(0)
nn2_test=[]
for units in [32,64,128]:
    nn_2l_model = Sequential()
    # first layer : relu
    nn_2l_model.add(Dense(units=units, activation='relu', input_dim=X_train_features.shape[1],
                         kernel_initializer=keras.initializers.VarianceScaling(scale=2.0, seed=0)
                         )
                   )
    # dropout for regularization
    nn_2l_model.add(keras.layers.Dropout(0.5))
    #second layer : softmax
    nn_2l_model.add(Dense(6, activation='softmax',
                         kernel_initializer=keras.initializers.VarianceScaling(scale=1.0, seed=0)
                         )
                   )
    nn_2l_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
    nn_1l_fit = nn_2l_model.fit(
                            x=X_train_preprocessed, y=y_train_1h,
                            validation_data=(X_valid_preprocessed, y_valid_1h), batch_size=32, epochs=50,
                            callbacks=[keras.callbacks.EarlyStopping(monitor='val_acc', patience=5)]
                            ,shuffle=True
                            )
    (test_loss, test_accuracy) = nn_2l_model.evaluate(X_test_preprocessed, y_test_1h, batch_size=100)
    print("Test Accuracy :", test_accuracy)
    nn2_test.append((units,test_accuracy))

Train on 1400 samples, validate on 139 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Test Accuracy : 0.8999999761581421
Train on 1400 samples, validate on 139 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Test Accuracy : 0.9200000166893005
Train on 1400 samples, validate on 139 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Test Accuracy : 0.9200000166893005


In [7]:
nn2_test

[(32, 0.8999999761581421), (64, 0.9200000166893005), (128, 0.9200000166893005)]

64 and 128 units gives the best results.

The 1-layer neural networks gives best results than 2-layers neural network. This is due to the fact that our train dataset is small (1400 rows) and the input space dimension is high (2048 features). The training of our neural networks start overfitting after few epochs. 

To have better results to Train neural network, we needs generally much more data.