# Project: Recognizing Traffic Signs Using Deep Learning
### Scope: Classifier: Sklearn's MLPClassifier Neural Net Investigation on tuning hyper-parameters
##### Author: Ashish Panchal (epababl03.ashishp@iima.ac.in)

In [1]:
# Import Libraries
import warnings

import sys
import os
import seaborn
import numpy as np
from os import path
import matplotlib.pyplot as plt


from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV

# Local python modules
sys.path.insert(0, os.path.abspath('../src/util'))
sys.path.insert(1, os.path.abspath('../src/model'))
sys.path.insert(2, os.path.abspath('../src/data'))
from util import Util
from load_save import LoadSave

In [2]:
# Load data persisted with IPython's %store magic; `-r` restores a stored
# variable into this kernel's namespace.
# NOTE(review): presumably these were saved by an earlier preprocessing
# notebook — confirm they exist in the store before running on a fresh machine.

# Number of classes
%store -r n_classes

# Class labels
%store -r signs

# Final preprocessed training dataset
%store -r X_train_preprocessed
%store -r y_train_final

# Final preprocessed test dataset
%store -r X_test_preprocessed
%store -r y_test


# Final preprocessed validation dataset
%store -r X_valid_preprocessed 
%store -r y_valid

#### Verify shapes of the pre-processed Training and Validation datasets

In [3]:
# Sanity check: show the shape of each training / validation array, one per line.
print(
    *(
        array.shape
        for array in (
            X_train_preprocessed,
            y_train_final,
            X_valid_preprocessed,
            y_valid,
        )
    ),
    sep='\n',
)

(39239, 32, 32, 1)
(39239,)
(4410, 32, 32, 1)
(4410,)


## 5.1 Hyper-parameters of MLP

In [4]:
# Search space for the MLP hyper-parameter search.
parameters = dict(
    solver=['sgd', 'adam'],
    max_iter=[400],
    # L2 penalty candidates: 10^-3, 10^-4, 10^-5
    alpha=10.0 ** -np.arange(3, 6),
    hidden_layer_sizes=[(1024,), (64,)],
)

# Flatten every sample into a single feature vector (one row per sample),
# since the search below is fitted on a 2-D array.
X_train = X_train_preprocessed.reshape(len(X_train_preprocessed), -1)

In [None]:
# Randomized search over `parameters`, scoring each sampled candidate with
# 5-fold cross-validation (n_iter is left at its default, which yields the
# 10 candidates reported in the log below).
# Both the candidate sampler and the MLP weight initialisation / shuffling are
# stochastic, so both are seeded to make the search reproducible across
# kernel restarts.
RANDOM_STATE = 42
random_search_clf = RandomizedSearchCV(
    MLPClassifier(random_state=RANDOM_STATE),
    parameters,
    n_jobs=10,  # run up to 10 fits in parallel
    cv=5,
    verbose=3,
    random_state=RANDOM_STATE,
)
random_search_clf.fit(X_train, y_train_final)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed: 98.7min


In [None]:
# Report the best estimator found by the randomized search and the
# hyper-parameter combination that produced it.
# The fitted search object is `random_search_clf`; referencing the undefined
# name `grid_search_clf` here would raise NameError.
print(random_search_clf.best_estimator_)
print(random_search_clf.best_params_)

### 5.2 Different learning rate schedules and momentum parameters (Comparing different learning strategies for the network)

In [None]:
# Learning strategies to compare: each entry is one set of MLP keyword
# settings (SGD with constant learning rate and momentum vs. Adam).
# NOTE(review): the last hidden layer has 43 units, which presumably equals
# n_classes; MLPClassifier adds its own output layer, so confirm this extra
# 43-unit hidden layer is intentional.
params = [
    dict(
        solver='sgd',
        learning_rate='constant',
        momentum=.9,
        learning_rate_init=0.001,
        alpha=0.0001,
        hidden_layer_sizes=(1024, 750, 250, 43),
    ),
    dict(
        solver='adam',
        learning_rate_init=0.001,
        alpha=0.0001,
        hidden_layer_sizes=(1024, 750, 250, 43),
    ),
]