In [5]:
import time
from dataloader import MinibooneLoader
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, GridSearchCV, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Performance/Optimizations
EnableDaal4py = False    # Optimise sklearn for Intel CPUs (Requires daal4py)

shuffleSeed = 0     # Data shuffle seed.
kFoldSeed = None    # Cross-validation shuffle seed.
targetCores = 8     # How many cores to use.
trainSize = 0.8
testSize = 0.2

In [6]:
if (EnableDaal4py == True):
    from daal4py.sklearn import patch_sklearn
    patch_sklearn()

In [7]:
dataset = MinibooneLoader().loadMiniboone()

events = dataset.events

In [8]:
# Set up a preprocessing pipeline
estimators = [
    # Standardizes and scales the dataset
    ['scale', StandardScaler()]
]
pipe = Pipeline(estimators).fit(events)

# Transform the data using the prepared pipeline.
X = pipe.transform(events)
y = dataset.classifications

In [9]:

# Shuffle the dataset and split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, random_state=shuffleSeed, train_size=trainSize, test_size=testSize)

print('Data has been split. Training data:')
print(X_train)
print(y_train)

Data has been split. Training data:
[[ 0.04852549  0.05710245 -0.42166557 ...  0.04230077  0.08020339
   0.0614591 ]
 [ 0.08522973  0.05787857  0.40372423 ...  0.15819105  0.06338857
   0.0602966 ]
 [ 0.03886527  0.05797087 -0.56084576 ...  0.03024111  0.03142102
   0.06056402]
 ...
 [ 0.06007261  0.07552737  0.02669232 ...  0.06275209  0.08296288
   0.06197942]
 [ 0.06221417  0.06761905 -0.02986604 ...  0.07141983  0.07571434
   0.05977339]
 [ 0.06484492  0.0546634   0.00282108 ...  0.0365456   0.03887806
   0.0591355 ]]
[0 1 0 ... 1 1 1]


In [11]:
# Perform grid search to identify the best hyperparameters

# Define our estimator
mlp=MLPClassifier(hidden_layer_sizes=layersConfig, max_iter=maxIterations)

# Define our hyperameter grid-space.
parameters={
    'hidden_layer_sizes': (
        # (10, 10, 10),
        (5, 5, 5),
        (25, 25),
        # (10, 10),
        (5, 5),
        (25)
    ),
    'max_iter': ( 1000, 500, 250 ),
    'activation': (
        'tanh',
        'relu'
    ),
    'n_iter_no_change': (
        10,
        20
    )
}

# Set up a basic 5-fold cross-validation.
cv = KFold(n_splits=5, shuffle=True, random_state=kFoldSeed)
# Set up the final grid-search
search = GridSearchCV(estimator=mlp, param_grid=parameters, cv=cv, scoring='accuracy', n_jobs=targetCores, verbose=20)

search.fit(X_train, y_train)


Fitting 5 folds for each of 36 candidates, totalling 180 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   1 tasks      | elapsed:   48.4s
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:  1.0min
[Parallel(n_jobs=8)]: Done   3 tasks      | elapsed:  1.5min
[Parallel(n_jobs=8)]: Done   4 tasks      | elapsed:  1.6min
[Parallel(n_jobs=8)]: Done   5 tasks      | elapsed:  1.9min
[Parallel(n_jobs=8)]: Done   6 tasks      | elapsed:  2.1min
[Parallel(n_jobs=8)]: Done   7 tasks      | elapsed:  2.3min
[Parallel(n_jobs=8)]: Done   8 tasks      | elapsed:  2.6min
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:  3.0min
[Parallel(n_jobs=8)]: Done  10 tasks      | elapsed:  3.0min
[Parallel(n_jobs=8)]: Done  11 tasks      | elapsed:  3.0min
[Parallel(n_jobs=8)]: Done  12 tasks      | elapsed:  3.2min
[Parallel(n_jobs=8)]: Done  13 tasks      | elapsed:  3.2min
[Parallel(n_jobs=8)]: Done  14 tasks      | elapsed:  3.5min
[Parallel

GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=False),
             estimator=MLPClassifier(hidden_layer_sizes=(10, 10, 10),
                                     max_iter=1000),
             n_jobs=8,
             param_grid={'activation': ('tanh', 'relu'),
                         'hidden_layer_sizes': ((5, 5, 5), (25, 25), (5, 5)),
                         'max_iter': (1000, 500, 250),
                         'n_iter_no_change': (10, 20)},
             scoring='accuracy', verbose=20)

In [12]:
# Print the results of the above grid search
print('Best Score: %s' % search.best_score_)
print('Best Hyperparameters: %s\n' % search.best_params_)

# Report all the scores for all the various combinations
means = search.cv_results_['mean_test_score']
stds = search.cv_results_['std_test_score']

# Print the mean accuracy and expected deviation for all parameter combinations.
print('Performance breakdown for each fit:')
for mean, std, params in zip(means, stds, search.cv_results_['params']):
    print('%0.3f (+/-%0.03f) for %r' % (mean, std * 2, params))


Best Score: 0.9388184588275557
Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (25, 25), 'max_iter': 500, 'n_iter_no_change': 20}
0.929 (+/-0.004) for {'activation': 'tanh', 'hidden_layer_sizes': (5, 5, 5), 'max_iter': 1000, 'n_iter_no_change': 10}
0.930 (+/-0.006) for {'activation': 'tanh', 'hidden_layer_sizes': (5, 5, 5), 'max_iter': 1000, 'n_iter_no_change': 20}
0.927 (+/-0.008) for {'activation': 'tanh', 'hidden_layer_sizes': (5, 5, 5), 'max_iter': 500, 'n_iter_no_change': 10}
0.930 (+/-0.003) for {'activation': 'tanh', 'hidden_layer_sizes': (5, 5, 5), 'max_iter': 500, 'n_iter_no_change': 20}
0.929 (+/-0.007) for {'activation': 'tanh', 'hidden_layer_sizes': (5, 5, 5), 'max_iter': 250, 'n_iter_no_change': 10}
0.929 (+/-0.003) for {'activation': 'tanh', 'hidden_layer_sizes': (5, 5, 5), 'max_iter': 250, 'n_iter_no_change': 20}
0.936 (+/-0.003) for {'activation': 'tanh', 'hidden_layer_sizes': (25, 25), 'max_iter': 1000, 'n_iter_no_change': 10}
0.939 (+/-0.003) for {'

In [13]:
# Test the selected hyperparameters on the test set.
timeStart = time.time()
predictions = search.predict(X_test)
timeEnd = time.time()
totalTime = timeEnd - timeStart

print('\nTotal prediction time: {}s\nAverage time/prediction: {}s'.format(totalTime, totalTime / len(X_test)))

print('\nConfusion matrix:')
print(confusion_matrix(y_test, predictions))

print('\nClassification report:')
print(classification_report(y_test, predictions))


Confusion matrix:
[[ 6685   585]
 [ 1008 17735]]

Classification report:
              precision    recall  f1-score   support

           0       0.87      0.92      0.89      7270
           1       0.97      0.95      0.96     18743

    accuracy                           0.94     26013
   macro avg       0.92      0.93      0.93     26013
weighted avg       0.94      0.94      0.94     26013

