https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html#sklearn.neural_network.MLPClassifier.predict_proba

### import dependencies

In [39]:
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

### train model

In [40]:
df = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

# features
X_train = df.iloc[:, 1:]
y_train = df.iloc[:, 0]

X_train = X_train.values
y_train = y_train.values

X_test = df_test.iloc[:, 1:]
y_test = df_test.iloc[:, 0]

X_test = X_test.values
y_test = y_test.values

display(df.head())
print(X_train.shape)

Unnamed: 0,5,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.608,0.609,0.610,0.611,0.612,0.613,0.614,0.615,0.616,0.617
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


(59999, 784)


### fit model

In [45]:
model = MLPClassifier(
    activation="relu",
    solver="adam",
    alpha=0.0001,
    hidden_layer_sizes=(50, 50, 50),
    learning_rate="constant",
    random_state=1, 
    max_iter=100, 
    verbose=True
)
model.fit(X_train, y_train)

Iteration 1, loss = 2.42186546
Iteration 2, loss = 0.51157860
Iteration 3, loss = 0.35610405
Iteration 4, loss = 0.28354064
Iteration 5, loss = 0.23894830
Iteration 6, loss = 0.20745889
Iteration 7, loss = 0.18619910
Iteration 8, loss = 0.16766521
Iteration 9, loss = 0.15204572
Iteration 10, loss = 0.13985336
Iteration 11, loss = 0.12845942
Iteration 12, loss = 0.12510924
Iteration 13, loss = 0.11237064
Iteration 14, loss = 0.10808025
Iteration 15, loss = 0.10603741
Iteration 16, loss = 0.09822191
Iteration 17, loss = 0.09117498
Iteration 18, loss = 0.08903323
Iteration 19, loss = 0.08911433
Iteration 20, loss = 0.08439092
Iteration 21, loss = 0.07892599
Iteration 22, loss = 0.07491522
Iteration 23, loss = 0.07273684
Iteration 24, loss = 0.06659993
Iteration 25, loss = 0.06965796
Iteration 26, loss = 0.05958163
Iteration 27, loss = 0.06166039
Iteration 28, loss = 0.05712741
Iteration 29, loss = 0.05947304
Iteration 30, loss = 0.05069661
Iteration 31, loss = 0.05153462
Iteration 32, los

### search for better hyperparameters

In [42]:
# parameter_space = {
#     'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
#     'activation': ['tanh', 'relu'],
#     'solver': ['sgd', 'adam'],
#     'alpha': [0.0001, 0.05],
#     'learning_rate': ['constant','adaptive'],
# }
# 
# clf = GridSearchCV(model, parameter_space, n_jobs=-1, cv=3)
# clf.fit(X_train, y_train)
# 
# # Best paramete set
# print('Best parameters found:\n', clf.best_params_)
# 
# # All results
# means = clf.cv_results_['mean_test_score']
# stds = clf.cv_results_['std_test_score']
# for mean, std, params in zip(means, stds, clf.cv_results_['params']):
#     print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
# 
# y_true, y_pred = y_test , clf.predict(X_test)
# 
# print('Results on the test set:')
# print(classification_report(y_true, y_pred))

Iteration 1, loss = 1.36336533
Iteration 2, loss = 0.35869451
Iteration 3, loss = 0.28801919
Iteration 4, loss = 0.25995271
Iteration 5, loss = 0.22372068
Iteration 6, loss = 0.22011495
Iteration 7, loss = 0.21840906
Iteration 8, loss = 0.20833270
Iteration 9, loss = 0.21275556
Iteration 10, loss = 0.20626661
Iteration 11, loss = 0.21445420
Iteration 12, loss = 0.21246458
Iteration 13, loss = 0.22203759
Iteration 14, loss = 0.21543823
Iteration 15, loss = 0.22614298
Iteration 16, loss = 0.22817610
Iteration 17, loss = 0.22470761
Iteration 18, loss = 0.22546211
Iteration 19, loss = 0.23274325
Iteration 20, loss = 0.21814338
Iteration 21, loss = 0.22086939
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Best parameters found:
 {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'constant', 'solver': 'adam'}
0.824 (+/-0.034) for {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'le

### predictions

In [46]:
y_pred = model.predict(X_test)
print(y_pred)

y_pred_proba = model.predict_proba(X_test)
print(y_pred_proba)

[2 1 0 ... 4 5 6]
[[1.20321105e-22 6.75561865e-14 1.00000000e+00 ... 1.27987445e-20
  5.69335030e-27 1.21152843e-17]
 [3.42525749e-41 1.00000000e+00 3.55114232e-33 ... 4.07362223e-29
  4.25854871e-19 6.57576218e-36]
 [1.00000000e+00 5.32481362e-17 1.18215231e-23 ... 2.72677779e-20
  1.69759669e-25 1.39263573e-17]
 ...
 [2.26266858e-52 8.73444723e-33 5.05119681e-51 ... 6.08135358e-34
  6.25073721e-29 3.77073465e-19]
 [5.63529272e-46 1.73768064e-33 1.08966335e-43 ... 7.13892819e-67
  2.25833038e-20 1.70454157e-37]
 [1.15033572e-22 1.10138994e-44 2.02870090e-44 ... 2.52632236e-80
  3.12951615e-25 3.91104448e-52]]


### score

In [47]:
print("Training set score: %f" % model.score(X_train, y_train))
print("Test set score: %f" % model.score(X_test, y_test))

Training set score: 0.997433
Test set score: 0.965097
