In [None]:
import os
import glob
import numpy as np
from scipy.sparse import csr_matrix
from scipy import sparse
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt

# Load every pickled shard of training data and stack the shards into one
# feature matrix `x` and one target array `y`.
# Each shard is expected to be a dict with keys 'x' and 'y'
# (presumably 'x' is a scipy sparse matrix and 'y' a 1-D array -- TODO confirm).
#
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this cell on shards produced by a trusted pipeline.
#
# The paths are sorted because glob.glob returns them in arbitrary,
# filesystem-dependent order. The row order of `x`/`y` decides which rows the
# seeded split below sends to train vs. test, so an unsorted listing would make
# the "fixed" split differ from machine to machine.
shard_paths = sorted(glob.glob('data/virtual_loss_training_data_sparse/*.pkl'))
if not shard_paths:
    # Fail with a clear message instead of an opaque stacking error further down.
    raise FileNotFoundError(
        "no .pkl shards found under data/virtual_loss_training_data_sparse/"
    )

x_parts = []
y_parts = []

for shard_path in shard_paths:
    with open(shard_path, 'rb') as f:
        data = pickle.load(f)
    x_parts.append(data['x'])
    y_parts.append(data['y'])

x = sparse.vstack(x_parts)
y = np.concatenate(y_parts)

print(x.shape, y.shape)

# Hold out 33% of the rows for the final evaluation; the fixed seed keeps the
# split repeatable given the same row order.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
print("x_train: {}, x_test: {}, y_train: {}, y_test: {}".format(x_train.shape, x_test.shape, y_train.shape, y_test.shape))

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive hyper-parameter search for a single-hidden-layer MLP classifier,
# scored with 3-fold cross-validation on the training split.
#
# random_state is fixed so that weight initialisation and mini-batch shuffling
# are repeatable; without it every run of this cell ranks candidates on
# different random draws.
mlp = MLPClassifier(max_iter=300, random_state=42)

# Settings that apply to both solvers.
shared_grid = {
    'hidden_layer_sizes': [(100,), (200,), (300,), (400,)],
    'activation': ['tanh', 'relu'],
    'alpha': [0.0001, 0.05],
}

# MLPClassifier only consults `learning_rate` when solver='sgd'. Crossing it
# with 'adam' would fit every adam candidate twice with identical behaviour,
# so the schedule is varied for sgd only. adam is pinned to the default
# 'constant' so every params dict still carries the same keys.
parameter_space = [
    dict(shared_grid, solver=['sgd'], learning_rate=['constant', 'adaptive']),
    dict(shared_grid, solver=['adam'], learning_rate=['constant']),
]
clf = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3, verbose=3)
clf.fit(x_train, y_train)

In [None]:
# Report the winning hyper-parameter combination from the grid search.
print('Best parameters found:\n', clf.best_params_)

# Then list every candidate: mean cross-validated score, followed by twice the
# standard deviation of that score across the folds.
cv_results = clf.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
for idx, params in enumerate(cv_results['params']):
    print("%0.3f (+/-%0.03f) for %r" % (means[idx], stds[idx] * 2, params))

In [None]:
from sklearn.metrics import classification_report

# Evaluate the fitted grid search on the held-out split. With GridSearchCV's
# default refit=True, clf.predict uses the estimator refit with the best
# parameters.
y_true = y_test
y_pred = clf.predict(x_test)

print('Results on the test set:')
report = classification_report(y_true, y_pred)
print(report)