In [1]:
import cv2
import pandas as pd
import numpy as np

def preprocessing(val=True, data_path='data/x_train_gr_smpl.csv',
                  labels_path='data/y_train_smpl.csv', image_shape=None):
    """Load the feature/label CSVs and optionally edge-filter every row.

    Parameters
    ----------
    val : bool, default True
        When true, every row of the feature table is cast to ``uint8``, run
        through ``cv2.Canny`` followed by ``cv2.GaussianBlur`` and flattened
        back into a single row. When false, the feature table is returned
        exactly as it was read from disk.
    data_path : str
        CSV file holding one sample per row.
    labels_path : str
        CSV file holding the labels; it is returned without modification.
    image_shape : tuple of int, optional
        If given, each row is reshaped to this shape before filtering. With
        the default ``None`` the row is handed to OpenCV as a flat 1-D array,
        which is what this function has always done.
        NOTE(review): if each row is a flattened 2-D image, filtering the flat
        vector is probably not what was intended -- confirm the image size
        and pass it here.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The (possibly filtered) features and the labels.
    """
    data = pd.read_csv(data_path)
    labels = pd.read_csv(labels_path)

    if not val:
        return data, labels

    # Filter settings: Canny hysteresis thresholds and the Gaussian kernel size
    # (a sigma of 0 asks OpenCV to derive it from the kernel size).
    canny_low, canny_high = 10, 30
    blur_kernel = (5, 5)

    filtered_rows = []
    # Iterate over the underlying array instead of building a Series per row
    # with ``data.iloc[i]``.
    for row in data.values:
        img = np.uint8(row)
        if image_shape is not None:
            img = img.reshape(image_shape)
        edited = cv2.Canny(img, canny_low, canny_high)
        edited = cv2.GaussianBlur(edited, blur_kernel, 0)
        filtered_rows.append(edited.reshape(-1))

    return pd.DataFrame(filtered_rows), labels

In [2]:
from sklearn.model_selection import train_test_split

data, labels = preprocessing()
data['label'] = labels

# Features are every column except the label column appended just above.
X = data.iloc[:, :-1]
y = data['label']

# A fixed seed keeps the 67/33 split (and therefore every score computed from
# it) identical across kernel restarts; without it each run shuffles anew.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1
)

In [3]:
from sklearn.neural_network import MLPClassifier

In [4]:
# random_state=1 matches the estimator repr recorded in this cell's output and
# makes the fit repeatable from run to run.
clf = MLPClassifier(random_state=1)
clf.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [5]:
# Per-class probability estimates for the held-out samples. Despite its name,
# `y_pred` holds probabilities, not predicted labels.
y_pred = clf.predict_proba(X_test)

In [6]:
# Show the probability rows of the first ten held-out samples.
print(y_pred[:10])

[[8.90311927e-56 1.34036160e-11 1.37213934e-46 5.68902960e-55
  1.85949946e-23 1.00000000e+00 1.71668725e-38 1.23120382e-42
  7.46403739e-28 6.41744387e-55]
 [3.14743029e-73 7.99802153e-28 2.80675205e-38 1.00000000e+00
  1.54191190e-15 8.65546611e-57 9.89917779e-62 4.90065107e-68
  1.22790316e-39 9.53117764e-13]
 [9.96351105e-01 3.64889533e-03 8.73151298e-13 3.55507504e-20
  1.42008869e-17 1.43454060e-12 4.32188540e-28 1.51896194e-21
  2.38133922e-15 9.20557132e-26]
 [4.31974240e-85 1.37355899e-25 3.13721916e-84 3.67019416e-41
  1.00000000e+00 4.43748173e-22 1.12008673e-78 6.69574910e-32
  2.00763884e-40 3.00955642e-41]
 [2.56575364e-36 9.20056423e-40 2.90807068e-34 1.21710016e-69
  5.55016237e-48 1.01673466e-39 5.24273373e-30 1.65146641e-54
  2.65893913e-20 1.00000000e+00]
 [6.30093348e-54 5.62241321e-20 3.36199620e-29 9.99999981e-01
  5.39515538e-12 1.49789084e-41 3.32465961e-45 4.10160611e-49
  1.46268552e-30 1.92064579e-08]
 [2.47766919e-94 6.18508956e-32 2.21436132e-88 9.45329613e

In [8]:
# Mean accuracy of the fitted classifier on the held-out split.
score = clf.score(X_test, y_test)
score

0.8992340832934418

In [None]:
from sklearn.model_selection import GridSearchCV

# Seeded base estimator so repeated searches are comparable.
base_clf = MLPClassifier(random_state=1)

# Two candidate values for each of six hyper-parameters: 64 combinations.
possible_parameters = {
    'alpha': [0.0001, 0.00005],
    'batch_size': [100, 50],
    'learning_rate_init': [0.001, 0.0005],
    'beta_1': [0.9, 0.99],
    'beta_2': [0.999, 0.5],
    'epsilon': [5e-9, 2e-9]
}

# return_train_score=True is required for the 'mean_train_score' entry read
# below; when train scores are not requested that key is absent from
# cv_results_ and the lookup raises KeyError.
# NOTE: from here on `clf` refers to the grid search, not the single
# MLPClassifier fitted earlier in the notebook.
clf = GridSearchCV(
    base_clf,
    possible_parameters,
    n_jobs=-1,
    return_train_score=True,
)
clf.fit(X_train, y_train)

cv = clf.cv_results_
# Every searched hyper-parameter is reported, including beta_2.
report_columns = [
    'mean_fit_time',
    'mean_score_time',
    'mean_test_score',
    'mean_train_score',
    'param_alpha',
    'param_batch_size',
    'param_learning_rate_init',
    'rank_test_score',
    'param_beta_1',
    'param_beta_2',
    'param_epsilon',
]
tab = pd.DataFrame({column: cv[column] for column in report_columns})

# Three best-ranked parameter combinations.
tab.sort_values(['rank_test_score']).head(3)

