In [37]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import numpy as np
from scipy import stats
import joblib

In [10]:
datas = pd.read_csv('rock_paper_scissors.csv')
datas.drop('0Z', axis='columns', inplace=True) # because every z coord are distance to joints 0 so 0Z always equals 0
datas['class_ix'] = 0
datas.loc[datas['papier'] == 1,'class_ix'] = 1
datas.loc[datas['ciseau'] == 1,'class_ix'] = 2
datas.head()

Unnamed: 0,0X,0Y,1X,1Y,1Z,2X,2Y,2Z,3X,3Y,...,19X,19Y,19Z,20X,20Y,20Z,pierre,papier,ciseau,class_ix
0,0.399762,0.605224,0.468224,0.571393,0.011114,0.535536,0.562703,-0.00541,0.581658,0.593264,...,0.488453,0.71221,-0.130412,0.470517,0.685818,-0.114807,1,0,0,0
1,0.345986,0.52317,0.431423,0.566229,0.000925,0.512038,0.656728,-0.022832,0.548989,0.757154,...,0.349909,0.789651,-0.145727,0.350797,0.742302,-0.128931,1,0,0,0
2,0.331668,0.509075,0.426953,0.548386,0.008503,0.506499,0.638192,-0.010258,0.544655,0.744586,...,0.347203,0.766743,-0.129066,0.34519,0.707406,-0.11211,1,0,0,0
3,0.355348,0.50243,0.447299,0.535909,0.005518,0.527554,0.628732,-0.014713,0.560454,0.738761,...,0.360879,0.757194,-0.125835,0.359875,0.701427,-0.109228,1,0,0,0
4,0.360759,0.50442,0.449354,0.532166,0.003292,0.527185,0.624952,-0.016635,0.559306,0.736007,...,0.361286,0.764354,-0.122693,0.360595,0.709032,-0.104617,1,0,0,0


In [16]:
dic_param = {'alpha':[0.1, 0.01, 0.001, 0.0001], 
             'activation':['identity', 'logistic', 'tanh', 'relu'], 
             'hidden_layer_sizes':[(100), (100, 10), (50, 50), (25, 50, 25)]}

model = MLPClassifier(solver='lbfgs', max_iter=1000)
result = GridSearchCV(model, dic_param, verbose=1, cv=5, n_jobs=-1)

result.fit(datas.loc[:,'0X':'20Z'], datas['class_ix'])

Fitting 5 folds for each of 64 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   38.5s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:  4.1min finished
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


GridSearchCV(cv=5, estimator=MLPClassifier(max_iter=1000, solver='lbfgs'),
             n_jobs=-1,
             param_grid={'activation': ['identity', 'logistic', 'tanh', 'relu'],
                         'alpha': [0.1, 0.01, 0.001, 0.0001],
                         'hidden_layer_sizes': [100, (100, 10), (50, 50),
                                                (25, 50, 25)]},
             verbose=1)

In [20]:
print(result.best_params_)
print(result.best_score_)
print(result.best_estimator_)

{'activation': 'relu', 'alpha': 0.1, 'hidden_layer_sizes': (100, 10)}
0.9159473163492455
MLPClassifier(alpha=0.1, hidden_layer_sizes=(100, 10), max_iter=1000,
              solver='lbfgs')


In [35]:
model = result.best_estimator_
probas = np.array(model.predict_proba(datas.loc[:,'0X':'20Z']))
max_prob = np.amax(probas, 1)
print(stats.describe(max_prob))

DescribeResult(nobs=1558, minmax=(0.6204443143588535, 0.9999999999981757), mean=0.9949047731792633, variance=0.0004830185340757811, skewness=-10.257562558015252, kurtosis=132.32744515812752)


In [36]:
others = pd.read_csv('not_rps.csv')
others.drop('0Z', axis='columns', inplace=True) # because every z coord are distance to joints 0 so 0Z always equals 0
probas = np.array(model.predict_proba(others.loc[:,'0X':'20Z']))
max_prob = np.amax(probas, 1)
print(stats.describe(max_prob))

DescribeResult(nobs=423, minmax=(0.5022890423235109, 0.999999966117272), mean=0.9806297945297326, variance=0.004975304490513744, skewness=-4.931597202770706, kurtosis=25.341811561753865)


In [39]:
joblib.dump(model, 'RPS_classifier.joblib')

['RPS_classifier.joblib']