# Hyperparameter Tuning

## MLP

In [1]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [2]:
df=pd.read_pickle('C:/Users/oefel/Documents/Datasets/group50')
df=df[df['# Dataset']==1]
df['Sex']=df['Sex'].map({'M':0,'F':1})
X=df[['Sex','Height','Ability Score 1','Ability Score2']]
y=df['Label']

In [9]:
parameterlist={
    'hidden_layer_sizes':[(5,3)],
    'activation':['relu','logistic'],
    'alpha':[0.0001,0.01],
    'n_iter_no_change':[100]
}
search=GridSearchCV(MLPClassifier(early_stopping=True,max_iter=1000),param_grid=parameterlist, cv=10)
search.fit(X,y)



GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'hidden_layer_sizes': [(5, 3)], 'activation': ['relu', 'logistic'], 'alpha': [0.0001, 0.01], 'n_iter_no_change': [100]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [10]:
search.best_estimator_.get_params()

{'activation': 'logistic',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': True,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (5, 3),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_iter': 1000,
 'momentum': 0.9,
 'n_iter_no_change': 100,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [11]:
df_test=pd.read_pickle('C:/Users/oefel/Documents/Datasets/group50')
df_test=df_test[df_test['# Dataset']==2]
df_test['Sex']=df_test['Sex'].map({'M':0,'F':1})
X_test=df_test[['Sex','Height','Ability Score 1','Ability Score2']]
y_test=df_test['Label']

In [12]:
search.best_estimator_.score(X_test,y_test)

0.8025

In [13]:
acc=pd.read_pickle('./possibleacc.pkl')

In [14]:
print(acc.iloc[49])

possible accuracys          0.811344
possible accuracys men      0.815281
possible accuracys women    0.804031
Expected Bias               0.011250
Name: 49, dtype: float64


## Decision Tree

In [22]:
df=pd.read_pickle('C:/Users/oefel/Documents/Datasets/group50')
df=df[df['# Dataset']==1]
df['Sex']=df['Sex'].map({'M':0,'F':1})
X=df[['Sex','Height','Ability Score 1','Ability Score2']]
y=df['Label']

In [23]:
parameterlist={
    'max_depth':[1,2,3,4,None],
    'min_samples_split':[2,0.01,0.05,0.1],
    'min_samples_leaf':[1,0.01,0.05,0.1],
    'max_features':[2,3,4]
}
search=GridSearchCV(DecisionTreeClassifier(),param_grid=parameterlist, cv=10)
search.fit(X,y)



GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': [1, 2, 3, 4, None], 'min_samples_split': [2, 0.01, 0.05, 0.1], 'min_samples_leaf': [1, 0.01, 0.05, 0.1], 'max_features': [2, 3, 4]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [24]:
df_test=pd.read_pickle('C:/Users/oefel/Documents/Datasets/group50')
df_test=df_test[df_test['# Dataset']==2]
df_test['Sex']=df_test['Sex'].map({'M':0,'F':1})
X_test=df_test[['Sex','Height','Ability Score 1','Ability Score2']]
y_test=df_test['Label']

In [25]:
print(search.best_estimator_.get_params())
print(search.best_estimator_.score(X_test,y_test))
acc=pd.read_pickle('./possibleacc.pkl')
print(acc.iloc[49])

{'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_features': 3, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 0.01, 'min_samples_split': 0.01, 'min_weight_fraction_leaf': 0.0, 'presort': False, 'random_state': None, 'splitter': 'best'}
0.8035
possible accuracys    0.810662
Name: 49, dtype: float64


## SVM

In [10]:
df=pd.read_pickle('C:/Users/oefel/Documents/Datasets/group50')
df=df[df['# Dataset']==1]
df['Sex']=df['Sex'].map({'M':0,'F':1})
X=df[['Sex','Height','Ability Score 1','Ability Score2']]
y=df['Label']

In [11]:
parameterlist={
    'C':[0.1,0.5,1,1.5,2,3,5],
    'kernel':['linear','rbf','poly','sigmoid']
}
search=GridSearchCV(SVC(gamma='scale'),param_grid=parameterlist, cv=10)
search.fit(X,y)

GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.1, 0.5, 1, 1.5, 2, 3, 5], 'kernel': ['linear', 'rbf', 'poly', 'sigmoid']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [12]:
df_test=pd.read_pickle('C:/Users/oefel/Documents/Datasets/group50')
df_test=df_test[df_test['# Dataset']==2]
df_test['Sex']=df_test['Sex'].map({'M':0,'F':1})
X_test=df_test[['Sex','Height','Ability Score 1','Ability Score2']]
y_test=df_test['Label']

In [13]:
print(search.best_estimator_.get_params())
print(search.best_estimator_.score(X_test,y_test))
acc=pd.read_pickle('./possibleacc.pkl')
print(acc.iloc[49])

{'C': 1.5, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
0.812
possible accuracys    0.810662
Name: 49, dtype: float64
