In [23]:
import warnings

import pandas as pd 
import numpy as np

from sklearn.neural_network import MLPClassifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate

In [24]:
# sometimes optimization fails to converge during grid search and i dont care
warnings.filterwarnings('ignore')

## Data Preperation

In [25]:
# load training/testing data
ft_train = np.load("./processed_data/features_train.npy")
ft_test = np.load("./processed_data/features_test.npy")
lbl_train = np.load("./processed_data/labels_train.npy")
lbl_test = np.load("./processed_data/labels_test.npy")

In [26]:
# no balance parameter, so will have to do "by hand"
ros = RandomUnderSampler()
ft_train_res, lbl_train_res = ros.fit_resample(ft_train, lbl_train)

## Cross Validation Method (Stratified KFold)

In [27]:
cv = StratifiedKFold(n_splits=5)

# Cross Val Random Forest

In [28]:
rfc = RandomForestClassifier(n_estimators=500, n_jobs=-1, class_weight='balanced', max_leaf_nodes=16, criterion='gini')

In [29]:
scores = cross_validate(rfc, ft_train, lbl_train, cv=cv, scoring=['recall', 'accuracy'])

In [30]:
scores

{'fit_time': array([1.07831001, 0.28457689, 0.30106378, 0.25848889, 0.21178603]),
 'score_time': array([0.04589605, 0.05493593, 0.04613709, 0.04116201, 0.04033613]),
 'test_recall': array([0.6       , 0.775     , 0.7       , 0.82051282, 0.725     ]),
 'test_accuracy': array([0.77017115, 0.74694377, 0.77383863, 0.74296206, 0.75275398])}

# Cross Val Neural Network

In [31]:
mlp = MLPClassifier(activation='logistic', hidden_layer_sizes=(75, 50, 50, 25,))

In [32]:
scores = cross_validate(mlp, ft_train_res, lbl_train_res, cv=cv, scoring=['recall', 'accuracy'])

In [33]:
scores

{'fit_time': array([0.58356905, 0.52017498, 0.81025529, 0.0355401 , 0.03141713]),
 'score_time': array([0.00066209, 0.00064707, 0.00065374, 0.0006578 , 0.00064898]),
 'test_recall': array([0.75 , 0.925, 0.825, 0.   , 1.   ]),
 'test_accuracy': array([0.7       , 0.7375    , 0.7375    , 0.50632911, 0.50632911])}