In [53]:
import warnings

import pandas as pd 
import numpy as np

from sklearn.neural_network import MLPClassifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate

In [54]:
# sometimes optimization fails to converge during grid search and i dont care
warnings.filterwarnings('ignore')

## Data Preperation

In [55]:
# load training/testing data
ft_train = np.load("./processed_data/features_train.npy")
ft_test = np.load("./processed_data/features_test.npy")
lbl_train = np.load("./processed_data/labels_train.npy")
lbl_test = np.load("./processed_data/labels_test.npy")

In [56]:
# no balance parameter, so will have to do "by hand"
ros = RandomUnderSampler()
ft_train_res, lbl_train_res = ros.fit_resample(ft_train, lbl_train)

## Cross Validation Method (Stratified KFold)

In [57]:
cv = StratifiedKFold(n_splits=5)

# Cross Val Random Forest

In [58]:
rfc = RandomForestClassifier(n_estimators=500, n_jobs=-1, class_weight='balanced', max_leaf_nodes=16, criterion='gini')

In [59]:
scores = cross_validate(rfc, ft_train, lbl_train, cv=cv, scoring=['recall', 'accuracy'])

In [60]:
scores

{'fit_time': array([1.12184215, 0.24797797, 0.23200893, 0.23835611, 0.22488809]),
 'score_time': array([0.04099798, 0.04088116, 0.04116392, 0.04991889, 0.04180884]),
 'test_recall': array([0.8       , 0.675     , 0.7       , 0.79487179, 0.7       ]),
 'test_accuracy': array([0.74449878, 0.76283619, 0.77506112, 0.74051408, 0.76621787])}

# Cross Val Neural Network

In [61]:
mlp = MLPClassifier(activation='logistic', hidden_layer_sizes=(75, 50, 50, 25,))

In [62]:
scores = cross_validate(mlp, ft_train_res, lbl_train_res, cv=cv, scoring=['recall', 'accuracy'])

In [63]:
scores

{'fit_time': array([1.52189803, 1.45116735, 2.35401821, 2.16017985, 0.16242409]),
 'score_time': array([0.00068116, 0.00066471, 0.00065875, 0.00320816, 0.00071597]),
 'test_recall': array([0.925     , 0.75      , 0.9       , 0.92307692, 0.875     ]),
 'test_accuracy': array([0.775     , 0.75      , 0.7375    , 0.82278481, 0.72151899])}