In [1]:
import os
import time
import json
import joblib
import datetime

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline

# Seed NumPy's global RNG once, up front, so a fresh "Restart & Run All"
# repeats the same random draws.
# NOTE(review): the shuffle and the forest below are created without an
# explicit random_state, so they presumably rely on this global seed — confirm.
np.random.seed(42)

In [2]:
# Settings for a single run, kept in one dict next to the slots that hold the
# run's timings and reports.
CONFIG = {
    # Wall-clock time at which this cell was executed.
    'timestamp'             : str(datetime.datetime.now()),
    # Human-readable name of the estimator. The hyper-parameters below
    # (criterion / n_estimators / max_depth) describe a random forest, so the
    # label has to say so; it previously read 'Support Vector Machine'.
    'model'                 : 'Random Forest',
    # Name of the fold that is held out for validation.
    'test_fold'             : 'Fold 1',
    # SVC-only setting; re-enable when switching the estimator to SVC.
#     'C'                     : 3,
    'criterion'             : 'gini',
    'n_estimators'          : 32,
    'max_depth'             : 16,
    # Optional per-class weights; disabled for this run.
#     'class_weight'          : { 0: 0.71, 1:  2.32, 2: 0.86 },
    # Result slots: initialised empty here, nothing in this cell fills them.
    'training_time'         : 0,
    'testing_time'          : 0,
    'cm_atick'              : '',
    'cr_atick'              : '',
    'cm_valentina'          : '',
    'cr_valentina'          : ''
}

In [3]:
# Every relative path below is resolved against the directory the kernel was
# started in.
BASE_DIR = os.getcwd()

# Where run records go, and the display names used for classes and folds.
LOG_FILE = '../logs/classical_ml.txt'
EMOTIONS = ['Approval', 'Disapproval', 'Neutral']
FOLDS = [f'Fold {number}' for number in range(1, 6)]

# Directories holding the pre-computed feature tables for the two datasets.
FEATURES_DIR_AF = '../../Dataset/Features-AF/'
FEATURES_DIR_VAL = '../../Dataset/Features-VAL/'

In [4]:
def _load_features(features_dir):
    """Return ``(path, table)`` for the ``Features.joblib`` file in *features_dir*.

    The directory is resolved relative to ``BASE_DIR``.
    NOTE(review): joblib.load unpickles the file, so it should only ever be
    pointed at trusted, locally produced artefacts.
    """
    path = os.path.join(BASE_DIR, features_dir, 'Features.joblib')
    return path, joblib.load(path)


# One table per dataset, loaded the same way.
features_path_af, features_af = _load_features(FEATURES_DIR_AF)
features_path_val, features_val = _load_features(FEATURES_DIR_VAL)

In [5]:
# 'fold' and 'label' are bookkeeping columns; everything else is a feature.
_non_feature_cols = ['fold', 'label']

# AF table: feature matrix, labels, and the fold id of every row.
X_AF = features_af.drop(columns=_non_feature_cols).to_numpy()
y_AF = features_af['label'].to_numpy()
f_AF = features_af['fold'].to_numpy()

# VAL table: feature matrix and labels only (its fold column is not used).
X_VAL = features_val.drop(columns=_non_feature_cols).to_numpy()
y_VAL = features_val['label'].to_numpy()

In [6]:
# Rows whose fold id equals the position of CONFIG['test_fold'] in FOLDS form
# the validation set; all remaining AF rows are used for training.
# NOTE(review): this assumes the 'fold' column is 0-based like FOLDS.index —
# confirm against the feature-extraction step.
held_out_fold = FOLDS.index(CONFIG['test_fold'])
mask = (f_AF == held_out_fold)

train_X, train_y = X_AF[~mask, :], y_AF[~mask]
val_X, val_y = X_AF[mask, :], y_AF[mask]

# The whole VAL dataset serves as the external test set.
test_X, test_y = X_VAL, y_VAL

In [7]:
# Take the forest's hyper-parameters straight from CONFIG so the recorded
# settings and the estimator that gets fitted stay in sync.
rf_params = {
    key: CONFIG[key]
    for key in ('n_estimators', 'criterion', 'max_depth')
}
# rf_params['class_weight'] = CONFIG['class_weight']

# verbose=1 makes fit/predict print their progress lines.
clf = RandomForestClassifier(verbose=1, **rf_params)

# Alternative estimator (requires the 'C' / 'class_weight' CONFIG entries):
# clf = SVC(
#     C                     = CONFIG['C'],
#     class_weight          = CONFIG['class_weight'],
#     verbose               = 1,
# )

In [8]:
# Shuffle features and labels together, in unison.
# Note that this rebinds train_X / train_y, so re-running the cell reshuffles
# the already shuffled arrays.
train_X, train_y = shuffle(train_X, train_y)

# Standardise the features, then hand them to the classifier. The second step
# keeps the name 'svm' regardless of which estimator `clf` currently is.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svm', clf),
])

# Left as the last expression so the fitted pipeline is displayed.
pipe.fit(train_X, train_y)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:    0.7s finished


Pipeline(steps=[('scaler', StandardScaler()),
                ('svm',
                 RandomForestClassifier(max_depth=16, n_estimators=32,
                                        verbose=1))])

In [9]:
# Score the fitted pipeline on the held-out AF fold.
y_pred = pipe.predict(val_X)
cr = classification_report(y_true=val_y, y_pred=y_pred)
print(cr)

              precision    recall  f1-score   support

         0.0       0.69      0.65      0.67      1335
         1.0       0.23      0.22      0.22       390
         2.0       0.64      0.70      0.66      1158

    accuracy                           0.61      2883
   macro avg       0.52      0.52      0.52      2883
weighted avg       0.61      0.61      0.61      2883



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:    0.0s finished


In [10]:
# Score the same pipeline on the VAL dataset.
# y_pred and cr are rebound here, replacing the held-out-fold results.
y_pred = pipe.predict(test_X)
cr = classification_report(y_true=test_y, y_pred=y_pred)
print(cr)

              precision    recall  f1-score   support

         0.0       0.57      0.89      0.70      1783
         1.0       0.01      0.04      0.02       378
         2.0       0.93      0.68      0.79      7327

    accuracy                           0.69      9488
   macro avg       0.51      0.54      0.50      9488
weighted avg       0.83      0.69      0.74      9488



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:    0.0s finished
