In [1]:
import os
import time
import json
import joblib
import datetime

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline

# Seed NumPy's global (legacy) random state once, up front, so a fresh
# Restart-and-Run-All starts from the same RNG state every time.
np.random.seed(seed=42)

In [2]:
# Run metadata and hyperparameters for this experiment.
#
# 'model' names the estimator that is actually trained in this notebook (an
# sklearn SVC); it previously read 'Random Forest', which no longer matched
# the active classifier. 'C' records the SVC regularisation strength so the
# value is captured together with the rest of the run description.
#
# 'criterion', 'n_estimators' and 'max_depth' are only read by the
# commented-out RandomForestClassifier alternative and are kept for it.
#
# The '*_time', 'cm_*' and 'cr_*' entries start as empty placeholders;
# nothing in the visible cells fills them in -- presumably later cells do.
CONFIG = {
    'timestamp'             : str(datetime.datetime.now()),
    'model'                 : 'SVC',
    'test_fold'             : 'Fold 1',
    'C'                     : 3,
    'criterion'             : 'gini',
    'n_estimators'          : 32,
    'max_depth'             : 16,
    'class_weight'          : { 0: 0.71, 1:  2.32, 2: 0.86 },
    'training_time'         : 0,
    'testing_time'          : 0,
    'cm_atick'              : '',
    'cr_atick'              : '',
    'cm_valentina'          : '',
    'cr_valentina'          : ''
}

In [3]:
# Filesystem locations. FEATURES_DIR is relative and gets joined onto
# BASE_DIR (the kernel's working directory) when the feature files are loaded.
BASE_DIR = os.getcwd()
FEATURES_DIR = '../Features/'
LOG_FILE = '/content/drive/MyDrive/Research/Crowd Emotion Logs/classical_ml.txt'

# Fold names; a fold's position in this list is what the fold column of the
# feature table is compared against when selecting the held-out fold.
FOLDS = [f'Fold {number}' for number in range(1, 6)]

# Class names. NOTE(review): presumably ordered to match labels 0, 1, 2 -- confirm.
EMOTIONS = ['Approval', 'Disapproval', 'Neutral']

In [4]:
# Resolve both feature files under BASE_DIR/FEATURES_DIR first ...
features_path_af  = os.path.join(BASE_DIR, FEATURES_DIR, 'Features_AF.joblib')
features_path_val = os.path.join(BASE_DIR, FEATURES_DIR, 'Features_VAL.joblib')

# ... then deserialise them.
# NOTE(review): joblib.load unpickles its input, so only point it at files
# produced by a trusted pipeline.
features_af  = joblib.load(features_path_af)
features_val = joblib.load(features_path_val)

In [5]:
# AF table: the label vector, the per-row fold id, and every remaining column
# as the feature matrix.
# NOTE(review): assumes both tables are pandas DataFrames with 'fold' and
# 'label' columns -- confirm against the feature-extraction step.
y_AF  = features_af['label'].to_numpy()
f_AF  = features_af['fold'].to_numpy()
X_AF  = features_af.drop(columns=['fold', 'label']).to_numpy()

# VAL table: same split into labels and features; its fold column is dropped
# and not kept.
y_VAL = features_val['label'].to_numpy()
X_VAL = features_val.drop(columns=['fold', 'label']).to_numpy()

In [6]:
# Rows of the AF set whose fold id equals the position of the configured
# test fold within FOLDS ('Fold 1' -> 0, 'Fold 2' -> 1, ...).
mask = (f_AF == FOLDS.index(CONFIG['test_fold']))

# Selected fold -> validation split; every other AF row -> training split.
val_X, val_y     = X_AF[mask], y_AF[mask]
train_X, train_y = X_AF[~mask], y_AF[~mask]

# The whole VAL set is used as the test split.
test_X, test_y   = X_VAL, y_VAL

In [7]:
# Alternative model, kept for reference only -- NOT active in this run:
# clf = RandomForestClassifier(
#     n_estimators          = CONFIG['n_estimators'],
#     criterion             = CONFIG['criterion'],
#     max_depth             = CONFIG['max_depth'],
#     class_weight          = CONFIG['class_weight'],
#     verbose               = 1,
# )

# Active model: support-vector classifier with per-class weights from CONFIG.
# C is read from CONFIG when a 'C' entry exists, so the value is recorded with
# the rest of the run description; otherwise it falls back to 3, the value
# that used to be hard-coded here.
clf = SVC(
    C                     = CONFIG.get('C', 3),
    class_weight          = CONFIG['class_weight'],
    verbose               = 1,
)

In [8]:
# Two-stage pipeline: standardise the features, then hand them to `clf`.
# NOTE(review): the classifier step is still named 'rnf' although `clf` is an
# SVC; the name is left as-is because other cells may look the step up by it.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('rnf', clf),
    ]
)

In [9]:
# Shuffle features and labels together before fitting.
#
# The shuffled copies get their own names instead of overwriting
# train_X / train_y, and the shuffle is seeded explicitly (42, the same value
# the notebook seeds NumPy with). Re-running this cell therefore always fits
# on the same row order, rather than re-shuffling already-shuffled data with
# whatever state the global RNG happens to be in.
train_X_shuffled, train_y_shuffled = shuffle(train_X, train_y, random_state=42)

# Last expression on purpose: the fitted pipeline is shown as the cell output.
pipe.fit(train_X_shuffled, train_y_shuffled)

[LibSVM]

Pipeline(steps=[('scaler', StandardScaler()),
                ('rnf',
                 SVC(C=3, class_weight={0: 0.71, 1: 2.32, 2: 0.86},
                     verbose=1))])

In [10]:
# Score the held-out AF fold (val_X / val_y) and print per-class metrics.
y_pred = pipe.predict(val_X)
cr = classification_report(y_true=val_y, y_pred=y_pred)
print(cr)

              precision    recall  f1-score   support

         0.0       0.63      0.59      0.61      1335
         1.0       0.30      0.43      0.35       390
         2.0       0.77      0.70      0.73      1158

    accuracy                           0.62      2883
   macro avg       0.56      0.58      0.57      2883
weighted avg       0.64      0.62      0.63      2883



In [11]:
# Score the separate VAL set (test_X / test_y) and print per-class metrics.
# Note that y_pred and cr are rebound here, replacing the validation results.
y_pred = pipe.predict(test_X)
cr = classification_report(y_true=test_y, y_pred=y_pred)
print(cr)

              precision    recall  f1-score   support

         0.0       0.30      0.76      0.43      1783
         1.0       0.03      0.37      0.05       378
         2.0       0.82      0.03      0.05      7327

    accuracy                           0.18      9488
   macro avg       0.38      0.38      0.18      9488
weighted avg       0.69      0.18      0.12      9488

