In [1]:
import numpy as np

y_labelled = np.load('emotion_labels.npy')
X_labelled_pca = np.load('X_labelled_pca.npy')

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline

class SVMClassifier:
    def __init__(self, kernel='poly', param_grid=None, cv=5, scoring='accuracy'):
        self.param_grid = param_grid or {
            'svc__degree': [2, 3, 4],
            'svc__coef0': [0, 1, 5],
            'svc__C': [0.1, 1, 10]
        }
        self.pipeline = Pipeline([
            ('svc', SVC(kernel=kernel, class_weight='balanced'))
        ])

        self.grid_search = GridSearchCV(self.pipeline, self.param_grid, cv=cv, scoring=scoring)
        self.best_model = None

    def fit(self, X, y):
        self.grid_search.fit(X, y)
        self.best_model = self.grid_search.best_estimator_
        print("Best Parameters:", self.grid_search.best_params_)
        print("Best Score:", self.grid_search.best_score_)

    def predict(self, X):
        if self.best_model is None:
            raise Exception("Model has not been trained. Call `.fit(X, y)` first.")
        return self.best_model.predict(X)
    
    def report(self, X, y_true):
        y_pred = self.predict(X)
        print("Classification Report:\n", classification_report(y_true, y_pred))


In [101]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
X_train, X_test, y_train, y_test = train_test_split(X_labelled_pca, y_labelled, test_size=0.3, random_state=42,  stratify=y_labelled)


In [102]:
import numpy as np


unique, counts = np.unique(y_labelled, return_counts=True)
count_dict = dict(zip(unique, counts))

count_dict

{np.str_('Anger'): np.int64(45),
 np.str_('Contempt'): np.int64(18),
 np.str_('Disgust'): np.int64(59),
 np.str_('Fear'): np.int64(25),
 np.str_('Happiness'): np.int64(69),
 np.str_('Neutral'): np.int64(593),
 np.str_('Sadness'): np.int64(28),
 np.str_('Surprise'): np.int64(83)}

In [103]:
svm_clf = SVMClassifier(cv=10, param_grid = {
    'svc__C': [0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90,  100],       
    'svc__gamma': ['scale', 0.001, 0.01, 0.1, 1] 
}, kernel='rbf', scoring='f1_macro')

In [104]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [112]:
X_train

array([[ 4.00245201e+02,  1.31429920e+03, -8.19288829e+01, ...,
        -7.16436622e+01, -7.53521525e+01, -7.18449925e+01],
       [ 2.14747523e+03,  1.91194861e+02, -8.81364639e+02, ...,
        -1.07228203e+02,  9.56854329e+01,  5.94205646e+01],
       [ 2.21178541e+03,  7.43415098e+02, -1.22692141e+03, ...,
        -6.33190605e+01,  1.11993576e+02, -6.22877999e+01],
       ...,
       [ 2.67059487e+03, -6.82254837e+02, -1.75548252e+03, ...,
        -9.89438628e+00, -2.74781834e+01,  5.20158623e+01],
       [ 4.33327701e+02, -1.07726678e+03, -7.80405134e+02, ...,
        -1.04277281e+02, -1.39700642e+02,  6.34564220e-02],
       [ 1.10932392e+03,  1.66515260e+03, -8.34306346e+02, ...,
        -9.17801665e+01,  6.00429870e+01, -1.12222773e+02]],
      shape=(644, 68))

In [111]:
X_train_resampled

array([[  400.24520125,  1314.29920256,   -81.92888292, ...,
          -71.64366222,   -75.35215248,   -71.84499247],
       [ 2147.47523369,   191.19486105,  -881.36463896, ...,
         -107.22820323,    95.68543295,    59.42056456],
       [ 2211.78540826,   743.41509778, -1226.9214102 , ...,
          -63.31906048,   111.99357595,   -62.28779993],
       ...,
       [ -152.61205458,  -549.19312706, -1178.92403603, ...,
         -102.7284136 ,    67.82082177,  -117.07407531],
       [   28.47140028,   105.85970637, -1194.30587301, ...,
           35.36852202,    83.11954833,  -146.04920606],
       [ 1607.25165236,   490.91092164, -1851.55967849, ...,
          103.24460165,   -37.94873302,    75.93426311]], shape=(3320, 68))

In [105]:
svm_clf.fit(X=X_train_resampled, y=y_train_resampled)

Best Parameters: {'svc__C': 90, 'svc__gamma': 'scale'}
Best Score: 0.9960564202515207


In [106]:
y_pred = svm_clf.predict(X=X_test)

In [109]:
y_pred

array(['Neutral', 'Happiness', 'Surprise', 'Neutral', 'Neutral',
       'Neutral', 'Neutral', 'Anger', 'Disgust', 'Neutral', 'Anger',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Surprise', 'Neutral',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Anger',
       'Neutral', 'Happiness', 'Happiness', 'Neutral', 'Fear', 'Neutral',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Disgust',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Sadness', 'Happiness',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral',
       'Neutral', 'Happiness', 'Neutral', 'Neutral', 'Neutral', 'Neutral',
       'Neutral', 'Neutral', 'Happiness', 'Contempt', 'Neutral',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Happiness', 'Neutral',
       'Neutral', 'Disgust', 'Neutral', 'Surprise', 'Neutral', 'Surprise',
       'Sadness', 'Surprise', 'Happiness', 'Neutral', 'Surprise',
       'Neutral', 'Surprise', 'Neutral', 'Happiness', 'Contempt',
    

In [110]:
svm_clf.report(X=X_test, y_true=y_test)

Classification Report:
               precision    recall  f1-score   support

       Anger       0.71      0.38      0.50        13
    Contempt       0.00      0.00      0.00         5
     Disgust       1.00      0.61      0.76        18
        Fear       1.00      0.38      0.55         8
   Happiness       0.95      0.86      0.90        21
     Neutral       0.83      0.97      0.89       178
     Sadness       0.75      0.38      0.50         8
    Surprise       0.89      0.64      0.74        25

    accuracy                           0.83       276
   macro avg       0.77      0.53      0.61       276
weighted avg       0.84      0.83      0.82       276



In [108]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_pred=y_pred, y_true=y_test)
cm

array([[  5,   0,   0,   0,   0,   8,   0,   0],
       [  0,   0,   0,   0,   0,   5,   0,   0],
       [  0,   0,  11,   0,   1,   6,   0,   0],
       [  0,   3,   0,   3,   0,   2,   0,   0],
       [  1,   0,   0,   0,  18,   2,   0,   0],
       [  0,   2,   0,   0,   0, 173,   1,   2],
       [  1,   0,   0,   0,   0,   4,   3,   0],
       [  0,   0,   0,   0,   0,   9,   0,  16]])

In [136]:
from sklearn.utils import resample
import pandas as pd

# Combine X and y into a DataFrame
df = pd.DataFrame(X_labelled_pca)
df['label'] = y_labelled

# Create separate dataframes for each class
neutral_df = df[df['label'] == 'Neutral']
minority_df = df[df['label'] != 'Neutral']

# Downsample Neutral to match the minority size
neutral_downsampled = resample(neutral_df, 
                               replace=False, 
                               n_samples=len(minority_df), 
                               random_state=42)

# Combine and shuffle
balanced_subset = pd.concat([neutral_downsampled, minority_df])
balanced_subset = balanced_subset.sample(frac=1, random_state=42)  # shuffle

X_subset = balanced_subset.drop('label', axis=1).values
y_subset = balanced_subset['label'].values


In [137]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59,60,61,62,63,64,65,66,67,label
0,-396.303520,949.051268,-111.104701,-538.943371,199.580067,-845.017971,-349.541572,-580.330680,-373.950313,133.142826,...,145.214582,-45.109076,-105.038878,-229.007887,-200.261830,-48.528872,-87.416680,144.755620,-144.690525,Neutral
1,2100.259086,474.722929,-2320.169145,-600.447396,2387.355348,1092.173397,329.944673,-56.052682,-448.693229,549.181735,...,-381.262964,158.246769,-228.482400,-376.579220,-488.382019,30.056388,79.087073,-125.018024,-45.230635,Neutral
2,404.833623,654.554513,-1387.410773,-66.524433,2169.383725,75.470377,9.505847,770.841489,-848.653878,1008.312867,...,-71.737138,2.699155,-191.099919,-372.914756,-356.559912,-135.285255,243.080246,226.491997,-197.173851,Neutral
3,3109.171996,173.046114,-1376.551194,-394.079477,2913.549884,668.442897,181.869845,25.466892,-580.445211,992.901128,...,2.396512,314.383404,-212.659519,-147.079742,-434.102296,-44.397042,-10.293212,11.472211,-6.268691,Neutral
4,2147.475234,191.194861,-881.364639,-742.537040,2081.106484,-350.717502,-76.491123,102.556122,-265.885635,511.479933,...,-285.786856,337.139618,-160.554938,-125.240813,-352.414820,74.929273,-107.228203,95.685433,59.420565,Neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
915,3480.230358,298.008164,110.478042,-1173.910843,2685.800702,-81.636357,-141.154329,-85.025914,-611.922107,938.644764,...,-136.344071,28.020016,30.671036,-532.954898,84.377678,-94.753970,37.530072,303.226470,-24.601827,Surprise
916,-1425.466716,-27.355083,-536.776608,-601.039476,450.396471,-903.678907,-376.209347,-206.613449,-414.152282,426.446675,...,-216.309523,177.577334,-115.267890,-166.442630,-225.119375,13.379492,-46.102861,32.376972,-35.025196,Surprise
917,2281.093014,52.390126,-1379.147762,-676.383289,1912.865592,312.296603,4.606913,-619.066320,-443.051372,300.917326,...,-133.973307,-71.618230,-182.707330,-180.207003,35.388049,138.225703,-221.413784,61.117156,-11.849902,Surprise
918,1186.409681,1943.653535,-1380.650188,-808.185303,963.090386,1706.000692,588.065425,-230.622254,-778.570315,386.218710,...,-154.780183,148.757109,-15.050085,-583.600784,-205.727097,183.911879,-68.198638,55.874591,135.063733,Surprise


In [138]:
X_subset

array([[ 2.09680522e+03,  1.64658942e+02, -1.93998579e+03, ...,
        -1.65685598e+02,  3.90469542e+00, -1.16358790e+02],
       [ 4.13676691e+03,  1.74880248e+03, -1.96998476e+03, ...,
         9.94847440e+01, -2.56253146e+02,  1.58595535e+02],
       [ 3.68341217e+03,  8.23473236e+02, -1.16086239e+03, ...,
         1.16799859e+00, -4.70913902e+01, -9.42529543e+01],
       ...,
       [-1.93851930e+03, -4.66175209e+02, -5.90004124e+01, ...,
        -1.19412344e+02,  2.18448992e+02, -6.99186794e+01],
       [-1.19441320e+03, -7.16413004e+02,  3.91550274e+02, ...,
         6.16106752e+00, -3.80985094e+01, -4.16924529e+01],
       [ 3.12089458e+03, -7.95874950e+01, -2.25299506e+03, ...,
         2.19233683e+02, -1.63770752e+02,  6.82761473e+01]],
      shape=(654, 68))

In [139]:
import numpy as np


unique, counts = np.unique(y_labelled, return_counts=True)
count_dict = dict(zip(unique, counts))

count_dict

{np.str_('Anger'): np.int64(45),
 np.str_('Contempt'): np.int64(18),
 np.str_('Disgust'): np.int64(59),
 np.str_('Fear'): np.int64(25),
 np.str_('Happiness'): np.int64(69),
 np.str_('Neutral'): np.int64(593),
 np.str_('Sadness'): np.int64(28),
 np.str_('Surprise'): np.int64(83)}

In [140]:
import numpy as np


unique, counts = np.unique(y_subset, return_counts=True)
count_dict = dict(zip(unique, counts))

count_dict

{'Anger': np.int64(45),
 'Contempt': np.int64(18),
 'Disgust': np.int64(59),
 'Fear': np.int64(25),
 'Happiness': np.int64(69),
 'Neutral': np.int64(327),
 'Sadness': np.int64(28),
 'Surprise': np.int64(83)}

In [121]:
## Using subset

In [142]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_subset, y_subset, test_size=0.3, random_state=42,  stratify=y_subset)


In [143]:
svm_clf = SVMClassifier(cv=10, param_grid = {
    'svc__C': [0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90,  100],       
    'svc__gamma': ['scale', 0.001, 0.01, 0.1, 1] 
}, kernel='rbf', scoring='f1_macro')

In [144]:
svm_clf.fit(X=X_train, y=y_train)

Best Parameters: {'svc__C': 100, 'svc__gamma': 'scale'}
Best Score: 0.4785311472250739


In [145]:
y_pred = svm_clf.predict(X=X_test)

In [146]:
svm_clf.report(X=X_test, y_true=y_test)

Classification Report:
               precision    recall  f1-score   support

       Anger       0.41      0.50      0.45        14
    Contempt       0.20      0.20      0.20         5
     Disgust       0.92      0.61      0.73        18
        Fear       0.33      0.25      0.29         8
   Happiness       0.89      0.76      0.82        21
     Neutral       0.76      0.91      0.83        98
     Sadness       0.00      0.00      0.00         8
    Surprise       0.90      0.72      0.80        25

    accuracy                           0.73       197
   macro avg       0.55      0.49      0.51       197
weighted avg       0.72      0.73      0.72       197



In [147]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_pred=y_pred, y_true=y_test)
cm

array([[ 7,  1,  0,  0,  0,  6,  0,  0],
       [ 0,  1,  0,  1,  0,  2,  1,  0],
       [ 2,  0, 11,  0,  0,  5,  0,  0],
       [ 0,  0,  0,  2,  2,  3,  0,  1],
       [ 0,  0,  1,  2, 16,  2,  0,  0],
       [ 5,  3,  0,  0,  0, 89,  0,  1],
       [ 1,  0,  0,  0,  0,  7,  0,  0],
       [ 2,  0,  0,  1,  0,  3,  1, 18]])

In [148]:
## Subset plus SMOTE

In [149]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_subset, y_subset, test_size=0.3, random_state=42,  stratify=y_subset)


In [150]:
smote = SMOTE(random_state=42)
X_train_subset_Smote, y_train_subset_Smote = smote.fit_resample(X_train, y_train)

In [154]:
svm_clf = SVMClassifier(cv=10, param_grid = {
    'svc__C': [0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90,  100],       
    'svc__gamma': ['scale', 0.001, 0.01, 0.1, 1] 
}, kernel='rbf', scoring='f1_macro')

In [152]:
X_train_subset_Smote

array([[ 1.30730387e+03, -8.53327366e+02, -1.13951346e+03, ...,
        -2.30566855e+02, -6.94791920e+01, -7.79034518e+01],
       [ 1.50432357e+03, -4.84312008e+02, -1.07021055e+03, ...,
        -7.69496495e+01,  2.02684484e+02, -3.12307502e+02],
       [ 6.57342603e+02, -1.74101439e+02, -9.53171200e+02, ...,
         3.31937265e+01, -1.43128769e+00,  1.78243164e+01],
       ...,
       [ 1.18447291e+03, -6.02057209e+02, -1.38893641e+03, ...,
        -1.44429103e+02, -3.09516403e+01, -3.17827157e+01],
       [ 1.10368887e+03,  2.72794568e+02, -1.61530515e+03, ...,
        -2.00133573e+02,  1.55399302e+02, -1.18798948e+02],
       [ 9.45385953e+02,  3.59997393e+02, -1.79010796e+03, ...,
        -2.22539281e+02,  1.59696393e+02, -1.64652117e+02]],
      shape=(1832, 68))

In [153]:
y_train_subset_Smote

array(['Neutral', 'Happiness', 'Neutral', ..., 'Surprise', 'Surprise',
       'Surprise'], shape=(1832,), dtype=object)

In [155]:
svm_clf.fit(X=X_train_subset_Smote, y=y_train_subset_Smote)

Best Parameters: {'svc__C': 60, 'svc__gamma': 'scale'}
Best Score: 0.9900427814924159


In [156]:
y_pred_subset_smote = svm_clf.predict(X=X_test)

In [157]:
svm_clf.report(X=X_test, y_true=y_test)

Classification Report:
               precision    recall  f1-score   support

       Anger       0.50      0.57      0.53        14
    Contempt       0.33      0.20      0.25         5
     Disgust       0.85      0.61      0.71        18
        Fear       0.20      0.12      0.15         8
   Happiness       0.89      0.76      0.82        21
     Neutral       0.75      0.94      0.84        98
     Sadness       0.00      0.00      0.00         8
    Surprise       0.94      0.68      0.79        25

    accuracy                           0.74       197
   macro avg       0.56      0.49      0.51       197
weighted avg       0.72      0.74      0.72       197



In [158]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_pred=y_pred, y_true=y_test)
cm

array([[ 7,  1,  0,  0,  0,  6,  0,  0],
       [ 0,  1,  0,  1,  0,  2,  1,  0],
       [ 2,  0, 11,  0,  0,  5,  0,  0],
       [ 0,  0,  0,  2,  2,  3,  0,  1],
       [ 0,  0,  1,  2, 16,  2,  0,  0],
       [ 5,  3,  0,  0,  0, 89,  0,  1],
       [ 1,  0,  0,  0,  0,  7,  0,  0],
       [ 2,  0,  0,  1,  0,  3,  1, 18]])

In [159]:
import numpy as np


unique, counts = np.unique(y_test, return_counts=True)
count_dict = dict(zip(unique, counts))

count_dict

{'Anger': np.int64(14),
 'Contempt': np.int64(5),
 'Disgust': np.int64(18),
 'Fear': np.int64(8),
 'Happiness': np.int64(21),
 'Neutral': np.int64(98),
 'Sadness': np.int64(8),
 'Surprise': np.int64(25)}