In [1]:
import numpy as np

y_labelled = np.load('emotion_labels.npy')
X_labelled_pca = np.load('X_labelled_pca.npy')

In [51]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline

class SVMClassifier:
    def __init__(self, kernel='poly', param_grid=None, cv=5, scoring='accuracy', probability=False):
        self.param_grid = param_grid or {
            'svc__degree': [2, 3, 4],
            'svc__coef0': [0, 1, 5],
            'svc__C': [0.1, 1, 10]
        }
        self.pipeline = Pipeline([
            ('svc', SVC(kernel=kernel, class_weight='balanced', probability=probability))
        ])

        self.grid_search = GridSearchCV(self.pipeline, self.param_grid, cv=cv, scoring=scoring)
        self.best_model = None

    def fit(self, X, y):
        self.grid_search.fit(X, y)
        self.best_model = self.grid_search.best_estimator_
        print("Best Parameters:", self.grid_search.best_params_)
        print("Best Score:", self.grid_search.best_score_)

    def predict(self, X):
        if self.best_model is None:
            raise Exception("Model has not been trained. Call `.fit(X, y)` first.")
        return self.best_model.predict(X)
    
    def report(self, X, y_true):
        y_pred = self.predict(X)
        print("Classification Report:\n", classification_report(y_true, y_pred))


In [101]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
X_train, X_test, y_train, y_test = train_test_split(X_labelled_pca, y_labelled, test_size=0.3, random_state=42,  stratify=y_labelled)


In [102]:
import numpy as np


unique, counts = np.unique(y_labelled, return_counts=True)
count_dict = dict(zip(unique, counts))

count_dict

{np.str_('Anger'): np.int64(45),
 np.str_('Contempt'): np.int64(18),
 np.str_('Disgust'): np.int64(59),
 np.str_('Fear'): np.int64(25),
 np.str_('Happiness'): np.int64(69),
 np.str_('Neutral'): np.int64(593),
 np.str_('Sadness'): np.int64(28),
 np.str_('Surprise'): np.int64(83)}

In [103]:
svm_clf = SVMClassifier(cv=10, param_grid = {
    'svc__C': [0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90,  100],       
    'svc__gamma': ['scale', 0.001, 0.01, 0.1, 1] 
}, kernel='rbf', scoring='f1_macro')

In [104]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [112]:
X_train

array([[ 4.00245201e+02,  1.31429920e+03, -8.19288829e+01, ...,
        -7.16436622e+01, -7.53521525e+01, -7.18449925e+01],
       [ 2.14747523e+03,  1.91194861e+02, -8.81364639e+02, ...,
        -1.07228203e+02,  9.56854329e+01,  5.94205646e+01],
       [ 2.21178541e+03,  7.43415098e+02, -1.22692141e+03, ...,
        -6.33190605e+01,  1.11993576e+02, -6.22877999e+01],
       ...,
       [ 2.67059487e+03, -6.82254837e+02, -1.75548252e+03, ...,
        -9.89438628e+00, -2.74781834e+01,  5.20158623e+01],
       [ 4.33327701e+02, -1.07726678e+03, -7.80405134e+02, ...,
        -1.04277281e+02, -1.39700642e+02,  6.34564220e-02],
       [ 1.10932392e+03,  1.66515260e+03, -8.34306346e+02, ...,
        -9.17801665e+01,  6.00429870e+01, -1.12222773e+02]],
      shape=(644, 68))

In [111]:
X_train_resampled

array([[  400.24520125,  1314.29920256,   -81.92888292, ...,
          -71.64366222,   -75.35215248,   -71.84499247],
       [ 2147.47523369,   191.19486105,  -881.36463896, ...,
         -107.22820323,    95.68543295,    59.42056456],
       [ 2211.78540826,   743.41509778, -1226.9214102 , ...,
          -63.31906048,   111.99357595,   -62.28779993],
       ...,
       [ -152.61205458,  -549.19312706, -1178.92403603, ...,
         -102.7284136 ,    67.82082177,  -117.07407531],
       [   28.47140028,   105.85970637, -1194.30587301, ...,
           35.36852202,    83.11954833,  -146.04920606],
       [ 1607.25165236,   490.91092164, -1851.55967849, ...,
          103.24460165,   -37.94873302,    75.93426311]], shape=(3320, 68))

In [105]:
svm_clf.fit(X=X_train_resampled, y=y_train_resampled)

Best Parameters: {'svc__C': 90, 'svc__gamma': 'scale'}
Best Score: 0.9960564202515207


In [106]:
y_pred = svm_clf.predict(X=X_test)

In [109]:
y_pred

array(['Neutral', 'Happiness', 'Surprise', 'Neutral', 'Neutral',
       'Neutral', 'Neutral', 'Anger', 'Disgust', 'Neutral', 'Anger',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Surprise', 'Neutral',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Anger',
       'Neutral', 'Happiness', 'Happiness', 'Neutral', 'Fear', 'Neutral',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Disgust',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Sadness', 'Happiness',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral',
       'Neutral', 'Happiness', 'Neutral', 'Neutral', 'Neutral', 'Neutral',
       'Neutral', 'Neutral', 'Happiness', 'Contempt', 'Neutral',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Happiness', 'Neutral',
       'Neutral', 'Disgust', 'Neutral', 'Surprise', 'Neutral', 'Surprise',
       'Sadness', 'Surprise', 'Happiness', 'Neutral', 'Surprise',
       'Neutral', 'Surprise', 'Neutral', 'Happiness', 'Contempt',
    

In [110]:
svm_clf.report(X=X_test, y_true=y_test)

Classification Report:
               precision    recall  f1-score   support

       Anger       0.71      0.38      0.50        13
    Contempt       0.00      0.00      0.00         5
     Disgust       1.00      0.61      0.76        18
        Fear       1.00      0.38      0.55         8
   Happiness       0.95      0.86      0.90        21
     Neutral       0.83      0.97      0.89       178
     Sadness       0.75      0.38      0.50         8
    Surprise       0.89      0.64      0.74        25

    accuracy                           0.83       276
   macro avg       0.77      0.53      0.61       276
weighted avg       0.84      0.83      0.82       276



In [108]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_pred=y_pred, y_true=y_test)
cm

array([[  5,   0,   0,   0,   0,   8,   0,   0],
       [  0,   0,   0,   0,   0,   5,   0,   0],
       [  0,   0,  11,   0,   1,   6,   0,   0],
       [  0,   3,   0,   3,   0,   2,   0,   0],
       [  1,   0,   0,   0,  18,   2,   0,   0],
       [  0,   2,   0,   0,   0, 173,   1,   2],
       [  1,   0,   0,   0,   0,   4,   3,   0],
       [  0,   0,   0,   0,   0,   9,   0,  16]])

In [136]:
from sklearn.utils import resample
import pandas as pd

# Combine X and y into a DataFrame
df = pd.DataFrame(X_labelled_pca)
df['label'] = y_labelled

# Create separate dataframes for each class
neutral_df = df[df['label'] == 'Neutral']
minority_df = df[df['label'] != 'Neutral']

# Downsample Neutral to match the minority size
neutral_downsampled = resample(neutral_df, 
                               replace=False, 
                               n_samples=len(minority_df), 
                               random_state=42)

# Combine and shuffle
balanced_subset = pd.concat([neutral_downsampled, minority_df])
balanced_subset = balanced_subset.sample(frac=1, random_state=42)  # shuffle

X_subset = balanced_subset.drop('label', axis=1).values
y_subset = balanced_subset['label'].values


In [137]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59,60,61,62,63,64,65,66,67,label
0,-396.303520,949.051268,-111.104701,-538.943371,199.580067,-845.017971,-349.541572,-580.330680,-373.950313,133.142826,...,145.214582,-45.109076,-105.038878,-229.007887,-200.261830,-48.528872,-87.416680,144.755620,-144.690525,Neutral
1,2100.259086,474.722929,-2320.169145,-600.447396,2387.355348,1092.173397,329.944673,-56.052682,-448.693229,549.181735,...,-381.262964,158.246769,-228.482400,-376.579220,-488.382019,30.056388,79.087073,-125.018024,-45.230635,Neutral
2,404.833623,654.554513,-1387.410773,-66.524433,2169.383725,75.470377,9.505847,770.841489,-848.653878,1008.312867,...,-71.737138,2.699155,-191.099919,-372.914756,-356.559912,-135.285255,243.080246,226.491997,-197.173851,Neutral
3,3109.171996,173.046114,-1376.551194,-394.079477,2913.549884,668.442897,181.869845,25.466892,-580.445211,992.901128,...,2.396512,314.383404,-212.659519,-147.079742,-434.102296,-44.397042,-10.293212,11.472211,-6.268691,Neutral
4,2147.475234,191.194861,-881.364639,-742.537040,2081.106484,-350.717502,-76.491123,102.556122,-265.885635,511.479933,...,-285.786856,337.139618,-160.554938,-125.240813,-352.414820,74.929273,-107.228203,95.685433,59.420565,Neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
915,3480.230358,298.008164,110.478042,-1173.910843,2685.800702,-81.636357,-141.154329,-85.025914,-611.922107,938.644764,...,-136.344071,28.020016,30.671036,-532.954898,84.377678,-94.753970,37.530072,303.226470,-24.601827,Surprise
916,-1425.466716,-27.355083,-536.776608,-601.039476,450.396471,-903.678907,-376.209347,-206.613449,-414.152282,426.446675,...,-216.309523,177.577334,-115.267890,-166.442630,-225.119375,13.379492,-46.102861,32.376972,-35.025196,Surprise
917,2281.093014,52.390126,-1379.147762,-676.383289,1912.865592,312.296603,4.606913,-619.066320,-443.051372,300.917326,...,-133.973307,-71.618230,-182.707330,-180.207003,35.388049,138.225703,-221.413784,61.117156,-11.849902,Surprise
918,1186.409681,1943.653535,-1380.650188,-808.185303,963.090386,1706.000692,588.065425,-230.622254,-778.570315,386.218710,...,-154.780183,148.757109,-15.050085,-583.600784,-205.727097,183.911879,-68.198638,55.874591,135.063733,Surprise


In [138]:
X_subset

array([[ 2.09680522e+03,  1.64658942e+02, -1.93998579e+03, ...,
        -1.65685598e+02,  3.90469542e+00, -1.16358790e+02],
       [ 4.13676691e+03,  1.74880248e+03, -1.96998476e+03, ...,
         9.94847440e+01, -2.56253146e+02,  1.58595535e+02],
       [ 3.68341217e+03,  8.23473236e+02, -1.16086239e+03, ...,
         1.16799859e+00, -4.70913902e+01, -9.42529543e+01],
       ...,
       [-1.93851930e+03, -4.66175209e+02, -5.90004124e+01, ...,
        -1.19412344e+02,  2.18448992e+02, -6.99186794e+01],
       [-1.19441320e+03, -7.16413004e+02,  3.91550274e+02, ...,
         6.16106752e+00, -3.80985094e+01, -4.16924529e+01],
       [ 3.12089458e+03, -7.95874950e+01, -2.25299506e+03, ...,
         2.19233683e+02, -1.63770752e+02,  6.82761473e+01]],
      shape=(654, 68))

In [139]:
import numpy as np


unique, counts = np.unique(y_labelled, return_counts=True)
count_dict = dict(zip(unique, counts))

count_dict

{np.str_('Anger'): np.int64(45),
 np.str_('Contempt'): np.int64(18),
 np.str_('Disgust'): np.int64(59),
 np.str_('Fear'): np.int64(25),
 np.str_('Happiness'): np.int64(69),
 np.str_('Neutral'): np.int64(593),
 np.str_('Sadness'): np.int64(28),
 np.str_('Surprise'): np.int64(83)}

In [140]:
import numpy as np


unique, counts = np.unique(y_subset, return_counts=True)
count_dict = dict(zip(unique, counts))

count_dict

{'Anger': np.int64(45),
 'Contempt': np.int64(18),
 'Disgust': np.int64(59),
 'Fear': np.int64(25),
 'Happiness': np.int64(69),
 'Neutral': np.int64(327),
 'Sadness': np.int64(28),
 'Surprise': np.int64(83)}

In [121]:
## Using subset

In [142]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_subset, y_subset, test_size=0.3, random_state=42,  stratify=y_subset)


In [143]:
svm_clf = SVMClassifier(cv=10, param_grid = {
    'svc__C': [0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90,  100],       
    'svc__gamma': ['scale', 0.001, 0.01, 0.1, 1] 
}, kernel='rbf', scoring='f1_macro')

In [144]:
svm_clf.fit(X=X_train, y=y_train)

Best Parameters: {'svc__C': 100, 'svc__gamma': 'scale'}
Best Score: 0.4785311472250739


In [145]:
y_pred = svm_clf.predict(X=X_test)

In [146]:
svm_clf.report(X=X_test, y_true=y_test)

Classification Report:
               precision    recall  f1-score   support

       Anger       0.41      0.50      0.45        14
    Contempt       0.20      0.20      0.20         5
     Disgust       0.92      0.61      0.73        18
        Fear       0.33      0.25      0.29         8
   Happiness       0.89      0.76      0.82        21
     Neutral       0.76      0.91      0.83        98
     Sadness       0.00      0.00      0.00         8
    Surprise       0.90      0.72      0.80        25

    accuracy                           0.73       197
   macro avg       0.55      0.49      0.51       197
weighted avg       0.72      0.73      0.72       197



In [147]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_pred=y_pred, y_true=y_test)
cm

array([[ 7,  1,  0,  0,  0,  6,  0,  0],
       [ 0,  1,  0,  1,  0,  2,  1,  0],
       [ 2,  0, 11,  0,  0,  5,  0,  0],
       [ 0,  0,  0,  2,  2,  3,  0,  1],
       [ 0,  0,  1,  2, 16,  2,  0,  0],
       [ 5,  3,  0,  0,  0, 89,  0,  1],
       [ 1,  0,  0,  0,  0,  7,  0,  0],
       [ 2,  0,  0,  1,  0,  3,  1, 18]])

In [148]:
## Subset plus SMOTE

In [149]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_subset, y_subset, test_size=0.3, random_state=42,  stratify=y_subset)


In [150]:
smote = SMOTE(random_state=42)
X_train_subset_Smote, y_train_subset_Smote = smote.fit_resample(X_train, y_train)

In [154]:
svm_clf = SVMClassifier(cv=10, param_grid = {
    'svc__C': [0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90,  100],       
    'svc__gamma': ['scale', 0.001, 0.01, 0.1, 1] 
}, kernel='rbf', scoring='f1_macro')

In [152]:
X_train_subset_Smote

array([[ 1.30730387e+03, -8.53327366e+02, -1.13951346e+03, ...,
        -2.30566855e+02, -6.94791920e+01, -7.79034518e+01],
       [ 1.50432357e+03, -4.84312008e+02, -1.07021055e+03, ...,
        -7.69496495e+01,  2.02684484e+02, -3.12307502e+02],
       [ 6.57342603e+02, -1.74101439e+02, -9.53171200e+02, ...,
         3.31937265e+01, -1.43128769e+00,  1.78243164e+01],
       ...,
       [ 1.18447291e+03, -6.02057209e+02, -1.38893641e+03, ...,
        -1.44429103e+02, -3.09516403e+01, -3.17827157e+01],
       [ 1.10368887e+03,  2.72794568e+02, -1.61530515e+03, ...,
        -2.00133573e+02,  1.55399302e+02, -1.18798948e+02],
       [ 9.45385953e+02,  3.59997393e+02, -1.79010796e+03, ...,
        -2.22539281e+02,  1.59696393e+02, -1.64652117e+02]],
      shape=(1832, 68))

In [153]:
y_train_subset_Smote

array(['Neutral', 'Happiness', 'Neutral', ..., 'Surprise', 'Surprise',
       'Surprise'], shape=(1832,), dtype=object)

In [None]:
svm_clf.fit(X=X_train_subset_Smote, y=y_train_subset_Smote)

Best Parameters: {'svc__C': 60, 'svc__gamma': 'scale'}
Best Score: 0.9900427814924159


In [156]:
y_pred_subset_smote = svm_clf.predict(X=X_test)

In [157]:
svm_clf.report(X=X_test, y_true=y_test)

Classification Report:
               precision    recall  f1-score   support

       Anger       0.50      0.57      0.53        14
    Contempt       0.33      0.20      0.25         5
     Disgust       0.85      0.61      0.71        18
        Fear       0.20      0.12      0.15         8
   Happiness       0.89      0.76      0.82        21
     Neutral       0.75      0.94      0.84        98
     Sadness       0.00      0.00      0.00         8
    Surprise       0.94      0.68      0.79        25

    accuracy                           0.74       197
   macro avg       0.56      0.49      0.51       197
weighted avg       0.72      0.74      0.72       197



In [158]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_pred=y_pred, y_true=y_test)
cm

array([[ 7,  1,  0,  0,  0,  6,  0,  0],
       [ 0,  1,  0,  1,  0,  2,  1,  0],
       [ 2,  0, 11,  0,  0,  5,  0,  0],
       [ 0,  0,  0,  2,  2,  3,  0,  1],
       [ 0,  0,  1,  2, 16,  2,  0,  0],
       [ 5,  3,  0,  0,  0, 89,  0,  1],
       [ 1,  0,  0,  0,  0,  7,  0,  0],
       [ 2,  0,  0,  1,  0,  3,  1, 18]])

In [159]:
import numpy as np


unique, counts = np.unique(y_test, return_counts=True)
count_dict = dict(zip(unique, counts))

count_dict

{'Anger': np.int64(14),
 'Contempt': np.int64(5),
 'Disgust': np.int64(18),
 'Fear': np.int64(8),
 'Happiness': np.int64(21),
 'Neutral': np.int64(98),
 'Sadness': np.int64(8),
 'Surprise': np.int64(25)}

In [3]:
## new approach

In [52]:
X_train_final, X_test_final, y_train_final, y_test_final = train_test_split(X_labelled_pca, y_labelled, stratify=y_labelled, test_size=0.3)

In [53]:
X_train_final

array([[ 2.84589790e+03,  5.66151585e+01, -8.99322996e+02, ...,
        -3.64235095e+01,  9.39171025e+01,  7.20068692e+01],
       [ 9.40035927e+02,  5.11164415e+02, -2.08017297e+03, ...,
         8.63841427e+01,  6.61884123e+00, -1.37063008e+01],
       [ 2.22098816e+03,  9.34490177e+02, -9.85345093e+02, ...,
         6.22697616e+01, -2.67059861e+02,  2.38087267e+01],
       ...,
       [ 3.65074156e+03,  1.20842372e+03, -1.36938853e+03, ...,
         9.71028121e+01,  3.11992089e+01, -1.14623142e+02],
       [ 2.87722336e+03, -1.07366233e+02, -2.15161091e+03, ...,
         1.77357876e+02, -4.15199093e-01, -7.16536601e+01],
       [-1.10778135e+03, -1.30287065e+03, -2.27840946e+02, ...,
        -1.38706340e+01, -2.52229326e+01,  9.86829008e+01]],
      shape=(644, 68))

In [54]:
y_train_final

array(['Neutral', 'Happiness', 'Neutral', 'Neutral', 'Contempt',
       'Surprise', 'Neutral', 'Neutral', 'Anger', 'Neutral', 'Neutral',
       'Surprise', 'Neutral', 'Disgust', 'Neutral', 'Neutral', 'Disgust',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral',
       'Happiness', 'Neutral', 'Anger', 'Neutral', 'Happiness', 'Neutral',
       'Neutral', 'Neutral', 'Neutral', 'Disgust', 'Surprise', 'Disgust',
       'Fear', 'Neutral', 'Happiness', 'Surprise', 'Disgust', 'Neutral',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Happiness',
       'Happiness', 'Neutral', 'Happiness', 'Happiness', 'Anger', 'Fear',
       'Neutral', 'Neutral', 'Disgust', 'Happiness', 'Neutral', 'Neutral',
       'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral',
       'Neutral', 'Neutral', 'Surprise', 'Neutral', 'Happiness',
       'Neutral', 'Disgust', 'Neutral', 'Neutral', 'Surprise', 'Neutral',
       'Happiness', 'Disgust', 'Neutral', 'Happiness', 'Ne

In [55]:
svm1_y = np.array(['NotNeutral' if val != 'Neutral' else 'Neutral' for val in y_train_final])

In [56]:
svm1_x = X_train_final

In [57]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(svm1_x, svm1_y, test_size=0.3, random_state=42,  stratify=svm1_y)

In [58]:
svm_1 = SVMClassifier(cv=10, param_grid = {
    'svc__C': [0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90,  100],       
    'svc__gamma': ['scale', 0.001, 0.01, 0.1, 1] 
}, kernel='rbf', scoring='f1_macro', probability=True)

In [59]:
svm_1.fit(X=X_train, y=y_train)

Best Parameters: {'svc__C': 90, 'svc__gamma': 'scale'}
Best Score: 0.799585521868724


In [60]:
svm_1.report(X=X_test, y_true=y_test)

Classification Report:
               precision    recall  f1-score   support

     Neutral       0.81      0.91      0.86       125
  NotNeutral       0.79      0.61      0.69        69

    accuracy                           0.80       194
   macro avg       0.80      0.76      0.77       194
weighted avg       0.80      0.80      0.80       194



In [61]:
import pandas as pd

# Combine X and y into a DataFrame
df1 = pd.DataFrame(X_train_final)
df1['label'] = y_train_final

In [62]:
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59,60,61,62,63,64,65,66,67,label
0,2845.897899,56.615158,-899.322996,-1270.096942,2948.267695,432.701742,134.997794,175.023005,-477.010966,834.282055,...,-273.809330,262.684508,-135.275080,-388.499696,-400.482453,90.899744,-36.423509,93.917102,72.006869,Neutral
1,940.035927,511.164415,-2080.172969,450.720572,2930.336720,860.680250,224.921884,825.773292,-229.929943,764.544975,...,-169.710153,-10.268035,-185.567233,-340.103364,-239.272682,-152.552125,86.384143,6.618841,-13.706301,Happiness
2,2220.988164,934.490177,-985.345093,-587.965845,1639.882754,723.053475,507.817407,-1056.205322,-1257.884920,824.477797,...,-217.909094,166.830981,-445.782638,59.854217,-217.773700,110.878036,62.269762,-267.059861,23.808727,Neutral
3,1672.856564,-895.800267,-984.321644,-1117.430098,2369.974051,105.601434,157.100289,482.477684,-336.409126,711.667521,...,-208.859168,369.533617,-23.985073,-178.624611,-339.551689,253.899239,-142.695613,236.444594,-290.342741,Neutral
4,590.588799,217.817502,-394.360133,-701.559744,1144.381058,-189.134139,-129.987013,-247.003331,-386.258307,304.580381,...,-200.821006,129.817777,-123.248162,-175.145200,-322.595242,-0.453963,58.576818,25.719788,29.710971,Contempt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
639,1829.629115,213.216976,-1535.589931,-458.547384,3087.997354,248.861442,294.266639,743.621154,-713.318361,977.681766,...,-29.436638,398.390140,-27.365949,-562.221379,-517.957663,-103.459606,51.044105,34.892544,-66.791800,Neutral
640,740.593974,808.741451,-1081.959558,-276.033945,1643.605172,979.826584,640.203586,1117.233204,-524.529220,881.391624,...,-215.660945,-258.342757,-111.417295,-268.727750,-303.645419,248.431468,55.210180,100.742075,29.220600,Happiness
641,3650.741561,1208.423720,-1369.388535,-798.766796,2268.784501,671.012558,306.179588,-224.607220,-662.153851,577.780335,...,-320.025761,59.979432,-220.308057,-152.708629,-330.789830,222.688628,97.102812,31.199209,-114.623142,Disgust
642,2877.223360,-107.366233,-2151.610912,-1154.774205,3867.036969,822.908187,775.131612,1302.678830,-758.124800,1239.919944,...,-4.252794,227.992855,190.898878,-512.740625,-323.011917,114.767668,177.357876,-0.415199,-71.653660,Neutral


In [63]:
other_df = df1[df1['label'] != 'Neutral']

In [64]:
other_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59,60,61,62,63,64,65,66,67,label
1,940.035927,511.164415,-2080.172969,450.720572,2930.336720,860.680250,224.921884,825.773292,-229.929943,764.544975,...,-169.710153,-10.268035,-185.567233,-340.103364,-239.272682,-152.552125,86.384143,6.618841,-13.706301,Happiness
4,590.588799,217.817502,-394.360133,-701.559744,1144.381058,-189.134139,-129.987013,-247.003331,-386.258307,304.580381,...,-200.821006,129.817777,-123.248162,-175.145200,-322.595242,-0.453963,58.576818,25.719788,29.710971,Contempt
5,1253.461595,141.850904,-786.139846,-459.546563,2158.558717,-368.209156,-128.278049,-325.172976,-550.826529,674.112280,...,-229.182032,198.154973,-162.541126,-205.542914,-221.427314,96.621289,73.163109,189.625660,-169.894576,Surprise
8,4735.242083,1356.070030,-872.509726,-1265.296003,3597.109836,1175.353669,490.621589,461.663742,-480.140295,879.941329,...,-304.861375,215.552231,40.750276,-60.712113,121.207916,168.359240,139.393165,-244.057895,-41.609023,Anger
11,1158.277442,-473.495808,-1066.470509,-973.734887,2085.439611,544.616979,315.224011,-217.639472,-307.621864,628.336965,...,-333.150918,142.424491,-246.395284,-297.279042,-278.629596,203.432725,-207.743762,153.474286,-221.959662,Surprise
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
633,-2366.263913,-907.339185,351.980862,-169.207710,-632.156205,-1136.050618,-556.523869,-549.868248,-416.269688,398.240879,...,-193.295388,160.249928,-100.693371,-224.698972,-322.563353,55.202514,24.855233,77.091406,-35.543380,Anger
635,936.259347,682.706635,-1812.792685,-410.349254,1680.697404,768.285817,111.680461,-403.276859,-326.276811,329.383492,...,-505.945325,-53.836218,154.592268,-146.213181,-291.836286,130.169026,-8.562464,-264.700983,26.754983,Happiness
640,740.593974,808.741451,-1081.959558,-276.033945,1643.605172,979.826584,640.203586,1117.233204,-524.529220,881.391624,...,-215.660945,-258.342757,-111.417295,-268.727750,-303.645419,248.431468,55.210180,100.742075,29.220600,Happiness
641,3650.741561,1208.423720,-1369.388535,-798.766796,2268.784501,671.012558,306.179588,-224.607220,-662.153851,577.780335,...,-320.025761,59.979432,-220.308057,-152.708629,-330.789830,222.688628,97.102812,31.199209,-114.623142,Disgust


In [65]:
other_df = other_df.sample(frac=1, random_state=42) 

X_subset = other_df.drop('label', axis=1).values
y_subset = other_df['label'].values

In [66]:
X_subset

array([[ 1784.51491073,   462.57726098, -1023.1893046 , ...,
           58.83036581,   259.32734445,  -159.103999  ],
       [  256.02242908,   399.06922424, -2057.25949583, ...,
          -90.43899129,   153.33546737,  -117.38775079],
       [  470.83947026,   623.73028446, -1311.93731381, ...,
          288.0041779 ,   368.14500789,  -198.05032768],
       ...,
       [ -982.48697325,    98.86179303,  -560.35407035, ...,
           15.96941665,   232.26298572,  -143.84507455],
       [ 2610.85724388,   289.46335363, -1033.89066872, ...,
           21.01367607,    31.3016986 ,   -30.60538732],
       [  365.88315137,   204.84624556,   545.5954225 , ...,
          281.79988374,   -91.14701621,  -106.69380244]], shape=(229, 68))

In [67]:
y_subset

array(['Fear', 'Surprise', 'Happiness', 'Surprise', 'Surprise',
       'Surprise', 'Surprise', 'Anger', 'Fear', 'Anger', 'Surprise',
       'Happiness', 'Sadness', 'Surprise', 'Disgust', 'Fear', 'Sadness',
       'Disgust', 'Anger', 'Sadness', 'Anger', 'Disgust', 'Happiness',
       'Happiness', 'Happiness', 'Happiness', 'Surprise', 'Fear',
       'Disgust', 'Disgust', 'Surprise', 'Surprise', 'Disgust',
       'Surprise', 'Disgust', 'Surprise', 'Disgust', 'Happiness',
       'Sadness', 'Sadness', 'Surprise', 'Anger', 'Anger', 'Anger',
       'Disgust', 'Happiness', 'Surprise', 'Surprise', 'Happiness',
       'Happiness', 'Anger', 'Surprise', 'Disgust', 'Sadness', 'Sadness',
       'Disgust', 'Surprise', 'Happiness', 'Anger', 'Happiness',
       'Surprise', 'Anger', 'Disgust', 'Disgust', 'Anger', 'Happiness',
       'Surprise', 'Sadness', 'Anger', 'Disgust', 'Surprise', 'Sadness',
       'Fear', 'Surprise', 'Anger', 'Happiness', 'Happiness', 'Happiness',
       'Happiness', 'Disgust', '

In [68]:
from sklearn.model_selection import train_test_split

X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X_subset, y_subset, test_size=0.3, random_state=42,  stratify=y_subset)

In [69]:
svm_2 = SVMClassifier(cv=10, param_grid = {
    'svc__C': [0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90,  100],       
    'svc__gamma': ['scale', 0.001, 0.01, 0.1, 1] 
}, kernel='rbf', scoring='f1_macro')

In [70]:
svm_2.fit(X=X_train_2, y=y_train_2)



Best Parameters: {'svc__C': 70, 'svc__gamma': 'scale'}
Best Score: 0.4180385487528344


In [71]:
svm_2.report(X=X_test_2, y_true=y_test_2)

Classification Report:
               precision    recall  f1-score   support

       Anger       0.22      0.20      0.21        10
    Contempt       0.67      0.50      0.57         4
     Disgust       0.50      0.50      0.50        12
        Fear       0.00      0.00      0.00         5
   Happiness       0.73      0.79      0.76        14
     Sadness       0.40      0.33      0.36         6
    Surprise       0.73      0.89      0.80        18

    accuracy                           0.57        69
   macro avg       0.46      0.46      0.46        69
weighted avg       0.53      0.57      0.54        69



In [72]:
final_pred = []

for i in range(len(X_test_final)):
    x = X_test_final[i].reshape(1, -1)

    pred1 = svm_1.predict(x)[0]

    if(pred1 == "Neutral"):
        final_pred.append("Neutral")
    else:
        pred2 = svm_2.predict(x)[0]
        final_pred.append(pred2)

In [73]:
print(classification_report(y_pred=final_pred, y_true=y_test_final))

              precision    recall  f1-score   support

       Anger       0.21      0.23      0.22        13
    Contempt       0.00      0.00      0.00         5
     Disgust       0.67      0.44      0.53        18
        Fear       0.20      0.12      0.15         8
   Happiness       0.82      0.86      0.84        21
     Neutral       0.85      0.92      0.88       178
     Sadness       0.20      0.12      0.15         8
    Surprise       0.78      0.72      0.75        25

    accuracy                           0.77       276
   macro avg       0.47      0.43      0.44       276
weighted avg       0.75      0.77      0.75       276



In [74]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_pred=final_pred, y_true=y_test_final)
cm

array([[  3,   0,   1,   0,   0,   7,   1,   1],
       [  0,   0,   1,   0,   0,   3,   1,   0],
       [  4,   0,   8,   1,   1,   3,   0,   1],
       [  1,   0,   0,   1,   2,   3,   0,   1],
       [  0,   1,   1,   0,  18,   1,   0,   0],
       [  5,   2,   1,   2,   1, 163,   2,   2],
       [  1,   0,   0,   0,   0,   6,   1,   0],
       [  0,   0,   0,   1,   0,   6,   0,  18]])