In [47]:
import numpy as np
from sklearn.preprocessing import StandardScaler
import pandas as pd

def csv_values_to_numpy(file_path) -> np.ndarray:
    # Read the file and split values by commas
    array = np.loadtxt(file_path, delimiter=',')
    # Convert the data into a numpy array of floats
    numpy_array = np.array(array, dtype=float)
    return numpy_array

# Emotions in the RAVDESS dataset
emotions = {
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

# all_timbral_spectral_python = all_fvector
dataset = pd.read_csv("./python_csv/ravdess_newest.csv")
# dataset = pd.read_csv("./python_csv/ravdess_newfvector.csv")
# dataset = pd.read_csv("./python_csv/ravdess_bs2048hamming_ov50_nopreemph.csv")
dataset.head(5)

Unnamed: 0,label,class,ASC,ASC_VAR,ASS,ASS_VAR,ASF1,ASF2,ASF3,ASF4,...,MFCC11,MFCC12,MFCC13,MFCC14,MFCC15,MFCC16,MFCC17,MFCC18,MFCC19,MFCC20
0,03-01-01-01-01-01-01,neutral,0.262576,4.105433,1.846318,0.940693,0.568814,0.569572,0.569927,0.570066,...,-6.58907,-1.150713,2.979093,-1.692057,1.399712,2.269598,-1.924479,0.701273,2.745627,-1.022919
1,03-01-01-01-01-02-01,neutral,0.112637,3.723916,1.951623,0.911739,0.562607,0.567296,0.569895,0.570165,...,-6.595328,-0.601736,3.781223,-1.473038,1.07304,1.487969,-2.064317,0.508282,2.030039,-1.230331
2,03-01-01-01-02-01-01,neutral,0.182097,3.721678,1.898036,0.923167,0.579172,0.583726,0.58458,0.58482,...,-6.434525,-1.905909,2.324135,-1.775626,1.560941,2.296244,-1.674526,-1.00093,2.237385,-0.820765
3,03-01-01-01-02-02-01,neutral,-0.096935,3.272078,2.004574,0.80586,0.579285,0.584046,0.586181,0.586307,...,-6.261805,-0.81764,3.121935,0.390796,1.139399,2.495877,-1.836653,0.126683,1.711222,0.108703
4,03-01-03-01-01-01-01,happy,0.076252,3.025101,1.918917,0.948989,0.526643,0.527984,0.528867,0.529189,...,-7.307597,-1.803983,2.294008,-4.650183,-0.937298,1.940438,-3.026904,-1.706932,2.401735,-2.019348


In [48]:
# We would usually use df.describe(), but it provides a bit of a mess of information we don't need at the moment.
def print_features(df):
    # Check MFCC feature values
    features_df = df.iloc[:,2:]
    # print(features_df.shape)
    # print(features_df.head(5))
    feature_min = features_df.min().min()
    feature_max = features_df.max().max()
    # stack all features into a single series so we don't get a mean of means or stdev of stdevs
    feature_mean = features_df.stack().mean()
    feature_stdev = features_df.stack().std()
    print(f'Feature vector statistics:\
    min = {feature_min:.3f},\
    max = {feature_max:.3f},\
    mean = {feature_mean:.3f},\
    deviation = {feature_stdev:.3f}')
print_features(dataset)

Feature vector statistics:    min = -935.504,    max = 5090004.129,    mean = 24817.778,    deviation = 259000.225


# SVM tuning using cross-validation

In [49]:
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

def perform_cross_val(X, y, classifier):
    scaler = StandardScaler() # Gaussian with zero mean and unit variance.
    features_scaled = X
    features_scaled = scaler.fit_transform(features_scaled)
    print('Standard Scaling:')
    features_scaled_df = pd.DataFrame(features_scaled)
    print_features(features_scaled_df)
    scores = cross_val_score(classifier, features_scaled, y, cv=5)
    print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))


In [50]:
from sklearn.svm import SVC
X, y = dataset.iloc[:,2:], dataset.iloc[:,1]
X.shape, y.shape

exponents_c = list(range(-5,15,2))
Cs = [2 ** exp for exp in exponents_c]
exponents_gamma = list(range(-15,3,2))
gammas = [2 ** exp for exp in exponents_gamma]

for C in Cs:
    for gamma in gammas:
        print(f"C={C} gamma={gamma}")
        classifier = SVC(
            C=C,
            gamma=gamma,
            kernel='rbf',
            random_state=2137,
        )
        perform_cross_val(X=X,y=y,classifier=classifier)
        print('-------------------')

C=0.03125 gamma=3.0517578125e-05
Standard Scaling:
Feature vector statistics:    min = -15.646,    max = 15.114,    mean = 0.000,    deviation = 1.000
0.44 accuracy with a standard deviation of 0.05
-------------------
C=0.03125 gamma=0.0001220703125
Standard Scaling:
Feature vector statistics:    min = -15.646,    max = 15.114,    mean = 0.000,    deviation = 1.000
0.44 accuracy with a standard deviation of 0.05
-------------------
C=0.03125 gamma=0.00048828125
Standard Scaling:
Feature vector statistics:    min = -15.646,    max = 15.114,    mean = 0.000,    deviation = 1.000
0.44 accuracy with a standard deviation of 0.05
-------------------
C=0.03125 gamma=0.001953125
Standard Scaling:
Feature vector statistics:    min = -15.646,    max = 15.114,    mean = 0.000,    deviation = 1.000
0.44 accuracy with a standard deviation of 0.05
-------------------
C=0.03125 gamma=0.0078125
Standard Scaling:
Feature vector statistics:    min = -15.646,    max = 15.114,    mean = 0.000,    deviati

# Train SVM and k-NN

In [51]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA

def train_models(X, y, perform_pca: bool = False, pca_components: int = 2, C = 10, gamma = 0.125):

    scaler = StandardScaler() # Gaussian with zero mean and unit variance.
    # keep our unscaled features just in case we need to process them alternatively
    features_scaled = X
    features_scaled = scaler.fit_transform(features_scaled)

    if perform_pca:
        pca = PCA(n_components=pca_components)
        features_scaled = pca.fit_transform(features_scaled)
        print(f"explained ratio: {pca.explained_variance_ratio_}")
        print(f"explained ratio sum: {sum(pca.explained_variance_ratio_)}")

    print('Standard Scaling:')
    features_scaled_df = pd.DataFrame(features_scaled)
    print_features(features_scaled_df)

    X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(
        features_scaled,
        y,
        test_size=0.2, 
        random_state=69
    )

    classification_models = [
        KNeighborsClassifier(
            n_neighbors = 3,
            weights = 'distance',
            algorithm = 'brute',
            leaf_size = 30,
            n_jobs=4
        ),
        SVC(kernel='linear'),
        SVC(
            C=C,
            gamma=gamma,
            kernel='rbf',
            random_state=2137
        ),
        ]

    scores = []
    for model in classification_models:
        model.fit(X_train_scaled, y_train)
        score_train = model.score(X_train_scaled, y_train)
        score = model.score(X_test_scaled, y_test)
        model_name = type(model).__name__
        if model_name=='SVC' and model.kernel=='rbf': model_name+=' RBF kernel'
        scores.append((model_name, (f'{100*score_train:.2f}%'), (f'{100*score:.2f}%')))
    # Make it pretty
    scores_df = pd.DataFrame(scores,columns=['Classifier','Train Accuracy Score', 'Test Accuracy Score'])
    
    print(scores_df.sort_values(by='Test Accuracy Score',axis=0,ascending=False))

In [54]:
perform_pca = False
pca_components = 20
# Tuning SVM RBF
C = 2**3
gamma = 2**-5

In [55]:
feature_subset_map = {
    'BasicSpectral': range(2,56),
    'SignalParameters': range(56, 58),
    'TimbralTemporal': range(58,60),
    'TimbralSpectral': range(60,66),
    'SpectralBasis': range(66,109),
    'MFCC': range(109,129),
}

In [56]:
# Experiment 1: All Available MPEG-7
columns_to_remove = list(feature_subset_map['MFCC'])
dataset_1 = dataset.drop(dataset.columns[columns_to_remove], axis=1)
X_1, y_1 = dataset_1.iloc[:,2:], dataset_1.iloc[:,1]
train_models(X_1, y_1, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

Standard Scaling:
Feature vector statistics:    min = -15.646,    max = 15.114,    mean = 0.000,    deviation = 1.000
             Classifier Train Accuracy Score Test Accuracy Score
1                   SVC               86.16%              69.79%
0  KNeighborsClassifier              100.00%              67.71%
2        SVC RBF kernel              100.00%              63.54%


In [57]:
# Experiment 2: Basic Spectral + Timbral
columns_to_remain = [0,1] + list(feature_subset_map['BasicSpectral']) + list(feature_subset_map['TimbralTemporal']) + list(feature_subset_map['TimbralSpectral'])
dataset_2 = dataset.iloc[:, columns_to_remain]
X_2, y_2 = dataset_2.iloc[:,2:], dataset_2.iloc[:,1]
train_models(X_2, y_2, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

Standard Scaling:
Feature vector statistics:    min = -5.700,    max = 4.601,    mean = 0.000,    deviation = 1.000
             Classifier Train Accuracy Score Test Accuracy Score
1                   SVC               76.24%              78.12%
2        SVC RBF kernel               96.87%              77.08%
0  KNeighborsClassifier              100.00%              70.83%


In [58]:
# Experiment 3: Only MFCCs
columns_to_remain = [0,1] + list(feature_subset_map['MFCC'])
dataset_3 = dataset.iloc[:, columns_to_remain]
X_3, y_3 = dataset_3.iloc[:,2:], dataset_3.iloc[:,1]
train_models(X_3, y_3, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

Standard Scaling:
Feature vector statistics:    min = -3.805,    max = 5.716,    mean = -0.000,    deviation = 1.000
             Classifier Train Accuracy Score Test Accuracy Score
0  KNeighborsClassifier              100.00%              81.25%
2        SVC RBF kernel               93.47%              76.04%
1                   SVC               71.54%              59.38%


In [59]:
# Experiment 4: Basic Spectral + Timbral + 20 MFCCs
columns_to_remain = [0,1] + list(feature_subset_map['BasicSpectral']) + list(feature_subset_map['TimbralTemporal']) + list(feature_subset_map['TimbralSpectral']) + list(feature_subset_map['MFCC'])
dataset_4 = dataset.iloc[:, columns_to_remain]
X_4, y_4 = dataset_4.iloc[:,2:], dataset_4.iloc[:,1]
train_models(X_4, y_4, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

Standard Scaling:
Feature vector statistics:    min = -5.700,    max = 5.716,    mean = 0.000,    deviation = 1.000
             Classifier Train Accuracy Score Test Accuracy Score
2        SVC RBF kernel              100.00%              82.29%
0  KNeighborsClassifier              100.00%              79.17%
1                   SVC               87.47%              72.92%


In [None]:
# Experiment 5: Subcategories of MPEG-7
columns_to_remain_list = [
    [0,1] + list(feature_subset_map['BasicSpectral']),
    [0,1] + list(feature_subset_map['SignalParameters']),
    [0,1] + list(feature_subset_map['TimbralTemporal']),
    [0,1] + list(feature_subset_map['TimbralSpectral']),
    [0,1] + list(feature_subset_map['SpectralBasis'])
]

for columns_to_remain_instance in columns_to_remain_list:
    dataset_5 = dataset.iloc[:, columns_to_remain_instance]
    X_5, y_5 = dataset_5.iloc[:,2:], dataset_5.iloc[:,1]
    train_models(X_5, y_5, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

TypeError: list indices must be integers or slices, not tuple