In [141]:
import numpy as np
import pandas as pd

def csv_values_to_numpy(file_path) -> np.ndarray:
    # Read the file and split values by commas
    array = np.loadtxt(file_path, delimiter=',')
    # Convert the data into a numpy array of floats
    numpy_array = np.array(array, dtype=float)
    return numpy_array

# Emotions in the RAVDESS dataset
emotions = {
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

# all_timbral_spectral_python = all_fvector
# dataset = pd.read_csv("./python_csv/ravdess_3classes_preemphasis.csv")
# dataset = pd.read_csv("./python_csv/ravdess_wholedataset.csv")
dataset = pd.read_csv("./python_csv/nEMO_wholedataset.csv")
dataset = dataset.fillna(0)
# dataset = pd.read_csv("./python_csv/ravdess_newest.csv")
# dataset = pd.read_csv("./python_csv/ravdess_newfvector.csv")
# dataset = pd.read_csv("./python_csv/ravdess_bs2048hamming_ov50_nopreemph.csv")
dataset = dataset.drop_duplicates(subset='label')
dataset.head(5), len(dataset)
# dataset.iloc[40:4000:200,:]

(          label  class       ASC   ASC_VAR       ASS   ASS_VAR      ASF1  \
 0   EB0_anger_1  anger -1.014424  1.664194  1.316892  0.381699  0.018763   
 1  EB0_anger_10  anger -0.981852  0.980242  1.067413  0.267500  0.028557   
 2  EB0_anger_11  anger -1.308893  0.833898  1.005187  0.196945  0.028928   
 3  EB0_anger_12  anger -1.196214  1.190325  1.099372  0.384295  0.036835   
 4  EB0_anger_13  anger -1.061688  1.266889  1.203192  0.414269  0.039796   
 
        ASF2      ASF3      ASF4  ...     MFCC11    MFCC12    MFCC13  \
 0  0.019457  0.021270  0.022729  ...  -6.794261 -1.364080 -8.251892   
 1  0.029120  0.035248  0.035732  ...  -7.334883  0.527785 -9.577043   
 2  0.040296  0.045253  0.046051  ... -12.745518  4.687151 -2.866292   
 3  0.040647  0.046487  0.049039  ... -10.367361  0.666284 -9.921471   
 4  0.043942  0.046717  0.047542  ... -16.382733  1.155685 -5.097772   
 
       MFCC14    MFCC15     MFCC16     MFCC17    MFCC18    MFCC19     MFCC20  
 0  -7.603094 -1.047732

In [142]:
# We would usually use df.describe(), but it provides a bit of a mess of information we don't need at the moment.
def print_features(df):
    # Check MFCC feature values
    features_df = df.iloc[:,2:]
    # print(features_df.shape)
    # print(features_df.head(5))
    feature_min = features_df.min().min()
    feature_max = features_df.max().max()
    # stack all features into a single series so we don't get a mean of means or stdev of stdevs
    feature_mean = features_df.stack().mean()
    feature_stdev = features_df.stack().std()
    print(f'Feature vector statistics:\
    min = {feature_min:.3f},\
    max = {feature_max:.3f},\
    mean = {feature_mean:.3f},\
    deviation = {feature_stdev:.3f}')
print_features(dataset)

Feature vector statistics:    min = -1070.190,    max = 5704079.508,    mean = 16373.052,    deviation = 190434.915


# SVM tuning using cross-validation

In [143]:
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import Normalizer

def perform_cross_val(X, y, classifier):
    scaler = StandardScaler() # Gaussian with zero mean and unit variance.
    features_scaled = X
    features_scaled = scaler.fit_transform(features_scaled)
    print('Standard Scaling:')
    features_scaled_df = pd.DataFrame(features_scaled)
    print_features(features_scaled_df)
    scores = cross_val_score(classifier, features_scaled, y, cv=5)
    print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))


# Train SVM and k-NN

In [144]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt


def train_models(X, y, scaler = None, perform_pca: bool = False, pca_components: int = 2, C = 10, gamma = 0.125):

    if scaler is None:
        scaler = StandardScaler()
    # keep our unscaled features just in case we need to process them alternatively
    features_scaled = X
    features_scaled = scaler.fit_transform(features_scaled)

    if perform_pca:
        pca = PCA(n_components=pca_components)
        features_scaled = pca.fit_transform(features_scaled)
        # print(f"explained ratio: {pca.explained_variance_ratio_}")
        # print(f"explained ratio sum: {sum(pca.explained_variance_ratio_)}")

    # print('Standard Scaling:')
    # features_scaled_df = pd.DataFrame(features_scaled)
    # print_features(features_scaled_df)

    X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(
        features_scaled,
        y,
        test_size=0.2, 
        random_state=69
    )

    classification_models = [
        KNeighborsClassifier(
            n_neighbors = 3,
            weights = 'distance',
            algorithm = 'brute',
            leaf_size = 30,
            n_jobs=4
        ),
        SVC(kernel='linear'),
        SVC(
            C=C,
            gamma=gamma,
            kernel='rbf',
            random_state=2137
        ),
        ]

    scores = []
    # Create subplots for 1x3 layout
    # fig, axes = plt.subplots(3, 1, figsize=(6, 18))  # Adjust the figure size as needed

    for i, model in enumerate(classification_models):
        model.fit(X_train_scaled, y_train)
        score_train = model.score(X_train_scaled, y_train)
        score = model.score(X_test_scaled, y_test)
        model_name = type(model).__name__
        if model_name == 'SVC' and model.kernel == 'rbf': 
            model_name += ' RBF kernel'
        scores.append((model_name, f'{100 * score_train:.2f}%', f'{100 * score:.2f}%'))

        # # Confusion matrix
        # predictions = model.predict(X_test_scaled)
        # cm = confusion_matrix(y_test, predictions, labels=model.classes_)
        # labels_pl_nEMO = ["złość", "strach", "szczęście", "neutralność", "smutek", "zaskoczenie"]
        
        # # Use subplot axes
        # disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels_pl_nEMO)
        # disp.plot(ax=axes[i], colorbar=False)  # Add `colorbar=False` to simplify the layout
        # model_names_pl = ["k-NN", "SVM, jądro libiowe", "SVM, jądro RBF"]
        # axes[i].set_title(f"Macierz pomyłek: {model_names_pl[i]}")
        # axes[i].set_xticklabels(labels_pl_nEMO, rotation=45, ha='right')  # Rotate x-ticks
        # axes[i].set_xlabel("Klasa przewidywana")
        # axes[i].set_ylabel("Klasa rzeczywista")

    # Make it pretty
    scores_df = pd.DataFrame(scores,columns=['Classifier','Train Accuracy Score', 'Test Accuracy Score'])
    
    print(scores_df.sort_values(by='Test Accuracy Score',axis=0,ascending=False))
    # Adjust layout to avoid overlap
    # plt.tight_layout()
    # plt.show()

In [145]:
perform_pca = True
pca_components = 20
# Tuning SVM RBF
C = 2**3
gamma = 2**-5

In [146]:
feature_subset_map = {
    'BasicSpectral': range(2,56),
    'SignalParameters': range(56, 58),
    'TimbralTemporal': range(58,60),
    'TimbralSpectral': range(60,66),
    'SpectralBasis': range(66,109),
    'MFCC': range(109,129),
}

In [147]:
# Experiment 1: All Available MPEG-7
columns_to_remove = list(feature_subset_map['MFCC'])
dataset_1 = dataset.drop(dataset.columns[columns_to_remove], axis=1)
X_1, y_1 = dataset_1.iloc[:,2:], dataset_1.iloc[:,1]
train_models(X_1, y_1, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

             Classifier Train Accuracy Score Test Accuracy Score
2        SVC RBF kernel               95.84%              66.89%
0  KNeighborsClassifier              100.00%              57.97%
1                   SVC               50.70%              49.72%


In [148]:
# Experiment 2: Basic Spectral + Timbral
columns_to_remain = [0,1] + list(feature_subset_map['BasicSpectral']) + list(feature_subset_map['TimbralTemporal']) + list(feature_subset_map['TimbralSpectral'])
dataset_2 = dataset.iloc[:, columns_to_remain]
X_2, y_2 = dataset_2.iloc[:,2:], dataset_2.iloc[:,1]
train_models(X_2, y_2, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

             Classifier Train Accuracy Score Test Accuracy Score
2        SVC RBF kernel               94.34%              76.25%
0  KNeighborsClassifier              100.00%              67.67%
1                   SVC               59.77%              58.97%


In [149]:
# Experiment 3: Only MFCCs
columns_to_remain = [0,1] + list(feature_subset_map['MFCC'])
dataset_3 = dataset.iloc[:, columns_to_remain]
X_3, y_3 = dataset_3.iloc[:,2:], dataset_3.iloc[:,1]
train_models(X_3, y_3, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

             Classifier Train Accuracy Score Test Accuracy Score
2        SVC RBF kernel               96.65%              88.52%
0  KNeighborsClassifier              100.00%              84.50%
1                   SVC               61.22%              56.63%


In [150]:
# Experiment 4: Basic Spectral + Timbral + 20 MFCCs
columns_to_remain = [0,1] + list(feature_subset_map['BasicSpectral']) + list(feature_subset_map['TimbralTemporal']) + list(feature_subset_map['TimbralSpectral']) + list(feature_subset_map['MFCC'])
dataset_4 = dataset.iloc[:, columns_to_remain]
X_4, y_4 = dataset_4.iloc[:,2:], dataset_4.iloc[:,1]
train_models(X_4, y_4, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

             Classifier Train Accuracy Score Test Accuracy Score
2        SVC RBF kernel               99.89%              88.85%
0  KNeighborsClassifier              100.00%              82.16%
1                   SVC               62.28%              59.98%


In [151]:
# Experiment 5: Subcategories of MPEG-7
columns_to_remain_list = [
    [0,1] + list(feature_subset_map['BasicSpectral']),
    [0,1] + list(feature_subset_map['SignalParameters']),
    [0,1] + list(feature_subset_map['TimbralTemporal']),
    [0,1] + list(feature_subset_map['TimbralSpectral']),
    [0,1] + list(feature_subset_map['SpectralBasis']),
]
for columns_to_remain_instance in columns_to_remain_list:
    dataset_5 = dataset.iloc[:, columns_to_remain_instance]
    X_5, y_5 = dataset_5.iloc[:,2:], dataset_5.iloc[:,1]
    train_models(X_5, y_5, perform_pca=perform_pca, pca_components=pca_components, C=C, gamma=gamma)

             Classifier Train Accuracy Score Test Accuracy Score
2        SVC RBF kernel               87.30%              75.25%
0  KNeighborsClassifier              100.00%              66.44%
1                   SVC               56.19%              57.08%


ValueError: n_components=20 must be between 0 and min(n_samples, n_features)=2 with svd_solver='full'

In [None]:
# from sklearn.svm import SVC

# # Tune Experiment2
# # X_tune, y_tune = X_2, y_2
# # Tune Experiment3
# # X_tune, y_tune = X_3, y_3
# # Tune Experiment4
# X_tune, y_tune = X_4, y_4
# # X, y = dataset.iloc[:,2:], dataset.iloc[:,1]
# # X.shape, y.shape

# exponents_c = list(range(-5,15,2))
# exponents_gamma = list(range(-15,3,2))

# for exponent_c in exponents_c:
#     for exponent_gamma in exponents_gamma:
#         print(f"exponent_c={exponent_c} exponent_gamma={exponent_gamma}")
#         classifier = SVC(
#             C=2**exponent_c,
#             gamma=2**exponent_gamma,
#             kernel='rbf',
#             random_state=2137,
#         )
#         perform_cross_val(X=X_tune,y=y_tune,classifier=classifier)
#         print('-------------------')

In [None]:
# from sklearn.model_selection import GridSearchCV
# from sklearn.svm import SVC

# # Define the SVM model
# svm = SVC(kernel='rbf', random_state=2137)

# # Define the parameter grid for C and gamma
# exponents_c = list(range(-5,15,2))
# exponents_gamma = list(range(-15,3,2))
# param_grid = {
#     'C': [2 ** exp for exp in exponents_c],
#     'gamma': [2 ** exp for exp in exponents_gamma]
# }

# # Perform grid search with cross-validation
# grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=5, scoring='accuracy')
# # Tune Experiment2
# X_tune, y_tune = X_2, y_2
# # Tune Experiment3
# # X_tune, y_tune = X_3, y_3
# # Tune Experiment4
# # X_tune, y_tune = X_4, y_4
# grid_search.fit(X_tune, y_tune)

# # Best parameters
# print("Best parameters:", grid_search.best_params_)

# # Best score
# print("Best cross-validation score:", grid_search.best_score_)


In [None]:
C, gamma

(8, 0.03125)

# Visualization

In [None]:
# import sys
# sys.path.append("..")
# import plap
# import numpy as np

# block_size = 1024
# overlap = 50
# step = int((100 - overlap) / 100 * block_size)
# window_type = "hamming"

# input_file = "C:/Users/windo/Documents/ravdess-dataset/Actor_21/03-01-01-01-01-02-21.wav"
# preprocessor = plap.Preprocessor(preemphasis_coeff=0.68, block_size=block_size, window_type=window_type)
# fvector = plap.FeatureVector("asc")
# # fvector = plap.FeatureVector("ase","asf")

# plap.parameterize(audio_path=input_file, fvector=fvector, preprocessor=preprocessor)

# import librosa
# signal, sr = librosa.load(input_file, sr=None)

# power_spectrum = np.abs(librosa.stft(y=signal, n_fft=block_size, hop_length=step, window=window_type)) ** 2


# # Find ASF middle of band freqs
# low_edge = 250
# high_edge = 16000
# num_bands = int(np.floor(4 * np.log2(high_edge / low_edge)))
# freqs = librosa.fft_frequencies(sr=sr, n_fft=block_size)

# band_centers = []
# for k in range(num_bands):
#     # Get the frequency indices for the current band
#     f_low = low_edge * (2 ** (k / 4))
#     f_high = high_edge * (2 ** ((k+1) / 4))
#     band_centers.append(np.searchsorted(freqs, (f_high+f_low)/2))

# import matplotlib.pyplot as plt
# # times = librosa.times_like(power_spectrum)
# # fig, ax = plt.subplots()
# # librosa.display.specshow(librosa.amplitude_to_db(power_spectrum, ref=np.max),
# #                          y_axis='log', x_axis='time', ax=ax)
# # # ax.legend(loc='upper right')
# # ax.set(title='log Power spectrogram')

# # ase = fvector.values[:372]
# # asf = fvector.values[373:]
# # Plot
# # plt.figure(figsize=(10, 5))
# # plt.plot(ase, label='Array 1 (1, 372)', marker='o')
# # plt.plot(band_centers, asf, label='Array 2 (1, 24)', marker='x')
# # plt.legend()
# # plt.title("Two Arrays with Different Lengths")
# # plt.xlabel("Index")
# # plt.ylabel("Value")
# # plt.show()
# fvector.values.shape