In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
import pandas as pd

def csv_values_to_numpy(file_path) -> np.ndarray:
    # Read the file and split values by commas
    array = np.loadtxt(file_path, delimiter=',')
    # Convert the data into a numpy array of floats
    numpy_array = np.array(array, dtype=float)
    return numpy_array

# Emotions in the RAVDESS dataset
emotions = {
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

# all_timbral_spectral_python = all_fvector
dataset = pd.read_csv("./python_csv/ravdess_basspectr_timbral_no_preemphasis.csv")
dataset.head(5)

Unnamed: 0,label,class,ASC,ASS,ASF,LAT,TC,SC,HSC,HSD,HSS,HSV
0,03-01-01-01-01-01-01,neutral,0.326844,1.921181,0.312615,-0.556346,1.578731,6974.496812,2619.138535,0.197505,0.84238,0.074087
1,03-01-01-01-01-02-01,neutral,0.04679,2.017717,0.268841,-0.572473,1.592446,6973.020862,2894.047724,0.189728,0.859455,0.084299
2,03-01-01-01-02-01-01,neutral,0.092315,2.008383,0.280217,-0.808794,1.527533,7016.023531,3141.525852,0.201903,0.832002,0.129106
3,03-01-01-01-02-02-01,neutral,-0.13594,2.048527,0.239789,-0.83509,1.473981,6826.133575,3334.957798,0.184985,0.830947,0.098327
4,03-01-02-01-01-01-01,calm,0.129211,1.862701,0.290359,-0.782845,1.733736,6614.486816,3201.1927,0.187025,0.868279,0.136652


In [2]:
# We would usually use df.describe(), but it provides a bit of a mess of information we don't need at the moment.
def print_features(df):
    # Check MFCC feature values
    features_df = df.iloc[:,2:]
    # print(features_df.shape)
    # print(features_df.head(5))
    feature_min = features_df.min().min()
    feature_max = features_df.max().max()
    # stack all features into a single series so we don't get a mean of means or stdev of stdevs
    feature_mean = features_df.stack().mean()
    feature_stdev = features_df.stack().std()
    print(f'\nSome MPEG-7 features:\
    min = {feature_min:.3f},\
    max = {feature_max:.3f},\
    mean = {feature_mean:.3f},\
    deviation = {feature_stdev:.3f}')
print_features(dataset)


Some MPEG-7 features:    min = -3.064,    max = 7135.559,    mean = 825.156,    deviation = 1760.913


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

def train_models(dataset_subset):

    scaler = StandardScaler() # Gaussian with zero mean and unit variance.
    # keep our unscaled features just in case we need to process them alternatively
    features = dataset_subset.iloc[:,2:]
    features_scaled = features
    features_scaled = scaler.fit_transform(features_scaled)

    print('\033[1m'+'Standard Scaling:\n'+'\033[0m')
    features_scaled_df = pd.DataFrame(features_scaled)
    print_features(features_scaled_df)

    dataset_emotions = dataset_subset.iloc[:,1]

    X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(
        features_scaled,
        dataset_emotions,
        test_size=0.2, 
        random_state=69
    )

    classification_models = [
        KNeighborsClassifier(
            n_neighbors = 5,
            weights = 'distance',
            algorithm = 'brute',
            leaf_size = 30,
            n_jobs=4
        ),
        SVC(kernel='linear'),
        SVC(
            C=10,
            gamma='auto',
            kernel='rbf',
            random_state=69
        ),
        DecisionTreeClassifier(),
        RandomForestClassifier(),
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis(),
        ]

    scores = []
    for model in classification_models:
        model.fit(X_train_scaled, y_train)
        score_train = model.score(X_train_scaled, y_train)
        score = model.score(X_test_scaled, y_test)
        model_name = type(model).__name__
        if model_name=='SVC' and model.kernel=='rbf': model_name+=' RBF kernel'
        scores.append((model_name, (f'{100*score_train:.2f}%'), (f'{100*score:.2f}%')))
    # Make it pretty
    scores_df = pd.DataFrame(scores,columns=['Classifier','Train Accuracy Score', 'Test Accuracy Score'])
    
    print(scores_df.sort_values(by='Test Accuracy Score',axis=0,ascending=False))

In [4]:
# Just Basic Spectral
train_models(dataset.iloc[:,:5])

[1mStandard Scaling:
[0m

Some MPEG-7 features:    min = -1.369,    max = 2.964,    mean = 0.000,    deviation = 1.001
                      Classifier Train Accuracy Score Test Accuracy Score
2                 SVC RBF kernel               32.47%              26.90%
3         DecisionTreeClassifier              100.00%              21.38%
4         RandomForestClassifier              100.00%              20.69%
5             AdaBoostClassifier               27.95%              19.31%
7  QuadraticDiscriminantAnalysis               22.22%              18.62%
0           KNeighborsClassifier              100.00%              17.93%
1                            SVC               19.44%              16.55%
6                     GaussianNB               19.10%              15.17%




In [7]:
# Just Timbral Spectral
dataset_timbral_spectral = dataset.iloc[:, list(range(2)) + list(range(-5, 0))]
# dataset_timbral_spectral.head(5)
train_models(dataset_timbral_spectral)

[1mStandard Scaling:
[0m

Some MPEG-7 features:    min = -3.586,    max = 4.537,    mean = -0.000,    deviation = 1.000
                      Classifier Train Accuracy Score Test Accuracy Score
7  QuadraticDiscriminantAnalysis               35.24%              30.34%
1                            SVC               30.38%              28.97%
4         RandomForestClassifier              100.00%              28.28%
2                 SVC RBF kernel               57.64%              26.90%
6                     GaussianNB               29.17%              24.14%
0           KNeighborsClassifier              100.00%              22.07%
3         DecisionTreeClassifier              100.00%              21.38%
5             AdaBoostClassifier               26.91%              13.10%




In [8]:
# All Timbral
dataset_timbral = dataset.iloc[:, list(range(2)) + list(range(-7, 0))]
# dataset_timbral.head(5)
train_models(dataset_timbral)

[1mStandard Scaling:
[0m

Some MPEG-7 features:    min = -3.586,    max = 4.537,    mean = 0.000,    deviation = 1.000
                      Classifier Train Accuracy Score Test Accuracy Score
2                 SVC RBF kernel               75.52%              35.17%
4         RandomForestClassifier              100.00%              34.48%
7  QuadraticDiscriminantAnalysis               43.06%              31.72%
0           KNeighborsClassifier              100.00%              30.34%
1                            SVC               37.15%              30.34%
3         DecisionTreeClassifier              100.00%              26.90%
6                     GaussianNB               29.69%              26.90%
5             AdaBoostClassifier               30.03%              26.21%




In [6]:
# Basic Spectral (Without ASE) + Timbral
train_models(dataset)

[1mStandard Scaling:
[0m

Some MPEG-7 features:    min = -3.586,    max = 5.392,    mean = -0.000,    deviation = 1.000




                      Classifier Train Accuracy Score Test Accuracy Score
2                 SVC RBF kernel               86.63%              38.62%
4         RandomForestClassifier              100.00%              38.62%
7  QuadraticDiscriminantAnalysis               53.12%              31.03%
0           KNeighborsClassifier              100.00%              30.34%
1                            SVC               43.40%              30.34%
5             AdaBoostClassifier               29.69%              24.14%
3         DecisionTreeClassifier              100.00%              22.76%
6                     GaussianNB               32.47%              22.76%
