### Importing Libraries

In [1]:
from pathlib import Path

import librosa
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.utils import shuffle

from algorithms import encode_columns, svm_model, accuracy_calculator, random_forest_model, array_column_spread

### Loading the dataset using Pandas
The data is found in development.csv (development set): a comma-separated values file containing the records from the development set. This portion does have the action and object columns, which you should use to obtain the labels to train and validate your models.
The dataset consists of a collection of audio files in WAV format.

Each record is characterized by several attributes. The following is a short description for each of them.
- path: the path of the audio file.
- speakerId: the id of the speaker.
- action: the type of action required through the intent.
- object: the device involved by intent.
- Self-reported fluency level: the speaking fluency of the speaker.
- First Language spoken: the first language spoken by the speaker.
- Current language used for work/school: the main language spoken by the speaker during daily activities.
- gender: the gender of the speaker.
- ageRange: the age range of the speaker.

In [2]:
# Read the development and evaluation record tables, in that order.
df, df_eval = (
    pd.read_csv(csv_path)
    for csv_path in ("dsl_data/development.csv", "dsl_data/evaluation.csv")
)

In [3]:
# Target label: the action and object strings concatenated into one intent
# (e.g. 'activate' + 'music' -> 'activatemusic'). The columns are picked by
# name rather than by position, so the label stays correct even if the column
# order of df differs from the order the positional slice assumed.
y = (df["action"] + df["object"]).values
y

array(['change languagenone', 'activatemusic', 'deactivatelights', ...,
       'deactivatelights', 'deactivatelights', 'increasevolume'],
      dtype=object)

In [4]:
# Metadata columns handed to encode_columns. The trailing space in the first
# name is intentional: it matches the column label as it appears in df.
column_names = [
    "Self-reported fluency level ",
    "First Language spoken",
    "Current language used for work/school",
    "speakerId",
    "gender",
    "ageRange",
]


In [5]:
# Encode the listed metadata columns of the evaluation table. The return value
# is discarded, so encode_columns (from the local `algorithms` module)
# presumably rewrites df_eval in place -- TODO confirm.
# NOTE(review): df_eval and df are encoded by two separate calls; verify that
# both calls map the same category to the same code.
encode_columns(df_eval, column_names)

In [6]:

# Encode the same metadata columns of the development table. The return value
# is discarded, so encode_columns presumably rewrites df in place -- TODO
# confirm against the local `algorithms` module.
encode_columns(df,column_names)

In [7]:
# Decode every clip listed in the 'path' column with librosa's default
# settings; each call yields a (waveform, sampling rate) pair.
loaded_clips = [librosa.load(audio) for audio in df['path']]
data_array = [waveform for waveform, _ in loaded_clips]
rate_array = [sample_rate for _, sample_rate in loaded_clips]

# Keep the decoded audio next to the metadata for the feature extractors.
df['data'] = data_array
df['rate'] = rate_array

### Chroma feature

In [8]:
def chroma_feature(frame=None):
    """Add a 'chroma' column holding one averaged chroma vector per clip.

    ``librosa.feature.chroma_stft`` is evaluated on each row's waveform
    ('data') at its sampling rate ('rate'); the resulting matrix is averaged
    along axis 1, leaving one value per chroma bin.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to featurise. It must already contain the 'data' and 'rate'
        columns. Defaults to the notebook-level ``df``, which is what the
        zero-argument call has always operated on.

    Returns
    -------
    None
        ``frame`` is modified in place.
    """
    if frame is None:
        frame = df
    frame['chroma'] = [
        np.mean(librosa.feature.chroma_stft(y=data, sr=rate), axis=1)
        for data, rate in zip(frame['data'], frame['rate'])
    ]
chroma_feature()

In [9]:
# Store the helper's result as 'mean_chroma', the column the model's feature
# list reads. The MFCC cell stores array_column_spread's return value the same
# way; discarding it here leaves the column missing on a fresh top-to-bottom
# run (unless the helper adds it as a side effect -- TODO confirm).
df['mean_chroma'] = array_column_spread(df, 'chroma')

### Tonnetz feature

In [11]:
def tonnetz_feature(frame=None):
    """Add a 'tonnetz' column holding one averaged tonnetz vector per clip.

    ``librosa.feature.tonnetz`` is evaluated on each row's waveform ('data')
    at its sampling rate ('rate'); the resulting matrix is averaged along
    axis 1, leaving one value per tonnetz dimension.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to featurise. It must already contain the 'data' and 'rate'
        columns. Defaults to the notebook-level ``df``, which is what the
        zero-argument call has always operated on.

    Returns
    -------
    None
        ``frame`` is modified in place.
    """
    if frame is None:
        frame = df
    frame['tonnetz'] = [
        np.mean(librosa.feature.tonnetz(y=data, sr=rate), axis=1)
        for data, rate in zip(frame['data'], frame['rate'])
    ]
tonnetz_feature()

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **

In [56]:
# Store the helper's result as 'mean_tonnetz', the column the model's feature
# list reads. The MFCC cell stores array_column_spread's return value the same
# way; discarding it here leaves the column missing on a fresh top-to-bottom
# run (unless the helper adds it as a side effect -- TODO confirm).
df['mean_tonnetz'] = array_column_spread(df, 'tonnetz')

### Spectral Contrast

In [57]:
def spectral_contrast(frame=None):
    """Add a 'spectral_contrast' column with one averaged vector per clip.

    ``librosa.feature.spectral_contrast`` is evaluated on each row's waveform
    ('data') at its sampling rate ('rate'); the resulting matrix is averaged
    along axis 1, leaving one value per contrast band.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to featurise. It must already contain the 'data' and 'rate'
        columns. Defaults to the notebook-level ``df``, which is what the
        zero-argument call has always operated on.

    Returns
    -------
    None
        ``frame`` is modified in place.
    """
    if frame is None:
        frame = df
    frame['spectral_contrast'] = [
        np.mean(librosa.feature.spectral_contrast(y=data, sr=rate), axis=1)
        for data, rate in zip(frame['data'], frame['rate'])
    ]
spectral_contrast()
    

In [61]:
# Store the helper's result as 'mean_spectral_contrast', the column the model's
# feature list reads. The MFCC cell stores array_column_spread's return value
# the same way; discarding it here leaves the column missing on a fresh
# top-to-bottom run (unless the helper adds it as a side effect -- TODO confirm).
df['mean_spectral_contrast'] = array_column_spread(df, 'spectral_contrast')

### Root Mean Square Energy (RMSE)

In [66]:
def rmse_feature(frame=None):
    """Add an 'rmse' column holding the mean RMS energy of each clip.

    ``librosa.feature.rms`` is evaluated on each row's waveform ('data') and
    all returned values are averaged into a single scalar. The sampling rate
    is not an input of this computation, so only the 'data' column is read.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to featurise. It must already contain the 'data' column.
        Defaults to the notebook-level ``df``, which is what the
        zero-argument call has always operated on.

    Returns
    -------
    None
        ``frame`` is modified in place.
    """
    if frame is None:
        frame = df
    frame['rmse'] = [
        np.mean(librosa.feature.rms(y=data)) for data in frame['data']
    ]
rmse_feature()

### Spectral Flatness (SF)

In [67]:
def spectral_flatness(frame=None):
    """Add an 'sf' column holding the mean spectral flatness of each clip.

    ``librosa.feature.spectral_flatness`` is evaluated on each row's waveform
    ('data') and all returned values are averaged into a single scalar. The
    sampling rate is not an input of this computation, so only the 'data'
    column is read.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to featurise. It must already contain the 'data' column.
        Defaults to the notebook-level ``df``, which is what the
        zero-argument call has always operated on.

    Returns
    -------
    None
        ``frame`` is modified in place.
    """
    if frame is None:
        frame = df
    frame['sf'] = [
        np.mean(librosa.feature.spectral_flatness(y=data))
        for data in frame['data']
    ]

spectral_flatness()

### Spectral Roll-off (SRO)

In [68]:
def sro_feature(frame=None):
    """Add a 'spectral_rolloff' column with the mean roll-off of each clip.

    ``librosa.feature.spectral_rolloff`` is evaluated on each row's waveform
    ('data') at its sampling rate ('rate') and all returned values are
    averaged into a single scalar.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to featurise. It must already contain the 'data' and 'rate'
        columns. Defaults to the notebook-level ``df``, which is what the
        zero-argument call has always operated on.

    Returns
    -------
    None
        ``frame`` is modified in place.
    """
    if frame is None:
        frame = df
    frame['spectral_rolloff'] = [
        np.mean(librosa.feature.spectral_rolloff(y=data, sr=rate))
        for data, rate in zip(frame['data'], frame['rate'])
    ]

sro_feature()

### Zero-Crossing Rate

In [70]:
def zcr_feature(frame=None):
    """Add a 'zero_crossing_rate' column with each clip's zero-crossing count.

    Despite the column name, the stored value is the total number of
    crossings flagged by ``librosa.zero_crossings`` for the whole waveform;
    it is not normalised by the clip length. The column name is kept because
    the model's feature list refers to it.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to featurise. It must already contain the 'data' column.
        Defaults to the notebook-level ``df``, which is what the
        zero-argument call has always operated on.

    Returns
    -------
    None
        ``frame`` is modified in place.
    """
    if frame is None:
        frame = df
    # count_nonzero tallies the boolean mask inside NumPy instead of adding
    # the flags one by one with the builtin sum; the count is the same.
    frame['zero_crossing_rate'] = [
        np.count_nonzero(librosa.zero_crossings(data))
        for data in frame['data']
    ]

zcr_feature()


### Mel-frequency cepstral coefficients (MFCC)

In [71]:
def mfcc_feature(frame=None, n_mfcc=50):
    """Add an 'mfcc' column holding one averaged MFCC vector per clip.

    ``librosa.feature.mfcc`` is evaluated on each row's waveform ('data') at
    its sampling rate ('rate'); the resulting matrix is averaged along axis 1,
    leaving one value per coefficient.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to featurise. It must already contain the 'data' and 'rate'
        columns. Defaults to the notebook-level ``df``, which is what the
        zero-argument call has always operated on.
    n_mfcc : int, optional
        Number of coefficients requested from librosa. Defaults to 50, the
        value that was previously hard-coded.

    Returns
    -------
    None
        ``frame`` is modified in place.
    """
    if frame is None:
        frame = df
    frame['mfcc'] = [
        np.mean(librosa.feature.mfcc(y=data, sr=rate, n_mfcc=n_mfcc), axis=1)
        for data, rate in zip(frame['data'], frame['rate'])
    ]

mfcc_feature()

In [77]:
# Earlier inline variant (plain per-row average), kept for reference:
# df['mean_mfcc'] = df['mfcc'].apply(lambda x: sum(x) / len(x))

# Store what array_column_spread (local `algorithms` module) returns for the
# 'mfcc' column as 'mean_mfcc'. Judging by the column name and the commented
# line above, this is presumably the per-row mean -- TODO confirm.
df['mean_mfcc'] = array_column_spread(df,'mfcc')

In [78]:
# Show the column labels currently present in df.
df.columns

Index(['Id', 'path', 'action', 'object', 'Self-reported fluency level ',
       'First Language spoken', 'Current language used for work/school',
       'speakerId', 'gender', 'ageRange', 'data', 'rate', 'chroma',
       'mean_chroma', 'tonnetz', 'mean_tonnetz', 'spectral_contrast',
       'mean_spectral_contrast', 'rmse', 'sf', 'spectral_rolloff',
       'zero_crossing_rate', 'mfcc', 'mean_mfcc'],
      dtype='object')

In [83]:
# Model inputs: encoded speaker metadata followed by the per-clip audio
# summaries. The trailing space in the first name matches the df column label.
features = [
    'Self-reported fluency level ',
    'First Language spoken',
    'speakerId',
    'gender',
    'ageRange',
    'mean_chroma',
    'mean_tonnetz',
    'mean_spectral_contrast',
    'rmse',
    'sf',
    'spectral_rolloff',
    'zero_crossing_rate',
    'mean_mfcc',
]

In [113]:
# X_eval = df_eval[features].copy()
# X_eval

In [84]:
# Take an independent copy of the selected feature columns so later work on X
# does not write back into df.
X = df.loc[:, features].copy()
X

Unnamed: 0,Self-reported fluency level,First Language spoken,speakerId,gender,ageRange,mean_chroma,mean_tonnetz,mean_spectral_contrast,rmse,sf,spectral_rolloff,zero_crossing_rate,mean_mfcc
0,0,1,0,0,0,0.344729,0.013238,25.438396,0.013352,0.005090,4646.652561,9479,-10.598666
1,0,1,0,0,0,0.413149,-0.021496,23.500646,0.008137,0.008828,3541.682409,6519,-11.506034
2,0,1,0,0,0,0.422798,-0.011401,23.197301,0.004666,0.005855,3523.972024,9087,-11.398731
3,0,1,0,0,0,0.461785,0.015130,21.895512,0.018952,0.011705,3468.663441,7641,-10.909122
4,0,1,0,0,0,0.476735,0.027568,21.571120,0.006073,0.011597,3102.216797,13085,-11.664955
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9849,3,1,76,1,0,0.634121,0.008615,23.579975,0.037891,0.000036,1555.043987,1483,-5.796893
9850,3,1,76,1,0,0.614684,0.001136,22.845198,0.029514,0.000121,2363.552375,3957,-5.746099
9851,3,1,76,1,0,0.607508,0.010426,23.911601,0.037515,0.000120,2171.453536,3471,-5.386017
9852,3,1,76,1,0,0.713582,-0.005653,22.243380,0.026489,0.000139,1870.266357,2491,-5.889110


In [85]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [149]:
# y_pred_eval = svm_model(X, y, X_eval)

# predictions = pd.DataFrame(y_pred_eval,columns=["Predicted"])
# predictions.to_csv('my_data.csv', index=True)

In [86]:
# Run the project's SVM helper on the train/test split and score the result.
# svm_model and accuracy_calculator come from the local `algorithms` module;
# presumably svm_model fits on its first two arguments and returns predictions
# for the third -- TODO confirm.
y_pred = svm_model(X_train, y_train, X_test)
svm_accuracy = accuracy_calculator(y_test, y_pred)

In [87]:
# Display the SVM accuracy on the held-out split.
svm_accuracy

0.2648401826484018

In [141]:
# NOTE(review): this cell raises NameError because X_eval is never assigned.
# Its only definition in the notebook (X_eval = df_eval[features].copy()) is
# commented out, and the audio feature cells are only run on df, not df_eval.
# Build X_eval through the same feature pipeline before running this cell.
y_pred_eval = random_forest_model(X , X_eval, y)

# Write the predictions to CSV with the row index as the first column.
predictions = pd.DataFrame(y_pred_eval,columns=["Predicted"])
predictions.to_csv('predictions.csv', index=True)


NameError: name 'X_eval' is not defined

In [88]:
# Run the project's random-forest helper on the same split and score it.
# Note the argument order (train features, test features, train labels)
# differs from the svm_model call, which takes the labels second.
# random_forest_model comes from the local `algorithms` module; presumably it
# returns predictions for the test features -- TODO confirm.
y_pred = random_forest_model(X_train, X_test, y_train)
random_forest_accuracy = accuracy_calculator(y_test, y_pred)
random_forest_accuracy

0.3287671232876712

In [89]:
## Implementation using k-fold
from sklearn.model_selection import KFold, cross_val_score
# define the number of folds and whether to shuffle the data
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# create an instance of the SVM model
clf = SVC()

# use cross_val_score function to perform k-fold cross-validation
scores = cross_val_score(clf, X, y, cv=kf, scoring='accuracy')

# print the mean accuracy and standard deviation
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()))

Accuracy: 0.27 (+/- 0.01)


In [90]:
# Random forest evaluated with 5-fold cross-validation.
clf = RandomForestClassifier(n_estimators=100, random_state=0)

# Shuffle features and labels together before splitting. The seed makes the
# reported accuracy reproducible, and the shuffled copies get their own names
# so re-running the cell does not keep re-shuffling X and y.
X_shuffled, y_shuffled = shuffle(X, y, random_state=42)

# An integer cv requests 5 folds; cross_val_score returns one score per fold.
scores = cross_val_score(clf, X_shuffled, y_shuffled, cv=5)

# Mean accuracy across the folds.
accuracy = np.mean(scores)
print("Accuracy:", accuracy)


Accuracy: 0.32880111876009244
