### Importing Libraries

In [1]:
from pathlib import Path

import librosa
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.utils import shuffle

from algorithms import encode_columns, svm_model, accuracy_calculator, random_forest_model, array_column_spread

### Loading the dataset using Pandas
The data is found in development.csv (development set): a comma-separated values file containing the records from the development set. This portion does have the action and object columns, which you should use to obtain the labels to train and validate your models.
The dataset consists of a collection of audio files in WAV format.

Each record is characterized by several attributes. The following is a short description for each of them.
- path: the path of the audio file.
- speakerId: the id of the speaker.
- action: the type of action required through the intent.
- object: the device involved by intent.
- Self-reported fluency level: the speaking fluency of the speaker.
- First Language spoken: the first language spoken by the speaker.
- Current language used for work/school: the main language spoken by the speaker during daily activities.
- gender: the gender of the speaker.
- ageRange: the age range of the speaker.

In [2]:
# Read the labelled development split and the evaluation split, in that order.
df, df_eval = (
    pd.read_csv(csv_path)
    for csv_path in ("dsl_data/development.csv", "dsl_data/evaluation.csv")
)

In [3]:
# Target label: the `action` and `object` strings of each record concatenated
# without a separator (e.g. 'activate' + 'music' -> 'activatemusic').
# The columns are selected by name rather than by position, so the labels do
# not depend on the current column order of `df`; a positional slice would pick
# different columns if this cell were re-run after the frame has been modified.
y = (df["action"] + df["object"]).to_numpy()
y

array(['change languagenone', 'activatemusic', 'deactivatelights', ...,
       'deactivatelights', 'deactivatelights', 'increasevolume'],
      dtype=object)

In [4]:
# Speaker-metadata columns that are passed to `encode_columns` for both frames.
# Note: the first name intentionally keeps its trailing space.
column_names = [
    "Self-reported fluency level ",
    "First Language spoken",
    "Current language used for work/school",
    "speakerId",
    "gender",
    "ageRange",
]


In [5]:
# Encode the metadata columns listed in `column_names` on the evaluation frame.
# NOTE(review): the development frame is encoded by a separate call; if
# `encode_columns` derives its mapping from the frame it receives, the same
# category could get different codes in the two frames - confirm in algorithms.
encode_columns(df_eval, column_names)

In [6]:

# Encode the metadata columns listed in `column_names` on the development frame.
# NOTE(review): presumably modifies `df` in place (the result is not assigned) -
# confirm against algorithms.encode_columns.
encode_columns(df,column_names)

In [7]:
def audio_feature_extraction(df):
    """Load every audio file listed in ``df['path']`` with ``librosa.load``.

    The frame is modified in place and nothing is returned:

    - ``df['data']`` receives the first element returned by ``librosa.load``
      for each path (the decoded signal),
    - ``df['rate']`` receives the second element (the sampling rate).
    """
    loaded = [librosa.load(audio_path) for audio_path in df['path']]

    df['data'] = [signal for signal, _ in loaded]
    df['rate'] = [sample_rate for _, sample_rate in loaded]

In [8]:
# Decode the audio for both splits; each frame gains 'data' and 'rate' in place.
for frame in (df, df_eval):
    audio_feature_extraction(frame)

### Chroma feature

In [9]:
def chroma_feature(df):
    """Add a ``'chroma'`` column to ``df`` in place.

    For every row, ``librosa.feature.chroma_stft`` is computed from the row's
    ``'data'`` signal and ``'rate'`` sampling rate, and the result is averaged
    along axis 1, leaving one value per row of the chroma matrix.
    Returns ``None``.
    """
    df['chroma'] = [
        np.mean(librosa.feature.chroma_stft(y=signal, sr=sample_rate), axis=1)
        for signal, sample_rate in zip(df['data'], df['rate'])
    ]

In [10]:
# Compute the chroma vectors for both splits (each frame is modified in place).
for frame in (df, df_eval):
    chroma_feature(frame)


In [11]:
# Expand the per-clip 'chroma' vectors for both splits.
# NOTE(review): presumably replaces 'chroma' with chroma_<i> columns in place
# (the result is not assigned) - confirm against algorithms.array_column_spread.

array_column_spread(df,'chroma')
array_column_spread(df_eval,'chroma')

### Tonnetz feature

In [12]:
def tonnetz_feature(df):
    """Add a ``'tonnetz'`` column to ``df`` in place.

    For every row, ``librosa.feature.tonnetz`` is computed from the row's
    ``'data'`` signal and ``'rate'`` sampling rate, and the result is averaged
    along axis 1, leaving one value per row of the tonnetz matrix.
    Returns ``None``.
    """
    df['tonnetz'] = [
        np.mean(librosa.feature.tonnetz(y=signal, sr=sample_rate), axis=1)
        for signal, sample_rate in zip(df['data'], df['rate'])
    ]

In [13]:
# Compute the tonnetz vectors for both splits (each frame is modified in place).
for frame in (df, df_eval):
    tonnetz_feature(frame)

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **

In [14]:
# Expand the per-clip 'tonnetz' vectors for both splits.
# NOTE(review): presumably replaces 'tonnetz' with tonnetz_<i> columns in place
# (the result is not assigned) - confirm against algorithms.array_column_spread.
array_column_spread(df, 'tonnetz')
array_column_spread(df_eval, 'tonnetz')

### Spectral Contrast

In [15]:
def spectral_contrast(df):
    """Add a ``'spectral_contrast'`` column to ``df`` in place.

    For every row, ``librosa.feature.spectral_contrast`` is computed from the
    row's ``'data'`` signal and ``'rate'`` sampling rate, and the result is
    averaged along axis 1, leaving one value per row of the contrast matrix.
    Returns ``None``.
    """
    df['spectral_contrast'] = [
        np.mean(librosa.feature.spectral_contrast(y=signal, sr=sample_rate), axis=1)
        for signal, sample_rate in zip(df['data'], df['rate'])
    ]

In [16]:
# Compute the spectral-contrast vectors for both splits (in place).
for frame in (df, df_eval):
    spectral_contrast(frame)


In [17]:
# Expand the per-clip 'spectral_contrast' vectors for both splits.
# NOTE(review): presumably replaces the column with spectral_contrast_<i>
# columns in place (the result is not assigned) - confirm against
# algorithms.array_column_spread.
array_column_spread(df, 'spectral_contrast')
array_column_spread(df_eval, 'spectral_contrast')

### Root Mean Square Energy (RMSE)

In [18]:
def rmse_feature(df):
    """Add an ``'rmse'`` column to ``df`` in place.

    Each value is the mean of ``librosa.feature.rms`` computed on the row's
    ``'data'`` signal, i.e. a single scalar per row. The ``'rate'`` column is
    iterated alongside the signals but its values are not used.
    Returns ``None``.
    """
    df['rmse'] = [
        np.mean(librosa.feature.rms(y=signal))
        for signal, _rate in zip(df['data'], df['rate'])
    ]

In [19]:
# Compute the mean RMS energy for both splits (each frame is modified in place).
for frame in (df, df_eval):
    rmse_feature(frame)

### Spectral Flatness (SF)

In [20]:
def spectral_flatness(df):
    """Add an ``'sf'`` column to ``df`` in place.

    Each value is the mean of ``librosa.feature.spectral_flatness`` computed
    on the row's ``'data'`` signal, i.e. a single scalar per row. The
    ``'rate'`` column is iterated alongside the signals but its values are
    not used. Returns ``None``.
    """
    df['sf'] = [
        np.mean(librosa.feature.spectral_flatness(y=signal))
        for signal, _rate in zip(df['data'], df['rate'])
    ]

In [21]:
# Compute the mean spectral flatness for both splits (in place).
for frame in (df, df_eval):
    spectral_flatness(frame)

### Spectral Roll-off (SRO)

In [22]:
def sro_feature(df):
    """Add a ``'spectral_rolloff'`` column to ``df`` in place.

    Each value is the mean of ``librosa.feature.spectral_rolloff`` computed
    from the row's ``'data'`` signal and ``'rate'`` sampling rate, i.e. a
    single scalar per row. Returns ``None``.
    """
    df['spectral_rolloff'] = [
        np.mean(librosa.feature.spectral_rolloff(y=signal, sr=sample_rate))
        for signal, sample_rate in zip(df['data'], df['rate'])
    ]

In [23]:
# Compute the mean spectral roll-off for both splits (in place).
for frame in (df, df_eval):
    sro_feature(frame)

### Zero-Crossing Rate

In [24]:
def zcr_feature(df):
    """Add a ``'zero_crossing_rate'`` column to ``df`` in place.

    For every signal in ``df['data']`` the number of ``True`` entries in the
    mask returned by ``librosa.zero_crossings`` is stored. Despite the column
    name, the value is the total *count* of zero crossings in the clip, not a
    per-sample rate, so it grows with the length of the signal.

    The count uses ``np.count_nonzero`` instead of the builtin ``sum``: the
    builtin iterates over the mask element by element in Python, which is
    needlessly slow for audio-length arrays, while the result is the same.
    Returns ``None``.
    """
    df['zero_crossing_rate'] = [
        int(np.count_nonzero(librosa.zero_crossings(signal)))
        for signal in df['data']
    ]

In [25]:
# Count the zero crossings for both splits (each frame is modified in place).
for frame in (df, df_eval):
    zcr_feature(frame)

### Mel-frequency cepstral coefficients (MFCC)

In [29]:
def mfcc_feature(df, n_mfcc=50):
    """Add an ``'mfcc'`` column to ``df`` in place.

    For every row, ``librosa.feature.mfcc`` is computed from the row's
    ``'data'`` signal and ``'rate'`` sampling rate, and the result is averaged
    along axis 1, leaving one value per row of the MFCC matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'data'`` and ``'rate'`` columns.
    n_mfcc : int, default 50
        Forwarded to ``librosa.feature.mfcc`` as ``n_mfcc``. The default keeps
        the value that was previously hard-coded, so existing calls are
        unaffected.

    Returns
    -------
    None
    """
    df['mfcc'] = [
        np.mean(librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc), axis=1)
        for signal, sample_rate in zip(df['data'], df['rate'])
    ]

In [30]:
# Compute the mean MFCC vectors for both splits (each frame is modified in place).
for frame in (df, df_eval):
    mfcc_feature(frame)

In [31]:
# Expand the per-clip 'mfcc' vectors into individual mfcc_<i> columns, using
# the same helper call as for the chroma, tonnetz and spectral-contrast
# vectors. The `features` list selects mfcc_0 ... mfcc_49, so this step has to
# run for the notebook to work from a fresh kernel (Restart & Run All);
# previously it was commented out and the columns only existed as leftover
# kernel state.
array_column_spread(df, 'mfcc')
array_column_spread(df_eval, 'mfcc')


In [50]:
# Display the column index of the development frame after feature extraction.
df.columns

Index(['Id', 'path', 'action', 'object', 'Self-reported fluency level ',
       'First Language spoken', 'Current language used for work/school',
       'speakerId', 'gender', 'ageRange', 'data', 'rate', 'chroma_0',
       'chroma_1', 'chroma_2', 'chroma_3', 'chroma_4', 'chroma_5', 'chroma_6',
       'chroma_7', 'chroma_8', 'chroma_9', 'chroma_10', 'chroma_11',
       'tonnetz_0', 'tonnetz_1', 'tonnetz_2', 'tonnetz_3', 'tonnetz_4',
       'tonnetz_5', 'spectral_contrast_0', 'spectral_contrast_1',
       'spectral_contrast_2', 'spectral_contrast_3', 'spectral_contrast_4',
       'spectral_contrast_5', 'spectral_contrast_6', 'rmse', 'sf',
       'spectral_rolloff', 'zero_crossing_rate', 'mfcc_0', 'mfcc_1', 'mfcc_2',
       'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6', 'mfcc_7', 'mfcc_8', 'mfcc_9',
       'mfcc_10', 'mfcc_11', 'mfcc_12', 'mfcc_13', 'mfcc_14', 'mfcc_15',
       'mfcc_16', 'mfcc_17', 'mfcc_18', 'mfcc_19', 'mfcc_20', 'mfcc_21',
       'mfcc_22', 'mfcc_23', 'mfcc_24', 'mfcc_25', 'm

In [35]:
# Columns used as model inputs, in this order: encoded speaker metadata, then
# the spread chroma / tonnetz / spectral-contrast columns, the four scalar
# descriptors, and finally the 50 spread MFCC columns.
# 'Current language used for work/school' is not part of the selection.
features = (
    ['Self-reported fluency level ', 'First Language spoken', 'speakerId', 'gender', 'ageRange']
    + [f'chroma_{i}' for i in range(12)]
    + [f'tonnetz_{i}' for i in range(6)]
    + [f'spectral_contrast_{i}' for i in range(7)]
    + ['rmse', 'sf', 'spectral_rolloff', 'zero_crossing_rate']
    + [f'mfcc_{i}' for i in range(50)]
)

In [42]:
# Independent copy of the model-input columns of the evaluation split,
# also written to disk as a CSV snapshot.
X_eval = df_eval.loc[:, features].copy()
X_eval.to_csv('x_eval.csv')

In [43]:
# Independent copy of the model-input columns of the development split,
# also written to disk as a CSV snapshot.
X = df.loc[:, features].copy()
X.to_csv('x.csv')

In [38]:
# Hold out 20% of the development set for validation; the fixed random_state
# makes the split reproducible. No `stratify=` is passed, so the split is
# purely random with respect to the labels.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [44]:
# Call the project's SVM helper with the full development set and the
# evaluation inputs, then save its output.
# NOTE(review): presumably svm_model(train_X, train_y, predict_X) fits and
# returns predictions for the third argument - confirm in algorithms.svm_model.
y_pred_eval = svm_model(X, y, X_eval)

# One 'Predicted' column; index=True also writes the row index as the first
# CSV column.
predictions = pd.DataFrame(y_pred_eval,columns=["Predicted"])
predictions.to_csv('my_data.csv', index=True)

In [47]:
# Same SVM helper, now called with the training part of the hold-out split and
# the held-out inputs; the result is scored against y_test.
y_pred = svm_model(X_train, y_train, X_test)
svm_accuracy = accuracy_calculator(y_test, y_pred)

In [48]:
# Display the value returned by accuracy_calculator for the SVM hold-out run.
svm_accuracy

0.26382546930492135

In [None]:
# Call the project's random-forest helper with the full development set and
# the evaluation inputs, then save its output.
# NOTE(review): the argument order here is (X, X_eval, y), unlike the
# svm_model calls which pass (X, y, X_eval) - confirm it matches the signature
# of algorithms.random_forest_model.
y_pred_eval = random_forest_model(X , X_eval, y)

# One 'Predicted' column; index=True also writes the row index as the first
# CSV column.
predictions = pd.DataFrame(y_pred_eval,columns=["Predicted"])
predictions.to_csv('predictions.csv', index=True)


In [None]:
# Same random-forest helper, now called with the training part of the hold-out
# split and the held-out inputs; the result is scored against y_test.
y_pred = random_forest_model(X_train, X_test, y_train)
random_forest_accuracy = accuracy_calculator(y_test, y_pred)
random_forest_accuracy

In [None]:
## Implementation using k-fold
# KFold, make_pipeline and StandardScaler are imported in the notebook's
# import cell rather than in the middle of the notebook.

# 5 shuffled folds; the fixed seed makes the fold assignment reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# SVMs are sensitive to feature scale, and the feature table mixes columns of
# very different magnitude (per-bin means next to raw zero-crossing counts and
# roll-off values). The SVC is therefore wrapped in a pipeline that
# standardises the inputs first. Because the scaler is part of the estimator,
# cross_val_score re-fits it on the training folds only, so the validation
# fold never contributes to the scaling statistics.
clf = make_pipeline(StandardScaler(), SVC())

# One accuracy score per fold.
scores = cross_val_score(clf, X, y, cv=kf, scoring='accuracy')

# print the mean accuracy and standard deviation
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()))

In [None]:
clf = RandomForestClassifier(n_estimators=100, random_state=0)

# Shuffle before cross-validating. The shuffled data goes into new names and
# uses a fixed seed: overwriting X and y would make the cell non-idempotent
# (every re-run reshuffles the already shuffled data and changes what other
# cells see), and an unseeded shuffle makes the reported score differ from run
# to run.
X_shuffled, y_shuffled = shuffle(X, y, random_state=42)

# 5-fold cross-validation; one score per fold.
scores = cross_val_score(clf, X_shuffled, y_shuffled, cv=5)

# Mean accuracy across the folds.
accuracy = np.mean(scores)
print("Accuracy:", accuracy)
