### Importing Libraries

In [43]:
import pandas as pd
import numpy as np
# Import LabelEncoder from scikit-learn's preprocessing module.
from sklearn.preprocessing import LabelEncoder
import librosa

### Loading the dataset using Pandas
The data is found in `development.csv` (the development set), a comma-separated values file containing the development records. This file includes the `action` and `object` columns, which you should use to obtain the labels to train and validate your models.
The dataset consists of a collection of audio files in WAV format.

Each record is characterized by several attributes. The following is a short description for each of them.
- path: the path of the audio file.
- speakerId: the id of the speaker.
- action: the type of action requested by the intent.
- object: the device targeted by the intent.
- Self-reported fluency level: the speaking fluency of the speaker.
- First Language spoken: the first language spoken by the speaker.
- Current language used for work/school: the main language spoken by the speaker during daily activities.
- gender: the gender of the speaker.
- ageRange: the age range of the speaker.

In [33]:
# Read the development-set records (one row per audio file) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="dsl_data/development.csv")

In [34]:
# List the distinct values found in the `action` column.
df["action"].unique()

array(['change language', 'activate', 'deactivate', 'increase',
       'decrease'], dtype=object)

In [35]:
# Build the target label by concatenating the action and object strings
# (e.g. "activate" + "music" -> "activatemusic").
# The columns are selected by name rather than by their position in
# `df.values`, so the labels stay correct if the CSV column order changes.
y = (df["action"] + df["object"]).to_numpy()
y

array(['change languagenone', 'activatemusic', 'deactivatelights', ...,
       'deactivatelights', 'deactivatelights', 'increasevolume'],
      dtype=object)

In [36]:
def label_encoder(column_name, frame=None):
    """Replace a categorical column with the integer codes of a LabelEncoder.

    The column is removed and then re-added under the same name, so after the
    call it is the last column of the frame. The frame is modified in place
    and nothing is returned.

    Parameters
    ----------
    column_name : str
        Name of the column to encode. A missing column raises ``KeyError``.
    frame : pandas.DataFrame, optional
        Frame to modify. Defaults to the notebook-level ``df``.
    """
    if frame is None:
        frame = df

    # Fit a fresh encoder on this column only and obtain the encoded labels.
    codes = LabelEncoder().fit_transform(frame[column_name])

    # Drop the original column, then append the encoded values under the
    # same name (this moves the column to the end of the frame).
    frame.drop(columns=column_name, inplace=True)
    frame[column_name] = codes
    

In [37]:
from collections import Counter

# Encode each categorical metadata column in turn. The call order is kept
# as-is because label_encoder re-appends every column at the end of `df`.
# NOTE(review): the first name ends with a space, presumably matching the
# CSV header exactly; confirm before "fixing" it.
for categorical_column in (
    "Self-reported fluency level ",
    "First Language spoken",
    "Current language used for work/school",
    "speakerId",
    "gender",
    "ageRange",
):
    label_encoder(categorical_column)

In [38]:
# Remove the `action` and `object` columns from the frame, in place.
df.drop(columns=["action", "object"], inplace=True)

In [39]:
def _row_to_mfcc(row):
    """Load the audio file at ``row["path"]`` and return its MFCC matrix."""
    # Decode the file only once: librosa.load returns (samples, sampling rate).
    # The previous lambda called librosa.load twice per row, doubling the I/O
    # and decoding cost of this already expensive cell.
    signal, sample_rate = librosa.load(row["path"])
    return librosa.feature.mfcc(y=signal, sr=sample_rate)


df["mfccs"] = df.apply(_row_to_mfcc, axis=1)

In [50]:

# For every recording: flatten its 2D MFCC matrix into a 1-dimensional
# vector, then min-max scale that vector to the [0, 1] range using the
# recording's own minimum and maximum.
mfccs = df["mfccs"].values
flattened_mfccs = []
normalized_mfccs = []
for coefficients in mfccs:
    flat = coefficients.flatten()
    lowest = np.min(flat)
    highest = np.max(flat)
    flattened_mfccs.append(flat)
    normalized_mfccs.append((flat - lowest) / (highest - lowest))

# Store the scaled vectors in the dataframe as a new column
df["normalized_mfccs"] = normalized_mfccs

In [54]:
# Inspect the new column of per-recording normalized MFCC vectors.
df["normalized_mfccs"]

0       [0.007520065, 0.033770956, 0.019489864, 0.0051...
1       [0.0, 0.0036412734, 0.027942263, 0.06786933, 0...
2       [0.0, 0.020646865, 0.016027262, 0.026616339, 0...
3       [0.0, 0.0, 0.0040836576, 0.009220844, 0.004798...
4       [0.023517348, 0.02781044, 0.019429127, 0.02861...
                              ...                        
9849    [0.18867134, 0.16981147, 0.07505758, 0.0469596...
9850    [0.20160349, 0.18449712, 0.08112351, 0.0523808...
9851    [0.1389897, 0.14935379, 0.077551, 0.048554912,...
9852    [0.21293646, 0.19557634, 0.07309175, 0.0251101...
9853    [0.19629487, 0.18029657, 0.064385556, 0.027703...
Name: normalized_mfccs, Length: 9854, dtype: object

In [55]:
# Remove the raw MFCC matrices and the file paths from the frame, in place;
# the normalized vectors remain in `normalized_mfccs`.
df.drop(columns=["mfccs", "path"], inplace=True)

In [58]:
# Display the frame as it stands after the preprocessing cells above.
df

Unnamed: 0,Id,Self-reported fluency level,First Language spoken,Current language used for work/school,gender,ageRange,normalized_mfccs,speakerId
0,0,0,1,2,0,0,"[0.007520065, 0.033770956, 0.019489864, 0.0051...",0
1,1,0,1,2,0,0,"[0.0, 0.0036412734, 0.027942263, 0.06786933, 0...",0
2,2,0,1,2,0,0,"[0.0, 0.020646865, 0.016027262, 0.026616339, 0...",0
3,3,0,1,2,0,0,"[0.0, 0.0, 0.0040836576, 0.009220844, 0.004798...",0
4,4,0,1,2,0,0,"[0.023517348, 0.02781044, 0.019429127, 0.02861...",0
...,...,...,...,...,...,...,...,...
9849,9849,3,1,2,1,0,"[0.18867134, 0.16981147, 0.07505758, 0.0469596...",76
9850,9850,3,1,2,1,0,"[0.20160349, 0.18449712, 0.08112351, 0.0523808...",76
9851,9851,3,1,2,1,0,"[0.1389897, 0.14935379, 0.077551, 0.048554912,...",76
9852,9852,3,1,2,1,0,"[0.21293646, 0.19557634, 0.07309175, 0.0251101...",76
