In [1]:
import numpy as np
import pandas as pd
from scipy.io.wavfile import read 
import os

In [2]:
# Storing the address in 'url' variable
data_path =   "dsl_data/development.csv"
# Importing the file from the address contained in 'url' into 'df' 
df = pd.read_csv(data_path)

In [3]:
# Check null values in Training dataset (pandas dataframe)
df.isnull().sum().sum()

0

In [4]:
# To get unique values in order to map values to objects imported from src/data.py

print(df['object'].unique())
print(df['action'].unique())


['none' 'music' 'lights' 'volume' 'heat']
['change language' 'activate' 'deactivate' 'increase' 'decrease']


In [5]:
df.shape

(9854, 10)

In [6]:
import dict_data 

df['gender'] = df['gender'].map(dict_data.gender_map)
df['Self-reported fluency level '] = df['Self-reported fluency level '].map(dict_data.language_fluency_map)
df['ageRange'] = df['ageRange'].map(dict_data.age_range_map)
df['Current language used for work/school'] = df['Current language used for work/school'].map(dict_data.current_language_map)
df['First Language spoken'] = df['First Language spoken'].map(dict_data.first_language_map)


# Try to combine action & object in 1 column
df["action-object"] = df['action'].astype(str) +"-"+ df["object"]

# df = df.iloc[:10]


In [7]:
import librosa

audios_array = []
duration_array = []
zero_crossing_array = []
mfcc_array = []



for audio in df['path']:
    # rate, data = read(audio, mmap=False) 

    # Load audio files (wav) as amplitude and rate using 
    # Default rate: 22050
    data ,rate = librosa.load(audio)

    zero_crossing_array.append(sum(librosa.zero_crossings(data)))
    audios_array.append(data)

    # feature_vector = get_feature_vector(data, rate)
    # norm_audios_feat.append(feature_vector) 

    # mfcc = librosa.feature.mfcc(y=data, sr=rate)
    mfcc = librosa.feature.mfcc(y=data, sr=rate, n_mfcc=50)
    mfcc_mean = np.mean(mfcc,axis=1)
    
    mfcc_array.append(mfcc_mean) 

    # X = librosa.stft(data)
    # Xdb = librosa.amplitude_to_db(abs(X))
    # print("Xdb",Xdb.shape)
    # print("X",X.shape)
    # Get duration by dividing number of columns in data by rate (Number of channels)
    duration_array.append(data.shape[0] / rate)


df['zero_crossing'] = zero_crossing_array
df['audio'] = audios_array 
df['duration'] = duration_array 
df['mfcc'] = mfcc_array 


# we can realize two different sample rates for our wav files 16000 & 22050
# print(df.rate.value_counts())
# print(df.rate.unique())



In [8]:


# extract the array column
array_column = df['mfcc']

# reshape the array to several columns
array_column = array_column.apply(pd.Series)

# rename the columns

array_column.columns = [f'mfcc_{i}' for i in range(array_column.shape[1])]

# join the new DataFrame with the original one
df = pd.concat([df, array_column], axis=1)

# drop the array column
df = df.drop('mfcc', axis=1)

df.to_csv('file_name.csv')


In [9]:
from sklearn.model_selection import train_test_split


static_features_array = ['Self-reported fluency level ','First Language spoken', 'Current language used for work/school', 'gender', 'ageRange','zero_crossing','duration']
dynamic_features_array = [f'mfcc_{x}' for x in range(50)]

all_features_array = np.concatenate((static_features_array, dynamic_features_array))

# x = df_copy[all_features_array]
x = df[all_features_array].copy()
y = df[['action-object']].copy()


#train_test_split splits the data into 70% training data and 30% test data
x_train, x_test, y_train, y_test = train_test_split(x, y,test_size = .3)

In [10]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Classifier
clf=RandomForestClassifier(n_estimators=100)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(x_train,y_train)
y_pred=clf.predict(x_test)

y_pred

  clf.fit(x_train,y_train)


array(['decrease-heat', 'increase-volume', 'decrease-heat'], dtype=object)

In [11]:
from sklearn import svm

clf = svm.SVC()
clf.fit(x_train,y_train)
y_pred_svm=clf.predict(x_test)
y_pred_svm


  y = column_or_1d(y, warn=True)


array(['increase-volume', 'decrease-heat', 'decrease-heat'], dtype=object)

In [12]:

# from sklearn.tree import DecisionTreeClassifier
# clf = DecisionTreeClassifier(max_depth = 10, min_impurity_decrease=0.01)
# clf.fit(x_train, y_train)
# y_pred = clf.predict(x_test)
# y_pred

In [13]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

# Model Accuracy using test data (25%)
print("Test set accuracy:",metrics.accuracy_score(y_test, y_pred))

Test set accuracy: 0.0


In [14]:
# Storing the address in 'url' variable
evaluation_path =   "dsl_data/evaluation.csv"

# Importing the csv file from the address contained in 'url' into 'evaluation_df' 
evaluation_df = pd.read_csv(evaluation_path)

# Check null values in Training dataset (pandas dataframe)
evaluation_df.isnull().sum().sum()

import dict_data 

# map features in evaluation_df to predefined dictionaries
evaluation_df['gender'] = evaluation_df['gender'].map(dict_data.gender_map)
evaluation_df['Self-reported fluency level '] = evaluation_df['Self-reported fluency level '].map(dict_data.language_fluency_map)
evaluation_df['ageRange'] = evaluation_df['ageRange'].map(dict_data.age_range_map)
evaluation_df['Current language used for work/school'] = evaluation_df['Current language used for work/school'].map(dict_data.current_language_map)
evaluation_df['First Language spoken'] = evaluation_df['First Language spoken'].map(dict_data.first_language_map)


import librosa

audios_array = []
duration_array = []
zero_crossing_array = []
mfcc_array = []



for audio in evaluation_df['path']:
    # Load audio files (wav) as amplitude and rate using 
    # Default rate: 22050
    data ,rate = librosa.load(audio)

    zero_crossing_array.append(sum(librosa.zero_crossings(data)))
    audios_array.append(data)

   
    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=rate, n_mfcc=50).T, axis=0)
    mfcc_array.append(mfcc) 

    
    duration_array.append(data.shape[0] / rate)

print(mfcc_array)

evaluation_df['zero_crossing'] = zero_crossing_array
evaluation_df['audio'] = audios_array 
evaluation_df['duration'] = duration_array 
evaluation_df['mfcc'] = mfcc_array 




x_evaluation = evaluation_df[['Self-reported fluency level ','First Language spoken', 'Current language used for work/school', 'gender', 'ageRange', 'zero_crossing','duration','mfcc']].copy()



In [None]:
y_evaluation=clf.predict(x_evaluation)

for element in y_evaluation:
    element = element.replace("none", "")
    element = element.replace("-", "")

remove_dash = list(map(lambda s: s.replace("-", ""), y_evaluation))
modified_y_evaluation = list(map(lambda s: s.replace("none", ""), remove_dash))

# print map(lambda s: s.replace('-' , 'n'), y_evaluation)



y_evaluation_df = pd.DataFrame(modified_y_evaluation, columns = ['Predicted'])
y_evaluation_df
y_evaluation_df.index.name = 'Id'

y_evaluation_df.to_csv('evaluation/predictions.csv',index=True,header=True)