<a id=1></a>
# 1. Importing libraries  

In [14]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

import librosa
import librosa.display
from IPython.display import Audio, display
import IPython
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.utils import to_categorical

%matplotlib inline

<a id=2></a>
# 2. Loading data

In [3]:
# Root of the Kaggle dataset, plus the folders that hold the train / test .wav clips
main_path = "/kaggle/input/moroccan-darija-trigger-word-classification-ed-2/"
train_path = f"{main_path}data/train/"
test_path = f"{main_path}data/test/"

In [4]:
# Read the train / test tables and the submission template that sit next to the audio folders
df_train = pd.read_csv(f"{main_path}train.csv")
df_test = pd.read_csv(f"{main_path}test.csv")
submission_file = pd.read_csv(f"{main_path}sample_submission.csv")

**Let's take a look at our training set**

In [5]:
# Show five randomly chosen rows of the training table
df_train.sample(n=5)

In [20]:
# Pick one training clip and render an inline audio player for it
filePath = f"{train_path}k1xcfnu6vwm3p8y5lior2.wav"
Audio(filePath)

In [21]:
# Report (rows, columns) of the training table
print("Shape of training set: ", df_train.shape, sep=" ")

<a id=4></a>
# 3. Feature Extraction

In [50]:
# librosa.load resamples to its own default rate when `sr` is not given, so keep
# the rate it returns and hand that same value to the MFCC call; otherwise the
# mel filterbank is built for a sampling rate the samples do not have.
sound1, sound1_sr = librosa.load(filePath, res_type="kaiser_fast", duration=2.5, offset=0.5)
mfccs = librosa.feature.mfcc(y=sound1, sr=sound1_sr, n_mfcc=30)
# (n_mfcc, n_frames) for this single clip
print(mfccs.shape)

In [23]:
# Draw the MFCC matrix of the sample clip as a heat-map with a time axis.
librosa.display.specshow(mfccs, x_axis='time')
# No mappable is passed here, so the colour bar is attached through pyplot's
# implicit "current image" state; keep this call right after specshow.
plt.colorbar()
plt.title('mfcc')
plt.show()

In [24]:
def data_preparation(df, n, mfcc, path):
    """Load every clip listed in ``df.id`` and return its MFCC features.

    Each clip is read from ``path + <id> + ".wav"``, then cropped or
    zero-padded at a random offset to exactly
    ``sample_rate * audio_duration`` samples before feature extraction.

    Reads the notebook-level settings ``sample_rate`` and ``audio_duration``,
    which must be defined before the first call. Offsets are drawn from
    NumPy's global random state, so results vary between runs unless that
    state is seeded by the caller.

    Args:
        df: table with an ``id`` column holding the clip names (no extension).
        n: number of MFCC coefficients to compute per frame.
        mfcc: feature switch; MFCCs are extracted only when this equals 1,
            otherwise the corresponding rows of the result stay zero.
        path: directory prefix (including the trailing separator) of the
            ``.wav`` files.

    Returns:
        ``np.ndarray`` of shape ``(len(df), n, n_frames, 1)`` where
        ``n_frames = 1 + input_length // hop_length``.
    """
    # librosa's default hop, made explicit so the frame count stays in sync
    # with the MFCC call below.
    hop_length = 512
    # Integer sample count: it is used for slicing, padding and randint.
    input_length = int(round(sample_rate * audio_duration))
    n_frames = 1 + input_length // hop_length

    X = np.zeros(shape=(df.shape[0], n, n_frames, 1))

    for row, fileName in enumerate(tqdm(df.id)):
        clip_path = path + str(fileName) + ".wav"
        soundData, _ = librosa.load(clip_path, sr=sample_rate, res_type="kaiser_fast", duration=2.5, offset=0.5)

        # Random crop (clip too long) or random zero-padding (clip too short)
        surplus = len(soundData) - input_length
        if surplus > 0:
            offset = np.random.randint(surplus)
            soundData = soundData[offset:offset + input_length]
        else:
            missing = -surplus
            # randint(0) is invalid, so an exact-length clip gets no offset.
            offset = np.random.randint(missing) if missing > 0 else 0
            soundData = np.pad(soundData, (offset, missing - offset), "constant")

        # Feature extraction: the audio is passed by keyword (`y=`) and the
        # coefficient count comes from the `n` argument that also sized X.
        if mfcc == 1:
            MFCC = librosa.feature.mfcc(y=soundData, sr=sample_rate, n_mfcc=n, hop_length=hop_length)
            X[row,] = np.expand_dims(MFCC, axis=-1)

    return X
        

In [33]:
# Audio settings shared by every feature-extraction call:
# sampling rate in Hz, clip length in seconds, number of MFCC coefficients
sample_rate, audio_duration, n_mfcc = 44100, 2.5, 30

# MFCC features for the training clips
X_mfccs = data_preparation(df=df_train, n=n_mfcc, mfcc=1, path=train_path)

In [35]:
# Only the plain MFCC features are used as model input here
X = X_mfccs
# One-hot encode the integer class labels from the `label` column
y = to_categorical(df_train['label'].values)

In [36]:
# Shapes of the feature tensor and the one-hot label matrix
# (each sample is a 2D map of 30 MFCC bands by 216 frames)
tuple(arr.shape for arr in (X, y))

In [37]:
# MFCC features for the test clips, using the same settings as the training set
X_test_mfccs = data_preparation(df=df_test, n=n_mfcc, mfcc=1, path=test_path)

In [38]:
# Hold out 10% of the labelled clips for validation; the fixed seed keeps the split repeatable
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=43
)
print(f'training set : {X_train.shape} , validation set :{X_valid.shape}, test set : {X_test_mfccs.shape}')

In [39]:
# Normalization: per-position standardisation using statistics from the
# training split only, applied unchanged to the validation and test features.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
# A position that is constant across the training clips has std == 0 and would
# become NaN/inf after the division; scale those positions by 1 instead.
std = np.where(std == 0, 1.0, std)

X_train = (X_train - mean) / std
X_valid = (X_valid - mean) / std
X_test = (X_test_mfccs - mean) / std

<a id = 5></a>
# 4. Modeling 

In [40]:
from keras import losses, models
from tensorflow.keras.optimizers import Adam 
from keras.activations import relu, softmax
from keras.layers import (Convolution2D, GlobalAveragePooling2D, BatchNormalization, Flatten, Dropout,
                          GlobalMaxPool2D, MaxPool2D, concatenate, Activation, Input, Dense)
from keras.models import Sequential, Model

In [41]:

def _conv_stage(tensor, filters):
    """Apply one Conv(3x3, same) -> BatchNorm -> ReLU -> MaxPool -> Dropout(0.2) stage."""
    tensor = Convolution2D(filters, (3,3), padding="same")(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = Activation("relu")(tensor)
    tensor = MaxPool2D()(tensor)
    return Dropout(rate=0.2)(tensor)


def convNet_model(input_shape, n_classes=2, learning_rate=0.01):
    """Build and compile the 2D CNN that classifies MFCC maps.

    Architecture: three convolutional stages with 32, 64 and 128 filters
    (see ``_conv_stage``), then Flatten -> Dense(256) -> Dropout(0.2) ->
    BatchNorm -> ReLU -> softmax output.

    Args:
        input_shape: shape of one sample without the batch axis, e.g. a
            2D map of MFCC bands by frames with a trailing channel axis.
        n_classes: number of softmax output units. Defaults to 2.
        learning_rate: learning rate handed to Adam. Defaults to 0.01.

    Returns:
        The compiled Keras ``Model``. It is compiled with categorical
        cross-entropy, so targets must be one-hot encoded.
    """
    input_ = Input(shape=input_shape)

    x = input_
    for filters in (32, 64, 128):
        x = _conv_stage(x, filters)

    x = Flatten()(x)
    x = Dense(256)(x)
    x = Dropout(rate=0.2)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    out = Dense(n_classes, activation='softmax')(x)
    model = Model(inputs=input_, outputs=out)

    # Categorical cross-entropy is the loss that matches a softmax head fed
    # with one-hot targets, and it stays correct if n_classes grows beyond 2.
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['acc'])

    return model


# Build the network for the shape of a single sample (batch axis dropped) and list its layers
input_shape = np.shape(X[0])
model = convNet_model(input_shape=input_shape)
model.summary()

In [42]:
# Train for 50 epochs in mini-batches of 8, evaluating on the held-out split after each epoch
model_history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=8,
    epochs=50,
    shuffle=True,
    validation_data=(X_valid, y_valid),
    verbose=2,
)

In [43]:
def plot_loss_function(history):
    """Plot the per-epoch training and validation loss stored in ``history.history``."""
    # Training curve first, validation curve second, matching the legend order below
    for curve_key in ('loss', 'val_loss'):
        plt.plot(history.history[curve_key])
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()
    
# Loss curves of the training run above
plot_loss_function(history=model_history)

In [44]:
# Class index with the highest softmax score for each normalised test clip
predictions = np.argmax(model.predict(X_test), axis=1)
predictions

In [46]:
#0 if the trigger word is "yallahbda" and 1 if it is "safi7bess".
i = 72
print("prediction:",predictions[i])

In [48]:
# Put the predicted class of every test clip into the submission template and spot-check five rows
submission_file = submission_file.assign(label=predictions)
submission_file.sample(n=5)

In [49]:
# Write the submission without the DataFrame index column
submission_file.to_csv(path_or_buf='submission.csv', index=False)