# Language Word Detection (Urdu)

## Importing Data

In [170]:
import os
# Root directory of the dataset; its entries are listed below and treated as per-speaker folders.
path = '../input/urdu-speech-dataset/files'

In [171]:
import pandas as pd
import numpy as np

In [172]:
# os.listdir returns entries in arbitrary order, and the train/test split further
# down is made by positional slicing, so sort the folder names to keep the split
# reproducible across runs and machines.
speaker_dirs = sorted(entry for entry in os.listdir(path) if entry.startswith('A'))
data = pd.DataFrame({'Speakers': speaker_dirs})

### Extracting identifiers from speaker's name

 Name format = *Name[2]_Gender[1]_Native[1]_Age_group[2]*
 
* Example: AAMNG1
* Speaker Name = AA
* Gender = M (Male)
* Native / Non-Native = N
* Age Group = G1

In [173]:
# Decode the fixed-position fields of each speaker code:
# character 2 -> gender, character 3 -> native flag, characters 4+ -> age group.
data['Gender'] = [code[2] for code in data['Speakers']]
data['Native'] = [code[3] for code in data['Speakers']]
data['Age_group'] = [code[4:] for code in data['Speakers']]

### Determining value counts for each identifier

In [174]:
# Order speakers by gender, then by age group, in one multi-key sort.
# The original chained two sort_values calls, which only keeps the age-group
# order inside each gender if the second sort happens to be stable; the default
# 'quicksort' kind does not guarantee that.
data = data.sort_values(by=['Gender', 'Age_group'])

In [175]:
# Number of speakers in each age group.
data['Age_group'].value_counts()

In [176]:
# Gender distribution among the G1 speakers.
data.loc[data['Age_group'] == 'G1', 'Gender'].value_counts()

In [177]:
# Native / non-native distribution among the male G1 speakers.
data.loc[(data['Age_group'] == 'G1') & (data['Gender'] == 'M'), 'Native'].value_counts()

In [178]:
# Native / non-native distribution among the female G1 speakers.
data.loc[(data['Age_group'] == 'G1') & (data['Gender'] == 'F'), 'Native'].value_counts()

In [179]:
# Gender distribution among the G2 speakers.
data.loc[data['Age_group'] == 'G2', 'Gender'].value_counts()

In [180]:
# Native / non-native distribution among the male G2 speakers.
data.loc[(data['Age_group'] == 'G2') & (data['Gender'] == 'M'), 'Native'].value_counts()

|Age_group|Gender|Native|
| --- | --- | --- |
|G1|M=4||
|||N=4|
|||Y=0|
||F=2||
|||N=0|
|||Y=2|
|G2|M=4||
|||N=3|
|||Y=1|
||F=0||

## Splitting Data into train and test

#### To ensure that the model is evaluated on unseen speakers, one female and one male speaker from G1 and one male speaker with Native 'N' are held out for evaluation.

Chosen audio files for prediction

|Code|Urdu Word|English Meaning|
|---|---|---|
|001|Sifar|Zero|
|002|Ek|One|
|003|Do|Two|
|004|Teen|Three|
|005|Chaar|Four|
|006|Paanch|Five|
|007|Che|Six|
|008|Saath|Seven|
|009|Aath|Eight|
|010|Nau|Nine|
|015|Mangal|Tuesday|
|016|Budh|Wednesday|
|017|Jumeraat|Thursday|
|018|Subah|Morning|
|019|Dopahar|Afternoon|

In [181]:
# Numeric codes of the recordings that are used: 1-10 and 15-19.
file_num = list(range(1, 11)) + list(range(15, 20))

In [182]:
# Select the training speakers per (age group, gender) bucket; whoever is left
# over in each bucket is held out for testing.
# Series.append was removed in pandas 2.0, so the pieces are joined with pd.concat.
g1_male = data[(data['Age_group'] == 'G1') & (data['Gender'] == 'M')]['Speakers']
g1_female = data[(data['Age_group'] == 'G1') & (data['Gender'] == 'F')]['Speakers']
g2_male = data[(data['Age_group'] == 'G2') & (data['Gender'] == 'M')]['Speakers']

train_ = pd.concat(
    [
        g1_male[:3],    # first 3 male G1 speakers
        g1_female[:1],  # first female G1 speaker
        g2_male[:-1],   # every male G2 speaker except the last one
    ],
    ignore_index=True,
).to_frame()

In [183]:
# Every speaker that was not picked for training is held out for testing.
test_ = pd.DataFrame(
    list(filter(lambda spk: spk not in train_['Speakers'].values, data['Speakers']))
)
test_.columns = ['Speakers']

In [184]:
# Recording code -> spoken Urdu word (the target label).
y_ = dict(zip(
    (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 19),
    ('Sifar', 'Ek', 'Do', 'Teen', 'Chaar', 'Paanch', 'Che', 'Saath', 'Aath', 'Nau',
     'Mangal', 'Budh', 'Jumeraat', 'Subah', 'Dopahar'),
))

In [185]:
# Show the speakers selected for training.
train_

In [186]:
# Show the speakers held out for testing.
test_

In [187]:
# Build one [audio_path, word] row for every selected recording of each training speaker.
# A plain loop replaces the original list comprehension, which was used only for
# its append side effect and parsed each file code twice.
train = []
for speaker in train_['Speakers']:
    speaker_dir = os.path.join(path, speaker)
    for file_name in sorted(os.listdir(speaker_dir)):
        # Characters [6:-4] of the file name are parsed as the numeric word code
        # (presumably what sits between the 6-character speaker code and the extension).
        word_code = int(file_name[6:-4])
        if word_code in file_num:
            train.append([os.path.join(speaker_dir, file_name), y_[word_code]])

In [188]:
# Turn the list of [path, word] rows into a DataFrame. Note that `train` is
# rebound here from a list to a DataFrame.
train = pd.DataFrame(train)
train.columns = ['Audio_path', 'Word']

In [189]:
# Peek at the first two training rows.
train.head(2)

In [190]:
# Build one [audio_path, word] row for every selected recording of each held-out speaker.
# A plain loop replaces the original list comprehension, which was used only for
# its append side effect and parsed each file code twice.
test = []
for speaker in test_['Speakers']:
    speaker_dir = os.path.join(path, speaker)
    for file_name in sorted(os.listdir(speaker_dir)):
        # Characters [6:-4] of the file name are parsed as the numeric word code
        # (presumably what sits between the 6-character speaker code and the extension).
        word_code = int(file_name[6:-4])
        if word_code in file_num:
            test.append([os.path.join(speaker_dir, file_name), y_[word_code]])

In [191]:
# Turn the list of [path, word] rows into a DataFrame. Note that `test` is
# rebound here from a list to a DataFrame.
test = pd.DataFrame(test)
test.columns = ['Audio_path', 'Word']

In [192]:
# Peek at the first two test rows.
test.head(2)

## Extracting MFCC features

In [193]:
import librosa

In [370]:
def mfcc(audio_file):
    """Return the time-averaged MFCC feature vector of an audio file.

    Parameters
    ----------
    audio_file : str or path-like
        File to load with ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The 64 MFCC coefficients, each averaged over all frames of the clip.

    Raises
    ------
    RuntimeError
        If the file cannot be loaded or its features cannot be computed.
    """
    try:
        x, sr = librosa.load(audio_file)
        mfccs = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=64)
    except Exception as err:
        # The original bare `except` only printed a message and then failed with
        # an UnboundLocalError on `return mfccs`. Fail with the offending path
        # instead so the broken file can be identified.
        raise RuntimeError(f'Error reading file: {audio_file}') from err
    # Average over the frame axis to get one fixed-length vector per clip.
    return np.mean(mfccs.T, axis=0)

In [371]:
# One MFCC feature vector per audio file, in the row order of `train` / `test`.
# The lists are built directly instead of through comprehensions that were used
# only for their append side effect.
X_train = [mfcc(audio_path) for audio_path in train['Audio_path']]
X_test = [mfcc(audio_path) for audio_path in test['Audio_path']]

In [372]:
# Target labels (the spoken word) for the training and test rows.
y_train, y_test = train['Word'], test['Word']

In [373]:
# Shape of the training feature matrix.
np.shape(X_train)

In [409]:
# Shape of the test feature matrix.
np.shape(X_test)

### Encoding target values

In [374]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [375]:
# Learn the word -> integer mapping from the training labels only, then apply
# the same mapping to both label sets.
encoder = LabelEncoder().fit(y_train)
y_train, y_test = encoder.transform(y_train), encoder.transform(y_test)

## Model

In [376]:
from sklearn.metrics import classification_report
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# not just the ones from the classification reports below.
warnings.filterwarnings('ignore')

In [377]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes baseline: fit on the training features, evaluate on the test set.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
gnb_pred = gnb.predict(X_test)
print(classification_report(y_test, gnb_pred))

In [378]:
# Encoded classes that occur in the test labels but were never predicted by GaussianNB.
set(y_test) - set(gnb_pred)

In [379]:
from sklearn.svm import SVC

# Support vector classifier with a one-vs-rest decision function shape.
ovr = SVC(decision_function_shape='ovr')
ovr.fit(X_train, y_train)
ovr_pred = ovr.predict(X_test)
print(classification_report(y_test, ovr_pred))

In [380]:
# Support vector classifier with a one-vs-one decision function shape.
ovo = SVC(decision_function_shape='ovo').fit(X_train, y_train)
# Predict with the `ovo` model; the original called `ovr.predict` here, so this
# cell only re-reported the one-vs-rest model's results.
ovo_pred = ovo.predict(X_test)
print(classification_report(y_test, ovo_pred))

In [381]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbour classifier on the flat MFCC vectors.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn_pred = knn.predict(X_test)
print(classification_report(y_test, knn_pred))

In [263]:
# View each 64-value MFCC vector as an 8x8 single-channel "image" for the CNN.
# X_train / X_test are rebound here, so the flat feature lists used by the
# cells above are no longer available under these names afterwards.
X_train = np.reshape(np.array(X_train), (len(X_train), 8, 8, 1))
X_test = np.reshape(np.array(X_test), (len(X_test), 8, 8, 1))

In [270]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, InputLayer, Dropout

# Reset Keras' global state before building the model.
tf.keras.backend.clear_session()

# Three (3x3 conv -> 2x2 max-pool) stages halve the 8x8 input down to 1x1,
# followed by a small dense head with one softmax unit per word in `y_`.
nn = Sequential([
    InputLayer(input_shape=(8, 8, 1)),
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding="same"),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
    # Dropout(0.3),  # currently disabled
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding="same"),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding="same"),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(len(y_), activation='softmax'),
])
# nn.summary()

In [271]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop training once the validation loss has not improved for 2 consecutive epochs.
callback = EarlyStopping(monitor='val_loss', patience=2)

In [272]:
nn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Early stopping monitors val_loss, so validating on (X_test, y_test) would pick
# the stopping epoch using the very data the final report is computed on.
# Hold out the last 20% of the training samples for validation instead and keep
# the test set for evaluation only.
history = nn.fit(X_train, y_train, epochs=500, validation_split=0.2, callbacks=[callback])

In [274]:
# Convert the per-epoch training log to a DataFrame (rebinding `history`)
# and plot training vs. validation loss.
history = pd.DataFrame(history.history)
history.plot(y=['loss', 'val_loss'], figsize=(20, 3))

In [275]:
# Training vs. validation accuracy per epoch.
history.plot(y=['accuracy', 'val_accuracy'], figsize=(20, 3))

In [276]:
# The network outputs one softmax score per class; the predicted class is the arg-max.
nn_pred = np.argmax(nn.predict(X_test), axis=1)
print(classification_report(y_test, nn_pred))

### Model Comparison

|Model|Accuracy (fraction, 0–1)|
|---|---|
|naive_bayes|0.24|
|SVC(one vs rest)|0.40|
|SVC(one vs one)|0.40|
|KNN Classifier|0.44|
|CNN|0.42|

## Predicting on custom data using KNN model

In [278]:
# Load the demo recording: `x` is the waveform, `sr` its sampling rate.
x, sr = librosa.load('../input/language-word-detection-test-file/demo.wav')

In [281]:
import librosa.display

In [282]:
# Plot the full demo waveform. `sr` is passed by keyword because it is
# keyword-only in librosa >= 0.10; a positional `sr` raises a TypeError there.
librosa.display.waveshow(x, sr=sr)

In [350]:
import matplotlib.pyplot as plt

In [355]:
def mfcc_plot(x, sr, word):
    """Plot a clip's waveform and return its time-averaged MFCC vector.

    Parameters
    ----------
    x : array-like
        Audio samples of the clip.
    sr : int
        Sampling rate of ``x``.
    word : str
        Text used as the plot title.

    Returns
    -------
    numpy.ndarray
        The 64 MFCC coefficients, each averaged over all frames of the clip.
    """
    plt.figure(figsize=(10, 2))
    plt.title(word)
    # `sr` must be passed by keyword: it is keyword-only in librosa >= 0.10.
    librosa.display.waveshow(x, sr=sr)
    # No try/except here: the original bare `except` printed a misleading
    # 'Error reading file' and then failed with UnboundLocalError on the return,
    # hiding the real cause. Any librosa error now surfaces directly.
    mfccs = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=64)
    # Average over the frame axis to get one fixed-length vector per clip.
    return np.mean(mfccs.T, axis=0)

In [357]:
# (start, stop) of each labelled segment of the demo recording, in seconds.
# Offsets are derived from the loaded sampling rate `sr` instead of a
# hard-coded 22050, so the slicing stays correct if the audio is loaded at
# another rate.
segments_sec = [(1, 2), (2.2, 3.2), (3.5, 4.5), (4.5, 6)]
sample = pd.DataFrame(
    [(x[int(start * sr):int(stop * sr)], sr) for start, stop in segments_sec]
)
sample.columns = ['x', 'sr']
sample['Word'] = ['ek', 'do', 'teen', 'chaar']

In [358]:
# Show the segmented demo clips with their expected words.
sample

In [393]:
# Plot every segment and store its averaged MFCC vector alongside it.
sample['mfcc'] = [
    mfcc_plot(sample.loc[row, 'x'], sample.loc[row, 'sr'], sample.loc[row, 'Word'])
    for row in range(len(sample))
]

In [411]:
# Shape of the MFCC feature vector of the first segment.
np.shape(sample['mfcc'][0])

In [413]:
# Classify all segments in one call and map the encoded class ids back to words,
# so the column can be read against `Word`. The original stored 1-element arrays
# of integer label codes instead.
sample_features = np.stack(list(sample['mfcc'])).reshape(len(sample), 64)
sample['Prediction'] = encoder.inverse_transform(knn.predict(sample_features))

In [414]:
# Show the segments together with the KNN predictions.
sample