In [1]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the notebook's project folder is
# reached through this mount point in the next cell.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/Major_Modeling

/content/drive/MyDrive/Major_Modeling


# Import

In [3]:
%tensorflow_version 2.x
from modules.create_MFCC_h5_File import create_mfcc_h5_for_dataframe, read_h5_file
from modules.audio_file_load import processingDataFrame
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv1D, Input, MaxPooling1D, Dropout, BatchNormalization, TimeDistributed, Activation
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from sklearn.model_selection import train_test_split

In [4]:
# Fail fast when the runtime has no GPU attached: report the device name when
# it is the first GPU, otherwise stop the notebook with an error.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print(f'Found GPU at: {device_name}')
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [5]:
import warnings
warnings.filterwarnings('ignore')

This suppresses the warnings that appear due to the decoding fallback.

## Loading dataframe

In [6]:
# Instantiate the project's dataframe helper and fetch the metadata table
# from it.
proc_df = processingDataFrame()
df = proc_df.get_dataFrame()
# Preview the first rows; the rendered output lists the columns _id, userId,
# fileName, gender and devnagariSentence.
df.head()

Unnamed: 0,_id,userId,fileName,gender,devnagariSentence
0,60e2d811552fd6002e30b8fd,L5WMnUqwFRlZUFg48A4DRu9dYwP9srB5s2cqsA/rDZg=,dd42d217-11b5-4107-b8c5-8c60939db63c,male,रोमान्चक बनेको खेलमा बुलबुलेले आर्मीलाई विस-पच...
1,60e2d964552fd6002e30b901,4UO9IETvAMYKoPU5GhL4DRjqb5rNgF1FpAKkyXQ9v/c=,744f492e-8e7f-4a1e-a13e-d65a0f0716c4,male,आईसीसी महिला टी-विस विश्वकपको एसिया छनोट एक मह...
2,61ab150e7526df002f75e921,3upRZGf2oFJMajP1LMVx5vNAMKY+PdM+rIdTvmQHUus=,cfea7257-9d98-481b-a4b2-aa8da3132cca,female,आईसीसी महिला टी-विस विश्वकपको एसिया छनोट एक मह...
3,60e2d958552fd6002e30b8fe,zcRQLjrvRyhg0PDjjhxlGJ1PoM7deRWnvlx08Ja1Wl4=,95ccf7f4-b198-4623-bcf8-45deb7f914e7,male,कार्यक्रममा राष्ट्रिय क्रिकेट टोलीका सदस्यहरुम...
4,60e2d98b552fd6002e30b903,4UO9IETvAMYKoPU5GhL4DRjqb5rNgF1FpAKkyXQ9v/c=,1ad1149b-be62-4099-8356-0e7c5f892374,male,तर त्यसपछि म रियल मड्रिडका लागि धेरै राम्रो गर...


In [7]:
# Sanity check: load the stored MFCC array for the recording in row 1 and
# display its shape.
sample_title = df["fileName"][1]
a = read_h5_file(title=sample_title)
a.shape

(39, 225)

# Splitting the dataset into training and validation sets

In [8]:
# Build the full dataset: load the stored MFCC array for every recording
# listed in the dataframe, in row order.
# Iterating over the `fileName` column itself (instead of counting up to a
# hard-coded 710) keeps the list exactly as long as the dataframe, which the
# following `df['mfcc_data'] = all_data` assignment needs.
all_data = [read_h5_file(title=file_name) for file_name in df["fileName"]]

# The original counter loop left the number of loaded rows in `data_index`;
# keep that name bound for any cell that still reads it.
data_index = len(all_data)

In [9]:
# Attach the loaded MFCC arrays to the existing dataframe as a new
# `mfcc_data` column, one list entry per row.
# Note: this mutates `df` in place, so the earlier `df.head()` preview no
# longer shows every column.
df['mfcc_data'] = all_data

In [10]:
# Split the dataset into training and test sets (75% / 25%).
# Features are the per-recording MFCC arrays; labels are the `fileName` and
# `devnagariSentence` columns.
# A fixed random_state makes the shuffle, and therefore the split, identical
# on every Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    np.array(df['mfcc_data']),
    np.array(df[['fileName', 'devnagariSentence']]),
    test_size=0.25,
    random_state=42,
)

# Model Building

In [11]:
def _conv_block(tensor, filters):
  """Apply Conv1D -> MaxPooling1D -> Dropout -> BatchNormalization to `tensor`."""
  tensor = Conv1D(filters=filters, kernel_size=3, padding='same', activation='relu')(tensor)
  tensor = MaxPooling1D(pool_size=3, strides=3)(tensor)
  tensor = Dropout(0.25)(tensor)
  return BatchNormalization()(tensor)


# Input layer: sequences of unspecified length with 39 features per step.
# NOTE(review): the sample MFCC array shown earlier in this notebook has shape
# (39, 225), so inputs may need transposing to (steps, 39) -- confirm.
data_input = Input(shape=(None, 39))

# Normalize the input before the first convolution.
features = BatchNormalization()(data_input)

# Two convolutional stages: 32 filters, then 64 filters.
features = _conv_block(features, filters=32)
features = _conv_block(features, filters=64)

# Recurrent stages: an LSTM followed by a GRU, each returning the full
# sequence and each followed by batch normalization.
# NOTE(review): relu is a non-default activation for LSTM/GRU; as far as I
# know Keras only uses its fused cuDNN kernel with the default tanh --
# confirm this choice is intentional.
features = LSTM(units=64, return_sequences=True, activation='relu')(features)
features = BatchNormalization()(features)
features = GRU(units=64, return_sequences=True, activation='relu')(features)
features = BatchNormalization()(features)

# Output head: a 20-unit dense layer applied at every time step, followed by
# a softmax.
step_scores = TimeDistributed(Dense(20))(features)
outputs = Activation('softmax', name='Softmax')(step_scores)

model = Model(data_input, outputs, name='CNN_LSTM_GRU_model')

model.summary()

Model: "CNN_LSTM_GRU_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 39)]        0         
                                                                 
 batch_normalization (BatchN  (None, None, 39)         156       
 ormalization)                                                   
                                                                 
 conv1d (Conv1D)             (None, None, 32)          3776      
                                                                 
 max_pooling1d (MaxPooling1D  (None, None, 32)         0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, None, 32)          0         
                                                                 
 batch_normalization_1 (Batc  (None, None, 32)  

# Model Compiling

In [12]:
# Disabled: the compile step is currently commented out, so `model` is left
# uncompiled.
# NOTE(review): the model ends in a softmax and the labels produced by the
# split cell are the fileName/devnagariSentence columns, so the
# MeanSquaredError loss and 'accuracy' metric below likely need revisiting
# before this is re-enabled -- confirm.
# model.compile(optimizer=Adam(),
#               loss=MeanSquaredError(),
#               metrics=['accuracy'],
#               )

# Model Fitting

In [13]:
# Disabled: training is not run in this notebook (the compile call is also
# commented out).
# NOTE(review): x_train is built from the per-recording MFCC arrays and
# y_train from text columns, so both presumably need padding/encoding into
# numeric tensors before fit can be enabled -- confirm.
# history = model.fit(x_train, y_train,
#                    validation_data=(x_test, y_test),
#                    epochs=5,
#                    batch_size=32,
#                    )