In [None]:
# Import all the required libraries
import tensorflow as tf 
import numpy as np
import matplotlib.pyplot as plt
#from sklearn.preprocessing import MinMaxScaler, StandardScaler 
from sklearn.model_selection import train_test_split
#from sklearn.metrics import roc_auc_score, precision_score, recall_score
from tensorflow.keras import Model
from tensorflow.keras import regularizers
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.activations import *
#from tensorflow.keras import backend as K

In [None]:
# Load the input features and the two label arrays, then split them.
# The features are divided by a fixed constant; the original note says this
# scales them into [0, 1] (presumably 184.5 is the dataset maximum - TODO confirm).
X = np.load('../data/data_used/np_data/DR_50s_400ms_cat_all_vowels.npy') / 184.5
y_dom = np.load('../data/data_used/np_data/label_dom.npy')
y_rec = np.load('../data/data_used/np_data/label_rec.npy')

# Hold out 15% of the samples; the fixed seed keeps the split reproducible.
(X_train, X_test,
 y_dom_train, y_dom_test,
 y_rec_train, y_rec_test) = train_test_split(
    X, y_dom, y_rec,
    test_size=0.15,
    random_state=42,
)


In [None]:
# Weights of the two output losses. They are passed as
# `loss_weights = [w_dom, w_rec]` to `model.compile` inside `get_model`,
# so the dominant-vowel loss contributes 0.98 and the recessive-vowel loss 0.02.
w_dom = 0.98
w_rec = 0.02
# Earlier draft of a hand-written weighted loss, kept for reference only (it is
# commented out and not used anywhere in this notebook):
# def weighted_loss(y_true, y_pred):
#     cce = tf.keras.losses.CategoricalCrossentropy()
#     cce_dom = cce(tf.square(y_true[0] - y_pred[0])).numpy()
#     cce_rec = cce(tf.square(y_true[1] - y_pred[1])).numpy()
#     loss_fn = w_dom * cce_dom + w_rec * cce_rec
#     return loss_fn

# Due to heavy regularization, the output of the model might become very small.
# Vary corr_factor to validate the identification-score pattern, keeping the value
# as close to 1 as possible.
# In the identification-score cells each group's mean model output is divided by
# corr_factor to obtain the per-sample decision threshold.
corr_factor = 1.3125

In [None]:
### I have also included the code from Peddinti et al., 2015. I would strongly recommend experimenting with that model too.

In [None]:
# Model Architecture
# L2 regularization strength shared by every Dense layer built in get_model.
reg = 0.008
def get_model(input_units = 40000, output_units = 5, pretrained_weights = None):
    """Build and compile the two-headed (dominant / recessive vowel) network.

    The flat input vector is split into 8 equal, contiguous chunks. Each chunk
    goes through its own Dense(500) layer; neighbouring branches are then merged
    pairwise and reduced (8 x 500 -> 4 x 200 -> 2 x 80). The two remaining
    branches are concatenated and feed two separate heads, each a Dense(32)
    followed by a linear Dense(output_units) layer.

    Reads the module-level `reg`, `w_dom` and `w_rec` values.

    Args:
        input_units: Length of the flat input vector. Must be divisible by 8.
        output_units: Number of units in each of the two output layers.
        pretrained_weights: Optional path handed to `model.load_weights`.

    Returns:
        A compiled Keras `Model` whose outputs are
        `[dense_dominant, dense_recessive]`.

    Raises:
        ValueError: If `input_units` cannot be split into 8 equal chunks.
    """
    num_branches = 8
    if input_units % num_branches != 0:
        raise ValueError(
            'input_units must be divisible by {} (got {})'.format(num_branches, input_units))
    # With the default input_units this is 5000, the width previously hard-coded.
    chunk = input_units // num_branches

    def dense_relu(units, tensor):
        # ReLU Dense layer with L2 kernel regularization (used for all hidden branches).
        return Dense(units, activation = tf.nn.relu, kernel_regularizer=regularizers.l2(reg))(tensor)

    def merge_pairs(tensors):
        # Concatenate neighbours: [t0, t1, t2, t3, ...] -> [t0|t1, t2|t3, ...].
        return [Concatenate()([left, right]) for left, right in zip(tensors[0::2], tensors[1::2])]

    def output_head(features, name):
        # Dense(32, ReLU) followed by a linear output layer; both also regularize the bias.
        head = Dense(32, activation = tf.nn.relu, kernel_regularizer=regularizers.l2(reg), bias_regularizer=regularizers.l2(reg))(features)
        return Dense(output_units, activation = None, kernel_regularizer=regularizers.l2(reg), bias_regularizer=regularizers.l2(reg), name=name)(head)

    # NOTE: layers are created in the same order as in the previous hand-unrolled
    # version (all slices, then each Dense/Concatenate stage, then the dominant
    # head before the recessive head) so that auto-generated layer names and the
    # weight order expected by existing checkpoints should stay the same.
    Inputs = Input(shape=(input_units,))
    hidden = Dropout(0.4)(Inputs)

    # Slice bounds are passed through `arguments` rather than captured from the
    # loop variable, so every Lambda keeps its own start/stop.
    branches = [
        Lambda(lambda x, start, stop: x[:, start:stop],
               arguments={'start': k * chunk, 'stop': (k + 1) * chunk})(hidden)
        for k in range(num_branches)
    ]

    # Hidden layers use ReLU. The output layers are linear (activation=None)
    # and are trained with an MSE loss below.
    branches = [dense_relu(500, branch) for branch in branches]
    branches = [dense_relu(200, branch) for branch in merge_pairs(branches)]
    branches = [dense_relu(80, branch) for branch in merge_pairs(branches)]

    hidden_4_1 = Concatenate()(branches)

    output_dominant = output_head(hidden_4_1, 'dense_dominant')
    output_recessive = output_head(hidden_4_1, 'dense_recessive')

    model = Model(inputs = Inputs, outputs = [output_dominant,output_recessive])
    model.compile(optimizer = Adam(learning_rate = 0.001), loss = ['mse','mse'], loss_weights = [w_dom,w_rec], metrics = ['accuracy'])
    # In case we have pretrained weights, we can use them to initialise the parameters.
    if pretrained_weights:
        model.load_weights(pretrained_weights)
    return model
model = get_model()
model.summary()

In [None]:
# Output locations for this training run.
log_dir = "../logs/TDNN/model_30" #Enter directory path for saving logs
model_name = '../models/TDNN/model_30.hdf5' #Enter filename path for saving model

# Callbacks. Everything except TensorBoard is driven by the validation loss:
#   - checkpoint: keep only the best model seen so far
#   - lr_decay:   multiply the learning rate by 0.7 after 5 stagnant epochs (floor 1e-4)
#   - early_stop: give up after 10 stagnant epochs
tensorboard_callback = TensorBoard(
    log_dir=log_dir,
    update_freq='epoch',
)
model_checkpoint = ModelCheckpoint(
    model_name,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
lr_decay = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7,
    patience=5,
    min_lr=0.0001,
)
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
)

# Train a freshly built model; the held-out split doubles as validation data.
model = get_model()
history = model.fit(
    x=X_train,
    y=[y_dom_train, y_rec_train],
    batch_size=100,
    epochs=20,
    validation_data=(X_test, [y_dom_test, y_rec_test]),
    callbacks=[tensorboard_callback, model_checkpoint, lr_decay, early_stop],
)

In [None]:
# Plot the accuracy and loss curves recorded in `history` during training.

def _plot_history_curves(training_history, keys, title, ylabel):
    """Draw one curve per key of `training_history.history` on a single figure.

    The legend is built from `keys` itself, so a curve's label can never
    disagree with the series that was actually plotted.
    """
    for key in keys:
        plt.plot(training_history.history[key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(list(keys), loc='upper left')
    plt.show()


# "Accuracy"
accuracy_keys = [
    'dense_dominant_accuracy',
    'val_dense_dominant_accuracy',
    'dense_recessive_accuracy',
    'val_dense_recessive_accuracy',
]
_plot_history_curves(history, accuracy_keys, 'model accuracy', 'accuracy')

# "Vowel Loss"
# (The fourth legend entry used to repeat 'dense_recessive_loss' although the
# curve plotted is 'val_dense_recessive_loss'.)
vowel_loss_keys = [
    'dense_dominant_loss',
    'val_dense_dominant_loss',
    'dense_recessive_loss',
    'val_dense_recessive_loss',
]
_plot_history_curves(history, vowel_loss_keys, 'vowel loss', 'loss')

for key in vowel_loss_keys:
    print(history.history[key])


# "Model Loss"
model_loss_keys = ['loss', 'val_loss']
_plot_history_curves(history, model_loss_keys, 'model loss', 'loss')

for key in model_loss_keys:
    print(history.history[key])

In [None]:
### Run the following code to obtain outputs from the model and to generate all the plots for identification scores.

In [None]:
# Run the saved model on the data file of every semitone condition and collect
# the two outputs (dominant, recessive) per condition, in `semi_list` order.
outputs_dominant, outputs_recessive = [], []
model = tf.keras.models.load_model('../models/TDNN/model_30.hdf5')
# The entries are kept exactly as given (some carry a trailing space) because
# they are inserted verbatim into the file name below.
semi_list = ["0s ","25s","50s","1s ","2s ","4s "]
for semi in semi_list:
    # Same scaling constant as the one applied to the training data.
    X = np.load('../data/65_dBSPL/np_data/DR_{}_400ms_cat_all_vowels.npy'.format(semi)) / 184.5
    output_dom, output_rec = model.predict(X)
    outputs_dominant.append(output_dom)
    outputs_recessive.append(output_rec)

In [None]:
num_semi = len(outputs_dominant)
# output_code[i] = [column read from the dominant output, column read from the
# recessive output] for vowel pair i. For the underlying dominant-recessive
# relations refer to Chintanpalli et al., 2016.
output_code = np.array([
    [0,0],[1,0],[2,0],[3,0],[4,0],
    [1,0],[1,1],[1,2],[3,1],[1,4],
    [2,0],[1,2],[2,2],[2,3],[2,4],
    [3,0],[3,1],[2,3],[3,3],[4,3],
    [4,0],[1,4],[2,4],[4,3],[4,4],
])
# preds_*[i, j]: how many samples of pair i exceed the threshold in condition j.
preds_dom = np.zeros([25,num_semi])
preds_rec = np.zeros([25,num_semi])
for j, (output_dom, output_rec) in enumerate(zip(outputs_dominant, outputs_recessive)):
    for i in range(25):
        # Pair i is read from rows 100*i .. 100*(i+1)-1 of the model output.
        rows = slice(100*i, 100*(i+1))
        pred_dom = output_dom[rows, output_code[i,0]]
        pred_rec = output_rec[rows, output_code[i,1]]
        # A sample counts when its output exceeds the group mean scaled down by corr_factor.
        cutoff_dom = pred_dom.mean(axis=0) / corr_factor
        cutoff_rec = pred_rec.mean(axis=0) / corr_factor
        preds_dom[i,j] = np.count_nonzero(pred_dom > cutoff_dom)
        preds_rec[i,j] = np.count_nonzero(pred_rec > cutoff_rec)
print(preds_dom.shape)
print(preds_dom)

In [None]:
# Show the shape of both count matrices (dominant first, then recessive).
for counts in (preds_dom, preds_rec):
    print(counts.shape)

In [None]:
### Both Vowel Identification
threshold = 80
# A pair is identified only when both its dominant and its recessive count exceed the threshold.
preds_bool = np.logical_and(preds_dom > threshold, preds_rec > threshold)
# preds_* have 25 rows (vowel pairs), so each identified pair adds 4 percentage points.
points = 4 * preds_bool.sum(axis=0)
print(points)

f0_difference_hz = np.array([0,1.5,3,6,12,26])
plt.plot(f0_difference_hz, points[np.arange(6)], marker='s')
plt.title('Both vowel identification')
plt.xlabel('F0 difference(Hz)')
plt.ylabel('Percentage identification of both vowels')
plt.xlim([0,26])
plt.ylim([0,110])
plt.show()

In [None]:
### Both Vowel Identification: Identical
threshold = 80
# Rows whose output_code entry has the same index twice ([k, k]).
identical_rows = [0,6,12,18,24]
preds_bool = np.logical_and(preds_dom > threshold, preds_rec > threshold)
# 5 rows are selected, so each identified pair adds 20 percentage points.
points = 20 * preds_bool[identical_rows].sum(axis=0)
print(points)

f0_difference_hz = np.array([0,1.5,3,6,12,26])
plt.plot(f0_difference_hz, points[np.arange(6)], marker='s')
plt.title('Both vowel identification (Identical)')
plt.xlabel('F0 difference(Hz)')
plt.ylabel('Percentage identification of both vowels')
plt.xlim([0,26])
plt.ylim([0,110])
plt.show()

In [None]:
### Both Vowel Identification: Different
threshold = 80
# Every row except 0, 6, 12, 18 and 24, i.e. 1-5, 7-11, 13-17 and 19-23.
different_rows = [row for row in range(25) if row % 6 != 0]
preds_bool = np.logical_and(preds_dom > threshold, preds_rec > threshold)
# 20 rows are selected, so each identified pair adds 5 percentage points.
points = 5 * preds_bool[different_rows].sum(axis=0)
print(points)

f0_difference_hz = np.array([0,1.5,3,6,12,26])
plt.plot(f0_difference_hz, points[np.arange(6)], marker='s')
plt.title('Both vowel identification (Different)')
plt.xlabel('F0 difference(Hz)')
plt.ylabel('Percentage identification of both vowels')
plt.xlim([0,26])
plt.ylim([0,110])
plt.show()

In [None]:
### Dominant Vowel Identification
# NOTE: `threshold` is not set in this cell; the value left by an earlier cell is used.
preds_bool = preds_dom > threshold
# preds_dom has 25 rows (vowel pairs), so each identified pair adds 4 percentage points.
points = preds_bool.sum(axis=0)*4
print(points)

plt.plot(np.array([0,1.5,3,6,12,26]),points[[0,1,2,3,4,5]],marker='s')
plt.title('One vowel identification (Dominant)')
plt.xlabel('F0 difference(Hz)')
# The y-label previously read "both vowels" (copied from the both-vowel plots).
plt.ylabel('Percentage identification of dominant vowel')
plt.xlim([0,26])
plt.ylim([0,110])
plt.show()

In [None]:
### Recessive Vowel Identification
# NOTE: `threshold` is not set in this cell; the value left by an earlier cell is used.
preds_bool = preds_rec > threshold
# preds_rec has 25 rows (vowel pairs), so each identified pair adds 4 percentage points.
points = preds_bool.sum(axis=0)*4
print(points)

plt.plot(np.array([0,1.5,3,6,12,26]),points[[0,1,2,3,4,5]],marker='s')
plt.title('One vowel identification (Recessive)')
plt.xlabel('F0 difference(Hz)')
# The y-label previously read "both vowels" (copied from the both-vowel plots).
plt.ylabel('Percentage identification of recessive vowel')
plt.xlim([0,26])
plt.ylim([0,110])
plt.show()

In [None]:
### Single Vowel Identification
# NOTE: `threshold` is not set in this cell; the value left by an earlier cell is used.
# Adding two boolean arrays is an element-wise OR: a pair counts when the
# dominant or the recessive count (or both) exceeds the threshold.
preds_bool = (preds_dom > threshold) + (preds_rec > threshold)
# 25 rows (vowel pairs), so each identified pair adds 4 percentage points.
points = preds_bool.sum(axis=0)*4
print(points)

plt.plot(np.array([0,1.5,3,6,12,26]),points[[0,1,2,3,4,5]],marker='s')
plt.title('One vowel identification (Either)')
plt.xlabel('F0 difference(Hz)')
# The y-label previously read "both vowels" (copied from the both-vowel plots).
plt.ylabel('Percentage identification of either vowel')
plt.xlim([0,26])
plt.ylim([0,110])
plt.show()

In [None]:
### Single Vowel Identification: Identical
# NOTE: `threshold` is not set in this cell; the value left by an earlier cell is used.
# Element-wise OR of the two boolean masks.
preds_bool = (preds_dom > threshold) + (preds_rec > threshold)
# Rows 0, 6, 12, 18, 24 are the [k, k] entries of output_code; 5 rows -> 20 points each.
points = preds_bool[[0,6,12,18,24]].sum(axis=0)*20
print(points)

plt.plot(np.array([0,1.5,3,6,12,26]),points[[0,1,2,3,4,5]],marker='s')
plt.title('One vowel identification (Either) (Identical)')
plt.xlabel('F0 difference(Hz)')
# The y-label previously read "both vowels" (copied from the both-vowel plots).
plt.ylabel('Percentage identification of either vowel')
plt.xlim([0,26])
plt.ylim([0,110])
plt.show()

In [None]:
### Single Vowel Identification: Different
# NOTE: `threshold` is not set in this cell; the value left by an earlier cell is used.
# Element-wise OR of the two boolean masks.
preds_bool = (preds_dom > threshold) + (preds_rec > threshold)
# All rows except 0, 6, 12, 18, 24; 20 rows -> 5 percentage points each.
points = preds_bool[[1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,19,20,21,22,23]].sum(axis=0)*5
print(points)

plt.plot(np.array([0,1.5,3,6,12,26]),points[[0,1,2,3,4,5]],marker='s')
plt.title('One vowel identification (Either) (Different)')
plt.xlabel('F0 difference(Hz)')
# The y-label previously read "both vowels" (copied from the both-vowel plots).
plt.ylabel('Percentage identification of either vowel')
plt.xlim([0,26])
plt.ylim([0,110])
plt.show()

In [None]:
### Use the subsequent code for levels study.

In [None]:
#LEVELS STUDY:
# For every sound level, run the levels model on the two semitone conditions
# and reduce the outputs to identification percentages.

model = tf.keras.models.load_model('../models/TDNN_levels/model_1.hdf5')
# output_code[i] = [column read from the dominant output, column read from the
# recessive output] for vowel pair i.
output_code = np.array([
    [0,0],[1,0],[2,0],[3,0],[4,0],
    [1,0],[1,1],[1,2],[3,1],[1,4],
    [2,0],[1,2],[2,2],[2,3],[2,4],
    [3,0],[3,1],[2,3],[3,3],[4,3],
    [4,0],[1,4],[2,4],[4,3],[4,4],
])
semi_list = ["0s ","4s "]
levels = ['25_dBSPL','35_dBSPL','50_dBSPL','65_dBSPL','75_dBSPL','85_dBSPL']
threshold = 80
num_levels = len(levels)
print(num_levels)
## same_f0 / diff_f0: one row per level; column 0 is both vowel, column 1 is single vowel
same_f0 = np.zeros(shape=(num_levels,2))
diff_f0 = np.zeros(shape=(num_levels,2))
# One scaling constant per level (currently the same value for all of them).
norm_values = [184.5,184.5,184.5,184.5,184.5,184.5]
for num, (level, norm_value) in enumerate(zip(levels, norm_values)):
    # Model outputs for this level, one entry per semitone condition.
    outputs_dominant, outputs_recessive = [], []
    for semi in semi_list:
        X = np.load('../data/{}/np_data/DR_{}_400ms_cat_all_vowels.npy'.format(level,semi)) / norm_value
        output_dom, output_rec = model.predict(X)
        outputs_dominant.append(output_dom)
        outputs_recessive.append(output_rec)

    # Per pair (row) and condition (column): number of samples above the cutoff.
    num_semi = len(outputs_dominant)
    preds_dom = np.zeros([25,num_semi])
    preds_rec = np.zeros([25,num_semi])
    for j, (output_dom, output_rec) in enumerate(zip(outputs_dominant, outputs_recessive)):
        for i in range(25):
            # Pair i is read from rows 100*i .. 100*(i+1)-1 of the model output.
            rows = slice(100*i, 100*(i+1))
            pred_dom = output_dom[rows, output_code[i,0]]
            pred_rec = output_rec[rows, output_code[i,1]]
            cutoff_dom = pred_dom.mean(axis=0) / corr_factor
            cutoff_rec = pred_rec.mean(axis=0) / corr_factor
            preds_dom[i,j] = np.count_nonzero(pred_dom > cutoff_dom)
            preds_rec[i,j] = np.count_nonzero(pred_rec > cutoff_rec)

    dom_identified = preds_dom > threshold
    rec_identified = preds_rec > threshold
    preds_bool_both = np.logical_and(dom_identified, rec_identified)
    preds_bool_either = np.logical_or(dom_identified, rec_identified)
    # 25 pairs -> 4 percentage points each. Index 0 is same F0, index 1 is different F0.
    points_both = 4 * preds_bool_both.sum(axis=0)
    points_either = 4 * preds_bool_either.sum(axis=0)
    same_f0[num,0], same_f0[num,1] = points_both[0], points_either[0]
    diff_f0[num,0], diff_f0[num,1] = points_both[1], points_either[1]

In [None]:
### Level study: Both Vowel
# Column 0 of same_f0 / diff_f0 holds the both-vowel percentages, one row per level.
print(same_f0[:,0])
print(diff_f0[:,0])
levels_db = np.array([25,35,50,65,75,85])
for scores, marker in ((same_f0, '*'), (diff_f0, 's')):
    plt.plot(levels_db, scores[:,0], marker=marker)
plt.title('Level study: Both Vowel')
plt.xlabel('Vowel Level (dB SPL)')
plt.ylabel('Percentage identification of both vowels')
plt.legend(['Same F0','Different F0'],loc='best')
plt.xlim([25,85])
plt.ylim([0,110])
plt.show()

In [None]:
### Level study: Either Vowel
# Column 1 of same_f0 / diff_f0 holds the either-vowel percentages, one row per level.
print(same_f0[:,1])
print(diff_f0[:,1])
plt.plot(np.array([25,35,50,65,75,85]),same_f0[:,1],marker='*')
plt.plot(np.array([25,35,50,65,75,85]),diff_f0[:,1],marker='s')
plt.title('Level study: Either Vowel')
plt.xlabel('Vowel Level (dB SPL)')
# The y-label previously read "both vowels" (copied from the both-vowel plot).
plt.ylabel('Percentage identification of either vowel')
plt.legend(['Same F0','Different F0'],loc='best')
plt.xlim([25,85])
plt.ylim([0,110])
plt.show()

In [None]:
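# # Trying out new metrics (disabled; uncomment to use). This needs the `precision_score` import from sklearn.metrics, which is currently commented out in the imports cell. Despite the `auc_scores_*` names, the values computed below are macro-averaged precision scores: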

# for j in range(len(outputs_dominant)):
#     output_dom = outputs_dominant[j]
#     output_dom = (output_dom.max(axis=1,keepdims=1)== output_dom).astype(int) 
#     auc_scores_dom = precision_score(y_dom,output_dom, average='macro')
#     print(auc_scores_dom)


# for j in range(len(outputs_recessive)):
#     output_rec = outputs_recessive[j]
#     output_rec = (output_rec.max(axis=1,keepdims=1) == output_rec).astype(int)
#     auc_scores_rec = precision_score(y_rec,output_rec, average='macro')
#     print(auc_scores_rec)

In [None]:
# # Saving Model Visualization:
# from tensorflow.keras.utils import plot_model
# from tensorflow.keras.models import load_model
# model = load_model('../models/TDNN/model_4.hdf5')
# plot_model(
#     model,
#     to_file="../models/TDNN/model_4_keras_util.png",
#     show_shapes=False,
#     show_dtype=False,
#     show_layer_names=False,
#     rankdir="TB",
#     expand_nested=False,
#     dpi=96,
#     layer_range=None,
# )