In [0]:
import tensorflow as tf
from tensorflow import keras
import hickle as h
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
!pip install hickle

Collecting hickle
[?25l  Downloading https://files.pythonhosted.org/packages/b3/45/ebc9e2a77f2349a4947a2eedd4480fc7b248d3f315b0f5a3a1826adcc522/hickle-3.4.5-py3-none-any.whl (40kB)
[K     |████████▏                       | 10kB 32.1MB/s eta 0:00:01[K     |████████████████▎               | 20kB 1.7MB/s eta 0:00:01[K     |████████████████████████▌       | 30kB 2.4MB/s eta 0:00:01[K     |████████████████████████████████| 40kB 2.3MB/s 
Installing collected packages: hickle
Successfully installed hickle-3.4.5


In [0]:
mfccs = h.load('drive/My Drive/mfcc.hkl')
#pitches = h.load('drive/My Drive/pitches.hkl')
deltas = h.load('drive/My Drive/deltas.hkl')
ddeltas = h.load('drive/My Drive/deltas.hkl')

In [0]:
mfccs = [np.nan_to_num(m.T) for m in mfccs]
deltas = [np.nan_to_num(d.T) for d in deltas]
ddeltas = [np.nan_to_num(dd.T) for dd in ddeltas]

In [0]:
def find_max(egs):
        return max([e.shape[0] for e in egs])
    
def pad_mfcc(egs):
    
    max_val = find_max(egs)

    padded = np.array([np.pad(e, ((0, max_val-e.shape[0]), (0, 0)) , 'constant') for e in egs])
    return padded    

def pad_pitch(egs):
    max_val = find_max(egs)

    padded = np.array([np.pad(e, (0, max_val-e.shape[0]) , 'constant') for e in egs])
    return padded 

In [0]:
mfccs = pad_mfcc(mfccs)
deltas = pad_mfcc(deltas)
ddeltas = pad_mfcc(ddeltas)

data = np.array([[i,j,k] for i, j, k in zip(mfccs, deltas, ddeltas)])
data = data.reshape(2432, 5388, 20, 3)
print(data.shape)

(2432, 5388, 20, 3)


In [0]:
del(mfccs)
del(deltas)
del(ddeltas)

In [0]:
from tqdm import  tqdm_notebook
labels = np.zeros((2432,2))
path = "drive/My Drive/train_labels"
folder = os.fsencode(path)
for file in tqdm_notebook(os.listdir(folder)):
  fn = os.fsdecode(file)
  try:
    ind = int(fn.split('.')[0])
  except:
    ind = int(fn.split('.')[0].split()[0])
  labels[ind] = np.load(path +"/"+fn)

HBox(children=(IntProgress(value=0, max=2442), HTML(value='')))




In [0]:
#train = pd.read_csv('drive/My Drive/train.tsv', sep='\t')
#num_speakers = len(set(train['client_id']))
#spk = list(set(train['client_id']))
#mapped = {spk[i]:i for i in range(num_speakers)}

In [0]:
#train = train.replace(mapped)

In [0]:
# Get speaker IDs
#spkIDs = np.array(train['client_id'].values).reshape(-1,1)
#del(train)

In [0]:
#PARAMS

#TEMP_DIM_MAX1 = find_max(mfccs)
NUM_MFCCS = 20
BATCH_SIZE = 120

#TEMP_DIM_MAX2 = find_max(pitches)
NUM_HIDDEN = 128 #Number of hidden units
#NUM_SPK = len(spkIDs)
NUM_EPOCHS = 10

In [0]:
class LSTM(tf.keras.Model):
    def __init__(self, hidden_size, return_seq=True):
        super().__init__()
        initializer = tf.variance_scaling_initializer(scale = 2.0)
        self.l1 = keras.layers.LSTM(hidden_size, activation=tf.nn.tanh,
                                    kernel_initializer=initializer, 
                                    return_sequences=return_seq)
        
    def call(self, x):
        l = self.l1(x)
        return l

In [0]:
data = np.array([(i,j) for i, j in zip(data, labels)])
np.random.shuffle(data)
#mfccs_pad = np.array([i[0] for i in data])
#spkIDs = np.array([i[1] for i in data])
labels = np.array([i[1] for i in data])
data = np.array([i[0] for i in data])


In [0]:
labels = np.argmax(labels, axis=1)

In [0]:
labels.shape

(2432,)

In [0]:
data = data.reshape((2432, 20, 5388, 3))

In [0]:
def run_lstm(train, ytrain, test, ytest, num_epochs, batchsize):
    
    #Run model
    tf.reset_default_graph()
    
    X = tf.placeholder(tf.float32, [None, 20, train.shape[2]])
    y = tf.placeholder(tf.int32, [None]) 
    
    l1m = LSTM(NUM_HIDDEN)
    l1m_out = l1m(X)
    l1m_out = tf.layers.Dropout(0.4)(l1m_out)
    l2m = LSTM(NUM_HIDDEN, return_seq=False)
    l2m_out = l2m(l1m_out)
    l2m_out = tf.layers.Dropout(0.4)(l2m_out)
    output = tf.layers.dense(l2m_out, 100)
    
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output)
    loss = tf.reduce_mean(loss)
    
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)
    soft_out = tf.nn.softmax(output)
    
    y_onehot = tf.one_hot(y, 2)
    
    pred = tf.equal(tf.argmax(soft_out,1), tf.argmax(y_onehot,1))
    #pred = tf.equal(tf.argmax(soft_out,1), y)
    accuracy = tf.reduce_mean(tf.cast(pred, tf.float32))
    hist = {'train_loss':[], 'test_loss':[], 'train_acc':[], 'test_acc':[]}
    
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      steps = train.shape[0]//batchsize
      for epoch in range(num_epochs):
          epoch_loss = 0
          epoch_acc = 0
          for step in range(steps):
              start = step*batchsize
              end = min(start + batchsize, train.shape[0])
              batch_mfcc = train[start:end]
              #batch_pitch = pitches[start:end]
              batchy = ytrain[start:end]
              _, cost = sess.run([optimizer, loss], feed_dict={X:batch_mfcc, 
                                                                y:batchy})
              train_acc = sess.run(accuracy, feed_dict = {X:batch_mfcc, 
                                                          y:batchy})
              epoch_loss += cost
              epoch_acc += train_acc
              
              print("Epoch:",epoch,"Step:",step+1,"TrainLoss:",epoch_loss/(step+1))
          train_loss, train_acc = sess.run([loss,accuracy], feed_dict={X:train,y:ytrain})
          print("Epoch:",epoch,"Loss:", train_loss, "accuracy:", train_acc)
          test_loss, test_acc = sess.run([loss,accuracy], feed_dict = {X:test, y:ytest})
          print("Test:","Epoch:",epoch,"Loss:", test_loss, "accuracy:", test_acc)
          hist['train_loss'].append(train_loss)
          hist['train_acc'].append(train_acc)
          hist['test_loss'].append(test_loss)
          hist['test_acc'].append(test_acc)
    return hist


In [0]:
#train, ytrain, test, ytest, num_epochs, batchsize

hist = run_lstm(data[:1500], labels[:1500], data[1500:], labels[1500:], 10, 128)

Epoch: 0 Step: 1 TrainLoss: 0.6919435262680054
Epoch: 0 Step: 2 TrainLoss: 0.6850835084915161
Epoch: 0 Step: 3 TrainLoss: 0.6908437410990397
Epoch: 0 Step: 4 TrainLoss: 0.692546159029007
Epoch: 0 Step: 5 TrainLoss: 0.6998616576194763
Epoch: 0 Step: 6 TrainLoss: 0.7023633023103079
Epoch: 0 Step: 7 TrainLoss: 0.7022409268787929
Epoch: 0 Step: 8 TrainLoss: 0.7031664103269577
Epoch: 0 Step: 9 TrainLoss: 0.7026065720452203
Epoch: 0 Step: 10 TrainLoss: 0.7039028763771057
Epoch: 0 Step: 11 TrainLoss: 0.7051765864545648
Epoch: 0 Loss: 0.42264774 accuracy: 0.936
Test: Epoch: 0 Loss: 0.6974143 accuracy: 0.52896994
Epoch: 1 Step: 1 TrainLoss: 0.38037627935409546
Epoch: 1 Step: 2 TrainLoss: 0.3592494875192642
Epoch: 1 Step: 3 TrainLoss: 0.3442332446575165
Epoch: 1 Step: 4 TrainLoss: 0.3239079490303993
Epoch: 1 Step: 5 TrainLoss: 0.3021161943674088
Epoch: 1 Step: 6 TrainLoss: 0.2853999709089597
Epoch: 1 Step: 7 TrainLoss: 0.2729417915855135
Epoch: 1 Step: 8 TrainLoss: 0.2650747410953045
Epoch: 1 St

## Monster LSTM

In [0]:
data.shape

(2432, 20, 5388, 3)

In [0]:
def run_lstm_monster(train, ytrain, test, ytest, num_epochs, batchsize):
    
    #Run model
    tf.reset_default_graph()
    
    X_mfcc = tf.placeholder(tf.float32, [None, 20, train.shape[2]])
    X_d = tf.placeholder(tf.float32, [None, 20, train.shape[2]])
    X_dd = tf.placeholder(tf.float32, [None, 20, train.shape[2]])
    y = tf.placeholder(tf.int32, [None]) 
    
    #LSTM for MFCC
    l1m = LSTM(NUM_HIDDEN)
    l1m_out = l1m(X_mfcc)
    l1m_out = tf.layers.Dropout(0.4)(l1m_out)
    l2m = LSTM(NUM_HIDDEN, return_seq=False)
    l2m_out = l2m(l1m_out)
    l2m_out = tf.layers.Dropout(0.4)(l2m_out)
    output1 = tf.layers.dense(l2m_out, 100)

    #LSTM for Deltas
    l1d = LSTM(NUM_HIDDEN)
    l1d_out = l1d(X_d)
    l1d_out = tf.layers.Dropout(0.4)(l1d_out)
    l2d = LSTM(NUM_HIDDEN, return_seq=False)
    l2d_out = l2d(l1d_out)
    l2d_out = tf.layers.Dropout(0.4)(l2d_out)
    output2 = tf.layers.dense(l2d_out, 100)

    #LSTM for Double Deltas
    l1dd = LSTM(NUM_HIDDEN)
    l1dd_out = l1dd(X_dd)
    l1dd_out = tf.layers.Dropout(0.4)(l1dd_out)
    l2dd = LSTM(NUM_HIDDEN, return_seq=False)
    l2dd_out = l2dd(l1dd_out)
    l2dd_out = tf.layers.Dropout(0.4)(l2dd_out)
    output3 = tf.layers.dense(l2dd_out, 100)


    output_common = tf.concat([output1, output2, output3], axis=1)
    output = tf.layers.dense(output_common, 2)
  
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output)
    loss = tf.reduce_mean(loss)
    
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)
    soft_out = tf.nn.softmax(output)
    
    y_onehot = tf.one_hot(y, 2)
    
    pred = tf.equal(tf.argmax(soft_out,1), tf.argmax(y_onehot,1))
    #pred = tf.equal(tf.argmax(soft_out,1), y)
    accuracy = tf.reduce_mean(tf.cast(pred, tf.float32))
    hist = {'train_loss':[], 'test_loss':[], 'train_acc':[], 'test_acc':[]}
    saver = tf.train.Saver()
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      steps = train.shape[0]//batchsize
      for epoch in range(num_epochs):
          epoch_loss = 0
          epoch_acc = 0
          for step in range(steps):
              start = step*batchsize
              end = min(start + batchsize, train.shape[0])
              batch_mfcc = train[start:end,:,:,0]
              batch_d = train[start:end,:,:,1]
              batch_dd = train[start:end,:,:,2]
              batchy = ytrain[start:end]
              _, cost = sess.run([optimizer, loss], feed_dict={X_mfcc:batch_mfcc,
                                                               X_d:batch_d,
                                                               X_dd:batch_dd, 
                                                              y:batchy})
              train_acc = sess.run(accuracy, feed_dict = {X_mfcc:batch_mfcc,
                                                          X_d:batch_d,
                                                          X_dd:batch_dd, 
                                                          y:batchy})
              epoch_loss += cost
              epoch_acc += train_acc
              
              print("Epoch:",epoch,"Step:",step+1,"TrainLoss:",epoch_loss/(step+1))
          train_loss, train_acc = sess.run([loss,accuracy], feed_dict={X_mfcc:train[:,:,:,0],
                                                                       X_d:train[:,:,:,1],
                                                                       X_dd:train[:,:,:,2], 
                                                                        y:ytrain})
          print("Epoch:",epoch,"Loss:", train_loss, "accuracy:", train_acc)
          test_loss, test_acc = sess.run([loss,accuracy], feed_dict = {X_mfcc:test[:,:,:,0],
                                                                       X_d:test[:,:,:,1],
                                                                       X_dd:test[:,:,:,2],
                                                                       y:ytest})
          print("Test:","Epoch:",epoch,"Loss:", test_loss, "accuracy:", test_acc)
          hist['train_loss'].append(train_loss)
          hist['train_acc'].append(train_acc)
          hist['test_loss'].append(test_loss)
          hist['test_acc'].append(test_acc)
    return hist


In [0]:
#train, ytrain, test, ytest, num_epochs, batchsize

hist = run_lstm_monster(data[:1500], labels[:1500], data[1500:], labels[1500:], 10, 128)

Epoch: 0 Step: 1 TrainLoss: 0.7074707746505737
Epoch: 0 Step: 2 TrainLoss: 0.6830656826496124
Epoch: 0 Step: 3 TrainLoss: 0.6932757496833801
Epoch: 0 Step: 4 TrainLoss: 0.6955104172229767
Epoch: 0 Step: 5 TrainLoss: 0.6975646615028381
Epoch: 0 Step: 6 TrainLoss: 0.7004310886065165
Epoch: 0 Step: 7 TrainLoss: 0.7008459738322667
Epoch: 0 Step: 8 TrainLoss: 0.7013906016945839
Epoch: 0 Step: 9 TrainLoss: 0.7001996239026388
Epoch: 0 Step: 10 TrainLoss: 0.7020460665225983
Epoch: 0 Step: 11 TrainLoss: 0.7044320106506348
Epoch: 0 Loss: 0.3929954 accuracy: 0.902
Test: Epoch: 0 Loss: 0.7039482 accuracy: 0.54077256
Epoch: 1 Step: 1 TrainLoss: 0.28196802735328674
Epoch: 1 Step: 2 TrainLoss: 0.27334660291671753
Epoch: 1 Step: 3 TrainLoss: 0.26756832003593445
Epoch: 1 Step: 4 TrainLoss: 0.25512803345918655
Epoch: 1 Step: 5 TrainLoss: 0.2407944142818451
Epoch: 1 Step: 6 TrainLoss: 0.2319715271393458
Epoch: 1 Step: 7 TrainLoss: 0.2227249549967902
Epoch: 1 Step: 8 TrainLoss: 0.21352744475007057
Epoch: 

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Dropout
model = Sequential()
model.add(Conv2D(40 , (5,5) , input_shape = (5388, 20 ,3), name = "Layer1"))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(MaxPooling2D(pool_size= (5,5), name = "MP1"))
model.add(Conv2D(20 ,(2,2), name = "Layer2"))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(MaxPooling2D(pool_size= (2,2), name="MP2"))
model.add(Conv2D(10, (2,1), name = "Layer3" ))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(100, activation="sigmoid", name=""))
model.add(Dense(28, activation = "softmax"))

In [0]:
from keras.utils.np_utils import to_categorical  
dataSI = [(i,j) for i,j in zip(data, spkIDs)]
np.random.shuffle(dataSI)
dataCNN = np.array([i[0] for i in dataSI])
y = np.array([i[1] for i in dataSI]).reshape(-1,1)
y = to_categorical(y, num_classes=28)
print(spkIDs.shape,y.shape, dataCNN.shape)

(2432, 1) (2432, 28) (2432, 2)


In [0]:
model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=['accuracy'])
hist = model.fit(dataCNN[:1500], y[:1500], validation_data=(dataCNN[1500:], y[1500:]), epochs=10, batch_size = 32)

In [0]:
for layer in model.layers: print(layer.get_config(), layer.get_weights())

{'name': 'Layer1', 'trainable': True, 'batch_input_shape': (None, 5388, 20, 3), 'dtype': 'float32', 'filters': 40, 'kernel_size': (5, 5), 'strides': (1, 1), 'padding': 'valid', 'data_format': 'channels_last', 'dilation_rate': (1, 1), 'activation': 'linear', 'use_bias': True, 'kernel_initializer': {'class_name': 'GlorotUniform', 'config': {'seed': None, 'dtype': 'float32'}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {'dtype': 'float32'}}, 'kernel_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None} [array([[[[ 0.00046797,  0.03231239,  0.04677802, ..., -0.01561338,
           0.07097224,  0.00056195],
         [-0.05607001,  0.01246016,  0.01733283, ...,  0.01473978,
          -0.07006636, -0.06815898],
         [ 0.01064105,  0.03229722,  0.04271815, ...,  0.04024877,
          -0.06826036,  0.04488011]],

        [[ 0.05024427, -0.02644952,  0.02171575, ..., -0.03840618,
           0.00394654, -0.04

In [0]:

def trainCNN(train, test, ytrain, ytest, num_epochs, batchsize):
  X = tf.placeholder(tf.float32, [None, 5388, 20, 3])
  y = tf.placeholder(tf.float32, [None, 2])

  layer1 = tf.nn.conv2d(X, np.random.rand(10,10,3,64), strides = [1,1,1,1], padding = 'VALID')
  act1 = tf.nn.relu(layer1)
  out1 = tf.nn.max_pool2d(act1, ksize=[5,5,3,3], strides = [1,1,1,1], padding = "VALID")

  layer2 = tf.nn.conv2d(out1, np.random.rand(5,5,3,32), strides = [1,1,1,1], padding = 'VALID')
  act2 = tf.nn.relu(layer2)
  out2 = tf.nn.max_pool2d(act1, [5,5,3,1], padding = "VALID")

  layer3 = tf.nn.conv2d(out2, np.random.rand(3,3,1,28), strides = [1,1,1,1], padding = 'VALID')
  act3 = tf.nn.relu(layer3)
  out3 = tf.nn.max_pool2d(act1, ksize=[5,5,3,1], strides = [1,1,1,1], padding = "VALID")

  layer4 = tf.nn.conv2d(out3, np.random.rand(3,3,3,16), strides = [1,1,1,1], padding = 'VALID')
  act4 = tf.nn.relu(layer4)
  out4 = tf.nn.max_pool2d(act1, ksize=[5,5,3,1], strides = [1,1,1,1], padding = "VALID")

  layer5 = tf.nn.conv2d(out4, np.random.rand(3,3,3,8), strides = [1,1,1,1], padding = 'VALID')
  act5 = tf.nn.relu(layer5)
  out5 = tf.nn.max_pool2d(act1, ksize=[5,5,3,1], strides = [1,1,1,1], padding = "VALID")
  print(out5.get_shape())

  out6 = tf.nn.relu(tf.reshape(out5, [-1, 2]))
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output)
  loss = tf.reduce_mean(loss)

  optimizer = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(loss)

  soft_out = tf.nn.softmax(output)

  y_onehot = tf.one_hot(y, 2)

  pred = tf.equal(tf.argmax(soft_out,1), tf.argmax(y_onehot,1))
  accuracy = tf.reduce_mean(tf.cast(pred, tf.float32))
  hist = {'train_loss':[], 'val_loss':[], 'train_acc':[], 'val_acc':[]}

  with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      steps = data.shape[0]//batchsize
      for epoch in range(num_epochs):
          epoch_loss = 0
          epoch_acc = 0
          for step in range(steps):
              start = step*batchsize
              end = min(start + batchsize, mfcc.shape[0])
              batch_mfcc = train[start:end]
              #batch_pitch = pitches[start:end]
              batchy = ytrain[start:end]
              _, cost = sess.run([optimizer, loss], feed_dict={X:batch_mfcc, 
                                                                y:batchy})
              train_acc = sess.run(accuracy, feed_dict = {X:batch_mfcc, 
                                                          y:batchy})
              hist['train_loss'].append(cost)
              hist['train_acc'].append(train_acc)
              epoch_loss += cost
              epoch_acc += train_acc
              if (step+1)%5==0:
                print("Epoch:",epoch,"Step:",step+1,"TrainLoss:",epoch_loss/(step+1))
          print("Epoch:",epoch,"Loss:", epoch_loss/(steps+1), "accuracy:", epoch_acc/(steps+1))
          test_loss, test_acc = sess.run([loss,accuracy], feed_dict = {X:test, y:ytest})
          print("Test:","Epoch:",epoch,"Loss:", test_loss, "accuracy:", test_acc)
  return hist




In [0]:
hist = trainCNN(data[:1700,:,:,:], data[1700:,:,:,:], labels[:1700], labels[1700:], num_epochs=10, batchsize = 64)

In [0]:
import matplotlib.pyplot as plt
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])

In [0]:
loaded_model.load_weights('drive/My Drive/Models/embeds.h5', by_name=True) 

In [0]:
dataset = np.load('drive/My Drive/SiameseDataset.npy')