In [None]:
!wget --no-check-certificate --content-disposition https://github.com/AccentDB/one-speaker-vectors/archive/master.zip
!unzip one-speaker-vectors-master.zip

In [None]:
import numpy as np
import datetime, os, random
import tensorflow as tf
import seaborn as sns
from time import time

# Scikit imports for data handling
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Keras imports
import keras
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers import BatchNormalization
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.utils import np_utils
from keras.callbacks import TensorBoard
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Activation, Dropout, Flatten, Dense

In [None]:
def normalize(v):
  """Scale every sample of a 3-D batch to unit L2 norm.

  Each sample ``v[i]`` (a 2-D matrix) is flattened, divided by the L2 norm of
  the flattened vector, and reshaped back, so the returned array has the same
  shape as the input.

  Args:
    v: array-like of shape (n_samples, d1, d2).

  Returns:
    A new array of shape (n_samples, d1, d2); the input is not modified.
    Samples that are entirely zero are returned as zeros rather than NaNs.
  """
  v = np.asarray(v)
  rows, cols = v.shape[1], v.shape[2]
  # Flatten each sample so the norm can be taken along a single axis.
  flat = v.reshape((-1, rows * cols))
  # keepdims gives shape (n_samples, 1), which broadcasts in the division.
  norms = np.linalg.norm(flat, axis=1, keepdims=True)
  # An all-zero sample has norm 0; dividing by it would produce NaNs that
  # silently propagate into training. Divide those rows by 1 instead.
  safe_norms = np.where(norms == 0, 1, norms)
  return (flat / safe_norms).reshape((-1, rows, cols))
  
def load_accent(name, data_dir="one-speaker-vectors-master"):
  """Load and normalize the speaker-01 vectors for one accent.

  Args:
    name: accent name used as the file-name prefix, e.g. "bangla".
    data_dir: directory holding the ``<name>_speaker_01.files.npy`` files.

  Returns:
    The array stored in the file, passed through ``normalize``.
  """
  path = os.path.join(data_dir, f"{name}_speaker_01.files.npy")
  return normalize(np.load(path))

print('Loading data...')

# One array per accent; the names are kept because later cells refer to them.
bangla = load_accent("bangla")
malayalam = load_accent("malayalam")
odiya = load_accent("odiya")
telugu = load_accent("telugu")
indian = load_accent("indian")
australian = load_accent("australian")
british = load_accent("british")
american = load_accent("american")
welsh = load_accent("welsh")

print(bangla.shape)

Loading data...
(778, 499, 13)


In [None]:
# Define categories

# `indian` is rebound further down from the sample array to a list of accent
# names (get_accents relies on that list). If this cell is run a second time,
# `indian` is already the list, so recover the array from the previous `samples`
# dict instead of storing the list of names as if it were data.
indian_vectors = samples["indian"] if isinstance(indian, list) else indian

# Maps each accent name to its array of normalized sample vectors.
samples = {
  "bangla": bangla,
  "malayalam": malayalam,
  "odiya": odiya,
  "telugu": telugu,
  "indian": indian_vectors,
  "australian": australian,
  "british": british,
  "american": american,
  "welsh": welsh,
}

# Accent-name groups; every name is a key of `samples`.
indian = ["bangla", "malayalam", "odiya", "telugu"]
non_indian = ["australian", "british", "american", "welsh"]
all_accents = indian + non_indian + ["indian"]
print("Indian accents: ", indian)
print("Non-Indian accents: ", non_indian)
print("All accents: ", all_accents)

In [None]:
def get_accents(accent_category):
  """
      Get the accents corresponding to the category

      Args:
        accent_category: one of 'indian', 'non_indian' or 'all'.

      Returns:
        The matching module-level list of accent names (`indian`,
        `non_indian` or `all_accents`), or None for any other value.
  """
  # Dispatch on the argument itself rather than on a global variable, so the
  # function works regardless of what (if anything) `category` is set to.
  if accent_category == 'indian':
    return indian
  if accent_category == 'non_indian':
    return non_indian
  if accent_category == 'all':
    # `all_accents` is the accent list; the bare name `all` is the Python builtin.
    return all_accents
  return None

def get_data(accent_category):
  """
      Assemble the feature array and integer labels for an accent category.

      Returns None when get_accents does not recognise the category.
      Otherwise returns (x, y): x stacks the sample arrays of every accent in
      the category along axis 0, and y is a list holding one integer label per
      sample, where the label is the accent's position in the category list.
  """
  accents = get_accents(accent_category)
  if accents is None:
    return None

  # Start from an empty (0, 499, 13) array so the result has that layout even
  # when the category contains no accents.
  pieces = [np.zeros((0, 499, 13))]
  labels = []
  for label, accent in enumerate(accents):
    vectors = samples[accent]
    labels.extend([label] * vectors.shape[0])
    pieces.append(vectors)
  return np.concatenate(pieces, axis=0), labels

In [None]:
# constants
# NOTE(review): maxlen, nb_filter, filter_length_1, filter_length_2 and hidden_dims
# are not referenced by any cell visible in this notebook.
maxlen = 100
nb_filter = 256
filter_length_1 = 10
filter_length_2 = 5
hidden_dims = 750

# split ratio of 80:20 (fraction of the data held out at each split)
split_ratio = 0.20

# Fixed random seed so the train/validation/test splits are reproducible
seed = 42

In [None]:
def get_data_splits(x, y, seed=42):
  """
      Get train, test and validation splits for the data

      The data is first split into train/test using the module-level
      `split_ratio` as the test fraction; the remaining training part is then
      split again with the same fraction to obtain the validation set.

      Args:
        x: array of samples, indexed along axis 0.
        y: integer class labels, one per sample; labels are assumed to start
           at 0 because the class count is taken as max(y) + 1.
        seed: random_state passed to both train_test_split calls so the
           splits are reproducible.

      Returns:
        (X_train, X_test, X_val, Y_train, Y_test, Y_val), where the Y arrays
        are one-hot encoded with keras.utils.to_categorical.
  """
  nb_classes = 1 + int(np.max(y))

  # Hold out the test set first, then carve the validation set out of what is left.
  X_train, X_test, y_train, y_test = train_test_split(
      x, y, test_size=split_ratio, shuffle=True, random_state=seed)
  X_train, X_val, y_train, y_val = train_test_split(
      X_train, y_train, test_size=split_ratio, shuffle=True, random_state=seed)

  Y_train = keras.utils.to_categorical(y_train, nb_classes)
  Y_test = keras.utils.to_categorical(y_test, nb_classes)
  Y_val = keras.utils.to_categorical(y_val, nb_classes)

  print("Number of classes : ", nb_classes)

  print("\nDATA SPLITS:")
  print("Train: ", X_train.shape[0], "samples")
  print("Test: ", X_test.shape[0], "samples")
  print("Val: ", X_val.shape[0], "samples\n")

  return X_train, X_test, X_val, Y_train, Y_test, Y_val

In [None]:
# Number of time steps per sample. The original note attributes the 499 steps to
# sampling a 5 s audio file at 1 ms.
# NOTE(review): 499 steps over 5 s works out to roughly one step per 10 ms — confirm.
test_dim = 499
# Each time step carries 13 MFCC features
n_mfcc = 13
input_shape = (test_dim, n_mfcc)

def build_model(nb_classes):
  """Build and compile the 1-D CNN classifier.

  Architecture: three Conv1D(kernel size 3) -> ReLU -> MaxPooling1D(2) stages
  with 32, 32 and 64 filters, then Flatten, two Dense -> ReLU -> Dropout(0.5)
  stages with 128 and 32 units, and a softmax output layer. The input shape is
  taken from the module-level `input_shape`.

  Args:
    nb_classes: number of units in the softmax output layer.

  Returns:
    A Sequential model compiled with categorical cross-entropy loss, the
    'rmsprop' optimizer and the accuracy metric.
  """
  # A single Sequential container (the model used to be instantiated twice,
  # with the first instance thrown away).
  model = Sequential()

  # Only the first layer needs to declare the input shape.
  model.add(Conv1D(32, 3, input_shape=input_shape))
  model.add(Activation('relu'))
  model.add(MaxPooling1D(pool_size=2))

  model.add(Conv1D(32, 3))
  model.add(Activation('relu'))
  model.add(MaxPooling1D(pool_size=2))

  model.add(Conv1D(64, 3))
  model.add(Activation('relu'))
  model.add(MaxPooling1D(pool_size=2))

  # Classifier head on top of the flattened convolutional features.
  model.add(Flatten())
  model.add(Dense(128))
  model.add(Activation('relu'))
  model.add(Dropout(0.5))
  model.add(Dense(32))
  model.add(Activation('relu'))
  model.add(Dropout(0.5))
  model.add(Dense(nb_classes, activation='softmax'))

  model.compile(loss='categorical_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])

  return model


# Build one model with four output classes right away, so that problems in the
# architecture definition surface here rather than in the middle of training.
test_model = build_model(4)

In [None]:
# Hyper-parameter grid: train() fits one model per (batch size, epoch count) pair.
batch_sizes = [32]
nb_epochs = [3]

def train(x, y):
  """
      Train the CNN model

      Splits the data with get_data_splits, then for every combination of the
      module-level `batch_sizes` and `nb_epochs` builds a fresh model, fits it
      on the training split (validating on the validation split), evaluates it
      on the test split and prints the score.

      Args:
        x: array of samples.
        y: integer class labels, one per sample.

      Returns:
        The model trained for the last (batch size, epochs) combination, or
        None if either grid is empty.
  """
  X_train, X_test, X_val, Y_train, Y_test, Y_val = get_data_splits(x, y)
  nb_classes = 1 + int(np.max(y))

  model = None
  for batch_size in batch_sizes:
    for nb_epoch in nb_epochs:
      print('Build model...')
      print('Batch Size: ', batch_size, 'and Number of epochs: ', nb_epoch ,'\n')

      # One TensorBoard run directory per configuration, under "logs" so that
      # `%tensorboard --logdir logs` picks every run up.
      timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
      run_name = f"{timestamp}-bs{batch_size}-ep{nb_epoch}"
      tensorboard_callback = TensorBoard(log_dir=os.path.join("logs", run_name))

      model = build_model(nb_classes)
      # Pass the batch size itself (it used to influence only steps_per_epoch),
      # use the `epochs` keyword, and validate on the whole validation split.
      model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch,
                shuffle=True, verbose=1, validation_data=(X_val, Y_val),
                callbacks=[tensorboard_callback])

      score = model.evaluate(X_test, Y_test, verbose=1)
      print(f"batch_size: {batch_size} epoch: {nb_epoch}", score)

  return model

In [None]:
category = 'indian' # or 'non_indian', or 'all'
print("Accent category : ", category)

# get_data returns None for a category it does not recognise; fail with a clear
# message instead of a "cannot unpack NoneType" error on the next line.
data = get_data(category)
if data is None:
  raise ValueError(f"Unknown accent category: {category!r}")
x, y = data
print("\nTraining data successfully loaded.")

print("Training...")
model = train(x, y)
print("\nTraining done.")

In [None]:
%tensorboard --logdir logs