<a href="https://colab.research.google.com/github/hmghaly/speech/blob/master/speech_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This is the colab notebook for the phoneme recognition project. We will need to follow the steps in each cell of the notebook to make sure we have the same data and code.

In [1]:
#Step 0 -We need to mount colab to our Google Drive
#When you run this cell, it will give you a link to ask your permission to google colab to access your drive. 
#When you click on the link, You will choose your Google account 
#it will ask you for your permission and then it will generate the code that you can paste in the field below
from google.colab import drive
import os
drive.mount('/content/drive')
cwd='/content/drive/MyDrive/speech_project' #directory where we keep the data
if not os.path.exists(cwd): os.makedirs(cwd)  #if the directory doesn't exist, create it
os.chdir(cwd) #change current work directory to the directory where we keep the data
!pwd

Mounted at /content/drive
/content/drive/MyDrive/speech_project


#Working with Arabic speech corpus - time aligned

##Get the corpus (first time only)

In [None]:
#First Step - Download the Arabic Speech Corpus to your drive
!wget http://en.arabicspeechcorpus.com/arabic-speech-corpus.zip

--2020-11-20 17:39:01--  http://en.arabicspeechcorpus.com/arabic-speech-corpus.zip
Resolving en.arabicspeechcorpus.com (en.arabicspeechcorpus.com)... 68.183.45.58
Connecting to en.arabicspeechcorpus.com (en.arabicspeechcorpus.com)|68.183.45.58|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1192302846 (1.1G) [application/zip]
Saving to: ‘arabic-speech-corpus.zip’


2020-11-20 17:39:37 (32.1 MB/s) - ‘arabic-speech-corpus.zip’ saved [1192302846/1192302846]



In [None]:
#Second step: unzip the zip file to a local direcory
!unzip arabic-speech-corpus.zip

Archive:  arabic-speech-corpus.zip
replace arabic-speech-corpus/lab/ARA NORM  0002.lab? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
#check if it is unzipped properly, and the folder exists
!ls arabic-speech-corpus

##convert text grids to json (first time only)
Once it is done, we don't need to do it again

In [None]:
#Basic functions for converting text grids
#Now here is the code for converting textgrid files (annotation files) into json, for easy processing
import os, re, json
from itertools import groupby
from sys import argv



def textgrid_2_dict(fpath,order_elements=False): #process the contents of textgrid file and return as a dictinoary 
	#parameters: fpath = file path, order_elements = if we want elements (points and intervals) presented as an ordered list within each tier or as an unordered dictionary
	open_item="" #identify which item/tier are we currently working with
	open_element="" #identify which element (point/interval) are we currently working with
	#order_elements=False #if we want elements (points and intervals) presented as an ordered list within each tier or as an unordered dictionary
	final_list=[]
	fopen=open(fpath)
	for fi,f in enumerate(fopen): #iterating over the entire textgrid file, line by line
		cur_line=f.strip("\t\n\r ") #strip the whitespaces and line breaks
		if not cur_line: continue #skip empty lines
		if cur_line=='item []:': continue #skip the empty item line 
		if cur_line.lower().startswith("item") and cur_line[-1]==":" : open_item,open_element=cur_line, "" #for non-empty items, indicate it as the current active item, and indicate that there is no active/open element
		elif cur_line[-1]==":" : open_element=cur_line #otherwise if the current line ends with colon, indicate that it is an active/open element
		if cur_line[-1]==":": continue #now we want to move to the contents of the elements, so we skip processing the lines that end with colon
		split=[v.strip() for v in cur_line.split("=")] #we split each item around equal sign
		if len(split)==2: #if we have 2 split strings, we process them as key and value 
			key,val = split
			#print "????", val
			if val.startswith('"'): #this is a string value
				our_val=val.strip('"')
			else: #this is a numerical value
				try: our_val=int(val.strip('"')) #check if int
				except:
					try: our_val=float(val.strip('"')) #check if float
					except: our_val=val.strip('"') #else, treat it as string
			#print key, our_val
			final_list.append((open_item, open_element,key, our_val)) #put all keys, values, together with their active items (tiers) and elements into a list


	fopen.close()
	grouped=[(key,[v[1:] for v in list(group)]) for key,group in groupby(final_list,lambda x:x[0])] #we group the list by the items
	final_dict={}
	for k, grp in grouped:
		if k=="": #for the first information lines about the file, outside the items
			for g in grp:
				final_dict[g[-2]]=g[-1] #put these keys and values directly into our output dictionary
			final_dict["items"]={} #then we start filling the items part within the output dictionary
		else:
			item_number=k.split()[1].strip("[]: ")
			tmp_dict={}
			element_dict={}
			element_list=[]
			tier_grouped=[(key,[v[1:] for v in list(group)]) for key,group in groupby(grp,lambda x:x[0])] #we group the key-value pairs for the current tier/item, by element
			tier_type=""
			for tk, tgrp in tier_grouped:
				#print tk, len(tgrp), tgrp[:10]
				if tk=="": #the outside information of the tier, without going into the elements
					for t0,t1 in tgrp:
						tmp_dict[t0]=t1
						if t0.startswith("points"): tier_type="points" #given these outside info, if one of the keys start with points, it is a points tier
						if t0.startswith("intervals"): tier_type="intervals" #otherwise, it is an interval tier
				else:
					element_number=tk.split()[1].strip("[]: ") #we get the element number
					local_dict=dict(iter(tgrp)) #and create a local dict for element data

					element_dict[element_number]=local_dict #we update the element dictionary with the local element dictionary
					local_dict["id"]=element_number #for the option that we want to keep the elements ordered, we add another key to the local dict to keep the id of the element
					element_list.append(local_dict) #and we put it in the ordered list of elements

			if not order_elements: #then we decide if we want the elements in a dicionary e.g. our_dict[iten_number][element_number]={"xmin":20,"xmax":21}
				if tier_type=="points": tmp_dict["points"]=element_dict #depending in the type of elements, we update the tmp_dict
				if tier_type=="intervals": tmp_dict["intervals"]=element_dict
			else: #or we want it an ordered list e.g. our_dict[iten_number]=[{"id":15,"xmin":20,"xmax":21},{"id":16,"xmin":21,"xmax":22}]
				if tier_type=="points": tmp_dict["points"]=element_list
				if tier_type=="intervals": tmp_dict["intervals"]=element_list

			
			final_dict["items"][item_number]=tmp_dict #and finally we update the final dict with the tmp dict
	return final_dict


def textgrid_2_json(input_fpath,out_fpath,order_elements=False): #convert the input textgrid file into json file
	cur_dict=textgrid_2_dict(input_fpath,order_elements)
	json_content=json.dumps(cur_dict)
	fopen=open(out_fpath,"w")
	fopen.write(json_content)
	fopen.close()

('A', 'ʔ')
('b', 'b')
('p', 'h')
('t', 't')
('v', 'θ')
('j', 'dʒ')
('H', 'ħ')
('x', 'x')
('d', 'd')
('*', 'ð')
('r', 'r')
('z', 'z')
('s', 's')
('$', 'ʃ')
('S', 'sˤ')
('D', 'dˤ')
('T', 'tˤ')
('Z', 'ðˤ')
('E', 'ʕ')
('g', 'ɣ')
('f', 'f')
('q', 'q')
('k', 'k')
('l', 'l')
('m', 'm')
('n', 'n')
('h', 'h')
('w', 'w')
('Y', 'aː')
('y', 'j')
('F', 'an')
('N', 'un')
('K', 'in')
('a', 'a')
('u', 'u')
('i', 'i')
('AA', 'ɑ')
("A'", 'ʔ')
('AA', 'aː')
("AA'", 'ʔ')
('AH', 'ʌ')
('Ah', 'ʌ')
('G', 'g')
('sil', 'sil')
('U', 'u')
('I', 'i')


In [None]:
#Now we convert all the text grid files to json - for easier processing
import os
root_dir="arabic-speech-corpus"
textgrid_dir=os.path.join(root_dir,"textgrid")
json_dir=os.path.join(root_dir,"json")
if not os.path.exists(json_dir): os.makedirs(json_dir)
for fname in os.listdir(textgrid_dir): #iterate over all textgrid files
  textgrid_fpath=os.path.join(textgrid_dir,fname)
  json_fname=fname.replace(".TextGrid",".json")
  json_fpath=os.path.join(json_dir,json_fname)
  print(json_fpath)
  textgrid_2_json(textgrid_fpath,json_fpath,order_elements=False) #convert them to json, put in json library


In [3]:
#Install speech features library if needed
!pip install python_speech_features

Collecting python_speech_features
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
Building wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone
  Created wheel for python-speech-features: filename=python_speech_features-0.6-py3-none-any.whl size=5888 sha256=3d4573827b934c37e52ca6e183ba93d3c03ff72bdd93ddcdf93b82b00858f078
  Stored in directory: /root/.cache/pip/wheels/b0/0e/94/28cd6afa3cd5998a63eef99fe31777acd7d758f59cf24839eb
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


## 1- Basic functions for processing features and labels

In [4]:
#Now these are the functions for processing features and labels, let's use them every time
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav
from scipy import signal
from scipy.io import wavfile
import pandas as pd

transcription_xls='https://docs.google.com/spreadsheets/d/e/2PACX-1vSQnJplWovDMZT121xv3HpuFTErhX18wIOdORJa060mFHXlzYa-9xUh-L4iK7OB6ifiP09VrkVDLg2v/pub?output=xlsx'
cur_sheet_name="buckwalter"
cur_sheet_obj=pd.read_excel(transcription_xls, cur_sheet_name,keep_default_na=False)
#cur_sheet_obj.keys()
buckwalter_symbols=list(cur_sheet_obj["buckwalter"])
ipa_symbols=list(cur_sheet_obj["IPA"])
cur_conv_list=[(a,b) for a,b in zip(buckwalter_symbols,ipa_symbols) if b]
conversion_dict=dict(iter(cur_conv_list))
#print(conversion_dict)
#just in case
#conversion_dict={'A': 'ʔ', 'b': 'b', 'p': 'h', 't': 't', 'v': 'θ', 'j': 'dʒ', 'H': 'ħ', 'x': 'x', 'd': 'd', '*': 'ð', 'r': 'r', 'z': 'z', 's': 's', '$': 'ʃ', 'S': 'sˤ', 'D': 'dˤ', 'T': 'tˤ', 'Z': 'ðˤ', 'E': 'ʕ', 'g': 'ɣ', 'f': 'f', 'q': 'q', 'k': 'k', 'l': 'l', 'm': 'm', 'n': 'n', 'h': 'h', 'w': 'w', 'Y': 'aː', 'y': 'j', 'F': 'an', 'N': 'un', 'K': 'in', 'a': 'a', 'u': 'u', 'i': 'i', 'AA': 'aː', "A'": 'ʔ', "AA'": 'ʔ', 'AH': 'ʌ', 'Ah': 'ʌ', 'G': 'g', 'sil': 'sil', 'U': 'u', 'I': 'i'}
ipa_symbol_list=sorted(list(set(ipa_symbols+["-"])))
def buck2ipa(buckwalter_symbol0):
  if buckwalter_symbol0.lower() in ["sil","dist"]: return buckwalter_symbol0.lower()
  return conversion_dict.get(buckwalter_symbol0[0],"-")


def extract_audio_features(wav_fpath0,ft_type="logfbank"):
  #sample_rate, samples = wavfile.read(wav_fpath0)
  sample_rate, sig = wavfile.read(wav_fpath0)
  if len(sig.shape)>1: sig= sig.sum(axis=1)
  file_duration=len(sig)/sample_rate	
  if ft_type=="spectrogram": 
    frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)
    spectrogram=spectrogram.transpose()
    return times, spectrogram
  elif ft_type=="logfbank": 
    logfbank_out=logfbank(sig,sample_rate,nfft=2048)
    time_step=file_duration/len(logfbank_out)
    times=[i_*time_step for i_ in range(len(logfbank_out))]
    return times, logfbank_out
  elif ft_type=="mfcc": 
    mfcc_out=mfcc(sig,sample_rate,nfft=2048)
    time_step=file_duration/len(mfcc_out)
    times=[i_*time_step for i_ in range(len(mfcc_out))]
    return times, mfcc_out

def read_tsv(tsv_fpath0):
  tsv_fopen=open(tsv_fpath0)
  tsv_list0=[]
  for tsv0 in tsv_fopen: 
    line_split=tsv0.strip().split("\t")
    if len(line_split)!=2: continue
    a,b=line_split
    tsv_list0.append([float(a),b])
  tsv_fopen.close()
  return tsv_list0

def one_hot(el_vec,val_list):
  final_one_hot=[]
  for el0 in el_vec:
    tmp_list=[0.]*len(val_list)
    if el0 in val_list:
      found_i=val_list.index(el0)
      tmp_list[found_i]=1.
    final_one_hot.append(tmp_list)
  return np.array(final_one_hot) 

def out2labels(rnn_flat_out,label_list): #a flat rnn output to split into slices, and get the label weights for each slice
  final_list=[]
  n_slices=int(len(rnn_flat_out)/len(label_list))
  for i0 in range(n_slices):
    i1=i0+1
    cur_slice=rnn_flat_out[i0*len(label_list):i1*len(label_list)]
    tmp_list=[]
    for lb0,cs0 in zip(label_list,cur_slice): tmp_list.append((lb0,cs0))
    tmp_list.sort(key=lambda x:-x[-1])
    final_list.append(tmp_list)
  return final_list


wav_fpath="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/ARA NORM  0002.wav"
t_list,ft_list=extract_audio_features(wav_fpath,"logfbank")
print(len(t_list),t_list[:50])
print(ft_list.shape)
print("loaded basic functions") #one_hot(el_vec,val_list)  out2labels(rnn_flat_out,label_list)

raw_labels=['$', '$$', '*', '**', '-', '<', '<<', 'A', "A'", 'AA', "AA'", 'AH', 'Ah', 'D', 'DD', 'E', 'E ', 'EE', 'G', 'H', 'HH', 'I0', "I0'", 'I1', "I1'", 'II0', "II0'", 'J', 'S', 'SS', 'T', 'TT', 'U0', "U0'", 'U1', 'UU0', "UU0'", "UU1'", 'Z', 'ZZ', '^', '^^', 'a', 'a ', "a'", 'aa', "aa'", 'b', 'bb', 'd', 'dd', 'dist', 'f', 'ff', 'g', 'gg', 'h', 'hh', 'i0', "i0'", 'i1', 'i1 ', "i1'", 'ii0', "ii0'", 'ii1', "ii1'", 'j', 'jj', 'k', 'kk', 'l', 'll', 'm', 'mm', 'n', 'nn', 'p', 'pp', 'q', 'qq', 'r', 'rr', 's', 'sil', 'ss', 't', 'tt', 'u', 'u0', "u0'", 'u1', "u1'", 'uu0', "uu0'", 'uu1', "uu1'", 'v', 'w', 'ww', 'x', 'xx', 'y', 'yy', 'z', 'zz']
#possible_ipa_labels=sorted(list(set([])))
# for ol in raw_labels:
# 	print(ol,buck2ipa(ol))



1527 [0.0, 0.010005593756821656, 0.02001118751364331, 0.03001678127046497, 0.04002237502728662, 0.05002796878410828, 0.06003356254092994, 0.07003915629775159, 0.08004475005457325, 0.0900503438113949, 0.10005593756821655, 0.11006153132503821, 0.12006712508185988, 0.13007271883868152, 0.14007831259550318, 0.15008390635232483, 0.1600895001091465, 0.17009509386596816, 0.1801006876227898, 0.19010628137961147, 0.2001118751364331, 0.21011746889325478, 0.22012306265007642, 0.23012865640689809, 0.24013425016371975, 0.2501398439205414, 0.26014543767736303, 0.27015103143418473, 0.28015662519100637, 0.290162218947828, 0.30016781270464965, 0.31017340646147135, 0.320179000218293, 0.3301845939751146, 0.3401901877319363, 0.35019578148875796, 0.3602013752455796, 0.37020696900240124, 0.38021256275922294, 0.3902181565160446, 0.4002237502728662, 0.4102293440296879, 0.42023493778650955, 0.4302405315433312, 0.44024612530015284, 0.45025171905697453, 0.46025731281379617, 0.4702629065706178, 0.4802685003274395

##2- RNN definition

In [12]:
#Let's build the network - here is a small cheat sheet for possible RNN classes based on input and output size
#https://github.com/hmghaly/rnn/blob/master/classes.py

#here the size of the output is the same as the size of the input
#the depth of the output depends on the number of possible outcome categories (e.g. different phonemes)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random

torch.manual_seed(1)
random.seed(1)

torch.manual_seed(1)
random.seed(1)

device = torch.device('cpu')
#device = torch.device('cuda')

class RNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size,num_layers, matching_in_out=False, apply_sigmoid=True, apply_softmax=False, batch_size=1):
    super(RNN, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.num_layers = num_layers
    self.batch_size = batch_size
    self.apply_softmax=apply_softmax
    self.apply_sigmoid=apply_sigmoid
    self.matching_in_out = matching_in_out #length of input vector matches the length of output vector 
    self.lstm = nn.LSTM(input_size, hidden_size,num_layers)
    self.hidden2out = nn.Linear(hidden_size, output_size)
    if self.apply_softmax: self.softmax =nn.Softmax(dim=2)
    if self.apply_sigmoid: self.sigmoid =nn.Sigmoid() 
    
    #self.sigmoid = torch.sigmoid(dim=1)
    self.hidden = self.init_hidden()
  def forward(self, feature_list):
    self.hidden = self.init_hidden() ### check
    feature_list=torch.tensor(feature_list)
    feature_list=feature_list.to(device) #### <<<<<<<<<<<<<<<<< 
    if self.matching_in_out:
      lstm_out, _ = self.lstm( feature_list.view(len( feature_list), 1, -1))
      output_scores = self.hidden2out(lstm_out.view(len( feature_list), -1))
      if self.apply_sigmoid: output_scores=self.sigmoid(output_scores).to(device)
      elif self.apply_softmax: output_scores=self.softmax(output_scores).to(device)
      #output_scores = torch.sigmoid(output_space) #we'll need to check if we need this sigmoid
      return output_scores #output_scores
    else:
      outs=[]
      for i in range(len(feature_list)):
        cur_ft_tensor=feature_list[i]#.view([1,1,self.input_size])
        cur_ft_tensor=cur_ft_tensor.view([1,1,self.input_size])
        lstm_out, self.hidden = self.lstm(cur_ft_tensor, self.hidden)
        outs=self.hidden2out(lstm_out)
        if self.apply_sigmoid: outs = self.sigmoid(outs).to(device) #self.sigmoid =nn.Sigmoid()
        elif self.apply_softmax: outs = self.softmax(outs).to(device)
        
      return outs
  def init_hidden(self):
    #return torch.rand(self.num_layers, self.batch_size, self.hidden_size)
    return (torch.rand(self.num_layers, self.batch_size, self.hidden_size).to(device),
            torch.rand(self.num_layers, self.batch_size, self.hidden_size).to(device))


def load_model(model_fpath0):
  checkpoint = torch.load(model_fpath0)
  rnn0 = RNN(checkpoint["n_input"], checkpoint["n_hidden"] , checkpoint["n_output"] , checkpoint["n_layers"] , matching_in_out=checkpoint["n_layers"]).to(device)
  rnn0.load_state_dict(checkpoint['model_state_dict'])
  rnn0.eval()
  return rnn0

n_input=1
n_output=3
n_hidden =64#64
n_layers=2


n_input=5
n_hidden =64
n_layers=2
n_output=2
LR=0.01
loss_func = nn.MSELoss()
rnn = RNN(n_input, n_hidden, n_output,n_layers,matching_in_out=True).to(device)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all cnn parameters

n_data_points=10
input_tensor=torch.rand((n_data_points, n_input)).to(device)
output = rnn(input_tensor)
print("input tensor shape", input_tensor.shape)
#print(output)
print("output tensor shape", output.shape)

feature_list=[(0,3,0),(2,1,0),(1,0,0),(5,0,0)]
label_list=[(0,1),(1,0),(0,0),(1,0)]
feature_tensor=torch.tensor(feature_list,dtype=torch.float32).to(device)
label_tensor=torch.tensor(label_list,dtype=torch.float32).to(device)
print("feature_tensor",feature_tensor.shape)
print("label_tensor",label_tensor.shape)
n_input=feature_tensor.shape[1]
n_hidden =64
n_layers=2
n_output=label_tensor.shape[1]
LR=0.01
loss_func = nn.MSELoss()
rnn = RNN(n_input, n_hidden, n_output,n_layers,matching_in_out=True).to(device)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR) 
for i in range(5):
  print("label_tensor",label_tensor)
  rnn.zero_grad()
  rnn_out=rnn(feature_tensor).to(device)
  print("rnn_out", rnn_out)
  loss = loss_func(rnn_out.ravel(),label_tensor.ravel()) #calculate the loss, difference between the output and the desired outcome tensors
  loss.backward()
  optimizer.step()  
  print("------")

#LR=0.005


# for fname in files:
#   json_file_path=""
#   wav_file_path=""
#   features=extract_features(wav_file_path)
#   labels=extract_labels(json_file_path)
#   labels_tensor=convert2tensor(labels)
#   n_data_points=len(labels)

#   rnn.hidden = rnn.init_hidden()
#   rnn.zero_grad()


#   input_tensor=torch.rand((n_data_points, n_input)) #n_input = 129
#   output = rnn(input_tensor)

#   loss = loss_func(output, labels_tensor) #calculate the loss, difference between the output and the desired outcome tensors

 
#   loss.backward()
#   optimizer.step()  


  # # a=random.randint(0,9) #start from a random number
  # # rand_tensor = 0.2*torch.rand((3, 4)) + a #generating input tensor from the random number, that consists of random numbers +/- 0.1 of the random number
  # # outcome=[0.]*n_output #initializing outcome tensor
  # # outcome[a]=1. #filling the index corresponding to the generated random number, which is the outcome
  # # outcome_tensor=torch.tensor(outcome).view([1,1,n_output]) #convert it to tensor with shape (1,1,size of outcome/output)
  # for i in range(len(rand_tensor)): #feed the network sequentially with the input tensors
  #   cur_tensor=rand_tensor[i].view([1,1,n_input])
  #   output = rnn(cur_tensor)

input tensor shape torch.Size([10, 5])
output tensor shape torch.Size([10, 2])
feature_tensor torch.Size([4, 3])
label_tensor torch.Size([4, 2])
label_tensor tensor([[0., 1.],
        [1., 0.],
        [0., 0.],
        [1., 0.]])
rnn_out tensor([[0.5042, 0.5179],
        [0.5057, 0.5200],
        [0.5056, 0.5207],
        [0.5071, 0.5232]], grad_fn=<SigmoidBackward0>)
------
label_tensor tensor([[0., 1.],
        [1., 0.],
        [0., 0.],
        [1., 0.]])
rnn_out tensor([[0.5028, 0.4996],
        [0.5086, 0.4825],
        [0.5121, 0.4710],
        [0.5213, 0.4592]], grad_fn=<SigmoidBackward0>)
------
label_tensor tensor([[0., 1.],
        [1., 0.],
        [0., 0.],
        [1., 0.]])
rnn_out tensor([[0.5036, 0.4786],
        [0.5189, 0.4328],
        [0.5315, 0.3974],
        [0.5543, 0.3580]], grad_fn=<SigmoidBackward0>)
------
label_tensor tensor([[0., 1.],
        [1., 0.],
        [0., 0.],
        [1., 0.]])
rnn_out tensor([[0.5087, 0.4518],
        [0.5463, 0.3569],
       



##RNN standard Training/Testing Pipeline definition

In [None]:
#NEW
import time, math, random
from random import shuffle


torch.manual_seed(1)
random.seed(1)
# def out2labels(rnn_flat_out,label_list): #a flat rnn output to split into slices, and get the label weights for each slice
#   final_list=[]
#   n_slices=int(len(rnn_flat_out)/len(label_list))
#   for i0 in range(n_slices):
#     i1=i0+1
#     cur_slice=rnn_flat_out[i0*len(label_list):i1*len(label_list)]
#     tmp_list=[]
#     for lb0,cs0 in zip(label_list,cur_slice): tmp_list.append((lb0,cs0))
#     tmp_list.sort(key=lambda x:-x[-1])
#     final_list.append(tmp_list)
#   return final_list

class pipeline:
  def __init__(self,params={},model_dir="models", train_ratio=0.8,logging=True,print_training=False):
    self.n_input=params.get("n_input")
    self.n_output=params.get("n_output")
    self.n_hidden=params.get("n_hidden",16)
    self.n_layers=params.get("n_layers",2)
    self.LR=params.get("LR",0.001)
    self.n_epochs=params.get("n_epochs",20)
    self.matching_in_out=params.get("matching_in_out",False)
    self.apply_sigmoid=params.get("apply_sigmoid",False)
    self.output_labels=params.get("output_labels",[]) #output_labels
    self.input_labels=params.get("input_labels",[]) #input_labels
    self.extract_features_fn=params.get("extract_features_fn") #extract_features_fn
    self.extract_labels_fn=params.get("extract_labels_fn") #extract_labels_fn
    self.train_batch_size=params.get("train_batch_size")
    self.eval_fn=params.get("eval_fn") #eval_fn
    self.exp_name=params.get("exp_name") #exp_name
    self.all_data=params.get("all_data") #exp_name
    self.print_training=print_training
    if self.all_data==[]: return #if the class is initialized with empty data 

    # if output_labels!=[]:self.n_output=len(self.output_labels)
    # if input_labels!=[]:self.n_input=len(self.input_labels)
    first_item=self.all_data[0]
    in0,out0=first_item
    print(first_item)
    in_vector0=self.extract_features_fn(in0,self.input_labels)
    out_vector0=self.extract_labels_fn(out0,self.output_labels)
    #print("in_vector0",in_vector0.shape,"out_vector0",out_vector0.shape)
    #if len(in_vector0.shape)<2: in_vector0=in_vector0.unsqueeze(0)
    if len(in_vector0.shape)<2: in_vector0 = np.expand_dims(in_vector0, axis=0)
    self.n_input=in_vector0.shape[1]
    if len(out_vector0.shape)<2: out_vector0 = np.expand_dims(out_vector0, axis=0)
    #self.n_output=out_vector0.shape[0]
    self.n_output=out_vector0.shape[1]
    # print("in_vector0",in_vector0.shape,"out_vector0",out_vector0)
    # print("self.n_input",self.n_input,"self.n_output",self.n_output)


    self.rnn = RNN(self.n_input, self.n_hidden, self.n_output,self.n_layers,self.matching_in_out,apply_sigmoid=self.apply_sigmoid).to(device)
    self.loss_func = nn.MSELoss()
    self.optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.LR)   # optimize all cnn parameters
    #self.train_batch_size=train_batch_size
    train_size=int(len(self.all_data)*train_ratio)
    self.train_set,self.test_set=self.all_data[:train_size],self.all_data[train_size:]
    self.n_batches=math.floor(len(self.train_set)/self.train_batch_size)
    if self.n_batches==0: self.test_batch_size=len(self.test_set)
    else: self.test_batch_size=math.floor(len(self.test_set)/self.n_batches)    

    if self.exp_name!=None:
      self.model_dir_path=os.path.join(model_dir,self.exp_name)
      if not os.path.exists(self.model_dir_path): os.makedirs(self.model_dir_path) 
      self.tmp_model_dir=os.path.join(self.model_dir_path,"tmp") 
      if not os.path.exists(self.tmp_model_dir): os.makedirs(self.tmp_model_dir)
      self.log_fpath=os.path.join(self.model_dir_path,"log.txt")
      log_fopen=open(self.log_fpath,"a")
      log_fopen.write(str(self.rnn)+"\n")

  def train(self):
    for epoch0 in range(self.n_epochs):
      self.rnn.zero_grad()
      PATH=os.path.join(self.model_dir_path, "model-%s.model"%epoch0)
      if os.path.exists(PATH):
        checkpoint = torch.load(PATH)
        self.rnn.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.rnn.train()
        print("loaded model for this epoch",PATH)
        for a,b in  checkpoint.items():
          if "loss" in a.lower(): print(a,round(b,6))
        continue  
      print("epoch0",epoch0)
      for batch_i0 in range(self.n_batches+1):
        t0=time.time()
        
        #pred_count,correct_count=0,0
        batch_i1=batch_i0+1
        cur_train_items=self.train_set[batch_i0*self.train_batch_size:batch_i1*self.train_batch_size]
        cur_test_items=self.test_set[batch_i0*self.test_batch_size:batch_i1*self.test_batch_size]
        #if len(cur_train_items)==0 or len(cur_test_items)==0: continue
        print("batch_i0",batch_i0, "cur_train_items",len(cur_train_items),"cur_test_items",len(cur_test_items))
        tmp_path=os.path.join(self.tmp_model_dir, "model-batch-%s.model"%batch_i0)
        if os.path.exists(tmp_path):
          checkpoint = torch.load(tmp_path)
          self.rnn.load_state_dict(checkpoint['model_state_dict'])
          self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
          print("loaded model for this epoch",tmp_path)
          continue  

        total_train_loss,total_test_loss=0,0
        train_counter,test_counter=0,0
        test_eval_items=[]
        #test_pred_counter,test_correct_counter=0,0 #how many test items reach the cutoff val for prediction, how many are correctly predicted
        #for train_i in range(1500):
        for train_i, train_item in enumerate(cur_train_items):
          if train_i%200==0: print("train_i",train_i)
          #print("train_item",train_item)
          train_item_input,train_item_output=train_item
          cur_feature_vector=self.extract_features_fn(train_item_input,self.input_labels)
          cur_label_vector=self.extract_labels_fn(train_item_output,self.output_labels)
          feature_tensor=torch.tensor(cur_feature_vector)
          if len(feature_tensor.shape)==1: feature_tensor=feature_tensor.unsqueeze(0)
          label_tensor=torch.tensor(cur_label_vector).to(device)
          #self.rnn.zero_grad()
          rnn_output = self.rnn(feature_tensor).to(device)
          flat_rnn_output=rnn_output.ravel()
          flat_label_vector=label_tensor.ravel()
          #print("flat_rnn_output", flat_rnn_output.shape,"flat_label_vector",flat_label_vector.shape)
          loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
          loss.backward()
          self.optimizer.step()
          total_train_loss+=loss.item()
          train_counter+=1
          if train_i>len(cur_train_items)-20 and self.print_training:
            preds=out2labels(flat_rnn_out,self.output_labels)
            for i0,ac_pr in enumerate(zip(train_item_output,preds)):
              ac0,pr0=ac_pr
              pr0=[(v[0],round(v[1].item(),3)) for v in pr0]
              print("actual:",ac0,"predictions:",pr0[:5])
              if i0>100: break 
            print("-------") 

        for test_i, test_item in enumerate(cur_test_items):
          if test_i%200==0: print("test_i",test_i)
          test_item_input,test_item_output=test_item
          cur_feature_vector=self.extract_features_fn(test_item_input,self.input_labels)
          cur_label_vector=self.extract_labels_fn(test_item_output,self.output_labels)
          feature_tensor=torch.tensor(cur_feature_vector)
          if len(feature_tensor.shape)==1: feature_tensor=feature_tensor.unsqueeze(0)
          label_tensor=torch.tensor(cur_label_vector).to(device)
          #self.rnn.zero_grad()
          with torch.no_grad():
            rnn_output = self.rnn(feature_tensor).to(device)
          flat_rnn_output=rnn_output.ravel()
          flat_label_vector=label_tensor.ravel()
          loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
          total_test_loss+=loss.item()
          test_counter+=1
        avg_train_loss=0
        if train_counter>0: avg_train_loss=round(total_train_loss/train_counter,6)
        avg_test_loss=0
        if test_counter>0: avg_test_loss=round(total_test_loss/test_counter,6)
        
        t1=time.time()
        elapsed=round(t1-t0,2) 
        t0=time.time()    
        #line="Epoch # %s - Batch: %s -  train loss: %s - test loss: %s - Correctness :%s/%s (ratio: %s) - elapsed: %s"%(epoch0, batch_i0, avg_train_loss,avg_test_loss,  correct_count, pred_count, correct_ratio, elapsed)
        line="Epoch # %s  - Batch: %s -  train loss: %s - test loss: %s - elapsed: %s"%(epoch0, batch_i0, avg_train_loss,avg_test_loss, elapsed)
        print(line)
        log_fopen=open(self.log_fpath,"a")
        log_fopen.write(line+"\n")
        log_fopen.close() 
        cur_checkpoint={
                'epoch': epoch0,
                'n_input': self.n_input,
                'n_hidden': self.n_hidden,
                'n_layers': self.n_layers,
                'n_output': self.n_output,
                'output_labels': self.output_labels,
                'input_labels': self.input_labels,
                'LR': self.LR,
                'model_state_dict': self.rnn.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'matching_in_out':self.matching_in_out,
                'train_loss': avg_train_loss,
                'test_loss': avg_test_loss
                }
        torch.save(cur_checkpoint, tmp_path)
      
      torch.save(cur_checkpoint, PATH)  
      print("model saved")
      for f in os.listdir(self.tmp_model_dir):
        tmp_fpath=os.path.join(self.tmp_model_dir,f)
        os.remove(tmp_fpath)
      print("deleted temporary files")
      print("-----------")


  def load_model(self,model_fpath0):
    checkpoint = torch.load(model_fpath0)
    self.output_labels=checkpoint.get("output_labels",[])
    self.rnn = RNN(checkpoint["n_input"], checkpoint["n_hidden"] , checkpoint["n_output"] , checkpoint["n_layers"] , matching_in_out=checkpoint["n_layers"]).to(device)
    self.rnn.load_state_dict(checkpoint['model_state_dict'])
    self.rnn.eval()
    #return rnn0

  def eval(self,eval_dataset0): #can be just the testset
    output=[]
    total_test_loss,test_counter=0,0
    for test_i, test_item in enumerate(eval_dataset0):
      if test_i%200==0: print("test_i",test_i)
      test_item_input,test_item_output=test_item
      cur_feature_vector=self.extract_features_fn(test_item_input,self.input_labels)
      cur_label_vector=self.extract_labels_fn(test_item_output,self.output_labels)
      feature_tensor=torch.tensor(cur_feature_vector)
      if len(feature_tensor.shape)==1: feature_tensor=feature_tensor.unsqueeze(0)
      label_tensor=torch.tensor(cur_label_vector).to(device)
      
      self.rnn.zero_grad()
      rnn_output = self.rnn(feature_tensor).to(device)
      flat_rnn_output=rnn_output.ravel()
      flat_label_vector=label_tensor.ravel()
      loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
      total_test_loss+=loss.item()
      test_counter+=1
      output.append((test_item_input,test_item_output,loss.item(),flat_rnn_output))
    return output


print("loaded pipeline")

loaded pipeline


In [None]:
#OLD
import time, math, random
from random import shuffle


torch.manual_seed(1)
random.seed(1)
def out2labels(rnn_flat_out,label_list): #a flat rnn output to split into slices, and get the label weights for each slice
  final_list=[]
  n_slices=int(len(rnn_flat_out)/len(label_list))
  for i0 in range(n_slices):
    i1=i0+1
    cur_slice=rnn_flat_out[i0*len(label_list):i1*len(label_list)]
    tmp_list=[]
    for lb0,cs0 in zip(label_list,cur_slice): tmp_list.append((lb0,cs0))
    tmp_list.sort(key=lambda x:-x[-1])
    final_list.append(tmp_list)
  return final_list

class pipeline:
  def __init__(self,params={},model_dir="models", train_ratio=0.8,logging=True):
    self.n_input=params.get("n_input")
    self.n_output=params.get("n_output")
    self.n_hidden=params.get("n_hidden",16)
    self.n_layers=params.get("n_layers",2)
    self.LR=params.get("LR",0.001)
    self.n_epochs=params.get("n_epochs",20)
    self.matching_in_out=params.get("matching_in_out",False)
    self.output_labels=params.get("output_labels",[]) #output_labels
    self.input_labels=params.get("input_labels",[]) #input_labels
    self.extract_features_fn=params.get("extract_features_fn") #extract_features_fn
    self.extract_labels_fn=params.get("extract_labels_fn") #extract_labels_fn
    self.train_batch_size=params.get("train_batch_size")
    self.eval_fn=params.get("eval_fn") #eval_fn
    self.exp_name=params.get("exp_name") #exp_name
    self.all_data=params.get("all_data") #exp_name
    if self.all_data==[]: return #if the class is initialized with empty data 

    # if output_labels!=[]:self.n_output=len(self.output_labels)
    # if input_labels!=[]:self.n_input=len(self.input_labels)
    first_item=self.all_data[0]
    in0,out0=first_item
    print(first_item)
    in_vector0=self.extract_features_fn(in0,self.input_labels)
    out_vector0=self.extract_labels_fn(out0,self.output_labels)
    #print("in_vector0",in_vector0.shape,"out_vector0",out_vector0.shape)
    #if len(in_vector0.shape)<2: in_vector0=in_vector0.unsqueeze(0)
    if len(in_vector0.shape)<2: in_vector0 = np.expand_dims(in_vector0, axis=0)
    self.n_input=in_vector0.shape[1]
    if len(out_vector0.shape)<2: out_vector0 = np.expand_dims(out_vector0, axis=0)
    #self.n_output=out_vector0.shape[0]
    self.n_output=out_vector0.shape[1]
    print("in_vector0",in_vector0.shape,"out_vector0",out_vector0)
    print("self.n_input",self.n_input,"self.n_output",self.n_output)


    self.rnn = RNN(self.n_input, self.n_hidden, self.n_output,self.n_layers,self.matching_in_out).to(device)
    self.loss_func = nn.MSELoss()
    self.optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.LR)   # optimize all cnn parameters
    #self.train_batch_size=train_batch_size
    train_size=int(len(self.all_data)*train_ratio)
    self.train_set,self.test_set=self.all_data[:train_size],self.all_data[train_size:]
    self.n_batches=math.floor(len(self.train_set)/self.train_batch_size)
    if self.n_batches==0: self.test_batch_size=len(self.test_set)
    else: self.test_batch_size=math.floor(len(self.test_set)/self.n_batches)    

    if self.exp_name!=None:
      self.model_dir_path=os.path.join(model_dir,self.exp_name)
      if not os.path.exists(self.model_dir_path): os.makedirs(self.model_dir_path) 
      self.tmp_model_dir=os.path.join(self.model_dir_path,"tmp") 
      if not os.path.exists(self.tmp_model_dir): os.makedirs(self.tmp_model_dir)
      self.log_fpath=os.path.join(self.model_dir_path,"log.txt")
      log_fopen=open(self.log_fpath,"a")
      log_fopen.write(str(self.rnn)+"\n")

  def train(self):
    for epoch0 in range(self.n_epochs):
      self.rnn.zero_grad()
      PATH=os.path.join(self.model_dir_path, "model-%s.model"%epoch0)
      if os.path.exists(PATH):
        checkpoint = torch.load(PATH)
        self.rnn.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.rnn.train()
        print("loaded model for this epoch",PATH)
        for a,b in  checkpoint.items():
          if "loss" in a.lower(): print(a,round(b,6))
        continue  
      print("epoch0",epoch0)
      for batch_i0 in range(self.n_batches+1):
        t0=time.time()
        
        #pred_count,correct_count=0,0
        batch_i1=batch_i0+1
        cur_train_items=self.train_set[batch_i0*self.train_batch_size:batch_i1*self.train_batch_size]
        cur_test_items=self.test_set[batch_i0*self.test_batch_size:batch_i1*self.test_batch_size]
        if len(cur_train_items)==0 or len(cur_test_items)==0: continue
        print("batch_i0",batch_i0, "cur_train_items",len(cur_train_items),"cur_test_items",len(cur_test_items))
        tmp_path=os.path.join(self.tmp_model_dir, "model-batch-%s.model"%batch_i0)
        if os.path.exists(tmp_path):
          checkpoint = torch.load(tmp_path)
          self.rnn.load_state_dict(checkpoint['model_state_dict'])
          self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
          print("loaded model for this epoch",tmp_path)
          continue  

        total_train_loss,total_test_loss=0,0
        train_counter,test_counter=0,0
        test_eval_items=[]
        #test_pred_counter,test_correct_counter=0,0 #how many test items reach the cutoff val for prediction, how many are correctly predicted
        #for train_i in range(1500):
        for train_i, train_item in enumerate(cur_train_items):
          if train_i%200==0: print("train_i",train_i)
          #print("train_item",train_item)
          train_item_input,train_item_output=train_item
          cur_feature_vector=self.extract_features_fn(train_item_input,self.input_labels)
          cur_label_vector=self.extract_labels_fn(train_item_output,self.output_labels)
          feature_tensor=torch.tensor(cur_feature_vector)
          if len(feature_tensor.shape)==1: feature_tensor=feature_tensor.unsqueeze(0)
          label_tensor=torch.tensor(cur_label_vector).to(device)
          #self.rnn.zero_grad()
          rnn_output = self.rnn(feature_tensor).to(device)
          flat_rnn_output=rnn_output.ravel()
          flat_label_vector=label_tensor.ravel()
          #print("flat_rnn_output", flat_rnn_output.shape,"flat_label_vector",flat_label_vector.shape)
          loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
          loss.backward()
          self.optimizer.step()
          total_train_loss+=loss.item()
          train_counter+=1
        for test_i, test_item in enumerate(cur_test_items):
          if test_i%200==0: print("test_i",test_i)
          test_item_input,test_item_output=test_item
          cur_feature_vector=self.extract_features_fn(test_item_input,self.input_labels)
          cur_label_vector=self.extract_labels_fn(test_item_output,self.output_labels)
          feature_tensor=torch.tensor(cur_feature_vector)
          if len(feature_tensor.shape)==1: feature_tensor=feature_tensor.unsqueeze(0)
          label_tensor=torch.tensor(cur_label_vector).to(device)
          self.rnn.zero_grad()
          rnn_output = self.rnn(feature_tensor).to(device)
          flat_rnn_output=rnn_output.ravel()
          flat_label_vector=label_tensor.ravel()
          loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
          total_test_loss+=loss.item()
          test_counter+=1

        avg_train_loss=round(total_train_loss/train_counter,6)
        avg_test_loss=round(total_test_loss/test_counter,6)
        
        t1=time.time()
        elapsed=round(t1-t0,2) 
        t0=time.time()    
        #line="Epoch # %s - Batch: %s -  train loss: %s - test loss: %s - Correctness :%s/%s (ratio: %s) - elapsed: %s"%(epoch0, batch_i0, avg_train_loss,avg_test_loss,  correct_count, pred_count, correct_ratio, elapsed)
        line="Epoch # %s  - Batch: %s -  train loss: %s - test loss: %s - elapsed: %s"%(epoch0, batch_i0, avg_train_loss,avg_test_loss, elapsed)
        print(line)
        log_fopen=open(self.log_fpath,"a")
        log_fopen.write(line+"\n")
        log_fopen.close() 
        cur_checkpoint={
                'epoch': epoch0,
                'n_input': self.n_input,
                'n_hidden': self.n_hidden,
                'n_layers': self.n_layers,
                'n_output': self.n_output,
                'output_labels': self.output_labels,
                'input_labels': self.input_labels,
                'LR': self.LR,
                'model_state_dict': self.rnn.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'matching_in_out':self.matching_in_out,
                'train_loss': avg_train_loss,
                'test_loss': avg_test_loss
                }
        torch.save(cur_checkpoint, tmp_path)
      
      torch.save(cur_checkpoint, PATH)  
      print("model saved")
      for f in os.listdir(self.tmp_model_dir):
        tmp_fpath=os.path.join(self.tmp_model_dir,f)
        os.remove(tmp_fpath)
      print("deleted temporary files")
      print("-----------")


  def load_model(self,model_fpath0):
    checkpoint = torch.load(model_fpath0)
    self.output_labels=checkpoint.get("output_labels",[])
    self.rnn = RNN(checkpoint["n_input"], checkpoint["n_hidden"] , checkpoint["n_output"] , checkpoint["n_layers"] , matching_in_out=checkpoint["n_layers"]).to(device)
    self.rnn.load_state_dict(checkpoint['model_state_dict'])
    self.rnn.eval()
    #return rnn0

  def eval(self,eval_dataset0): #can be just the testset
    output=[]
    total_test_loss,test_counter=0,0
    for test_i, test_item in enumerate(eval_dataset0):
      if test_i%200==0: print("test_i",test_i)
      test_item_input,test_item_output=test_item
      cur_feature_vector=self.extract_features_fn(test_item_input,self.input_labels)
      cur_label_vector=self.extract_labels_fn(test_item_output,self.output_labels)
      feature_tensor=torch.tensor(cur_feature_vector)
      if len(feature_tensor.shape)==1: feature_tensor=feature_tensor.unsqueeze(0)
      label_tensor=torch.tensor(cur_label_vector).to(device)
      
      self.rnn.zero_grad()
      rnn_output = self.rnn(feature_tensor).to(device)
      flat_rnn_output=rnn_output.ravel()
      flat_label_vector=label_tensor.ravel()
      loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
      total_test_loss+=loss.item()
      test_counter+=1
      output.append((test_item_input,test_item_output,loss.item(),flat_rnn_output))
    return output


print("loaded pipeline")

loaded pipeline


##2.5- (if needed) Create tsv files
We will need to create tsv files that match the timesteps for each feature extraction method we use. This needs to be done only if the feature extraction creates a different time step

In [None]:
#Now converting the annotations within the JSON files (which were converted from text grid) into tsv, aligning time stamps with annotated phonemes
#just bear in mind that the timesteps depend on the output of the audio feature extraction
import numpy as np
import os,json
from scipy import signal
from scipy.io import wavfile
wav_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/"
json_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/"
tsv_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/tsv2/"
#if not os.path.exists(combined_dir): os.makedirs(combined_dir)


#def features_extraction():
if not os.path.exists(tsv_dir): os.makedirs(tsv_dir)
wav_files=os.listdir(wav_dir)
for i,fname in enumerate(wav_files):
  if i%50==0: print(i)
  #if i==10: break
  
  wav_fpath=os.path.join(wav_dir,fname)
  file_id=fname.split('.')[0] #file name without extension
  json_fpath=os.path.join(json_dir,file_id+'.json')
  tsv_fpath=os.path.join(tsv_dir,file_id+'.tsv') #now creating the tsv file for the phoneme-time table
  #combined_fpath=os.path.join(combined_dir,file_id+'.txt') #now creating the tsv file for the phoneme-time table
  if os.path.exists(tsv_fpath): continue
  t_list,ft_list=extract_audio_features(wav_fpath,"mfcc")
  

  json_fopen=open(json_fpath)
  content_dict=json.load(json_fopen)
  json_fopen.close()

  aligned_phones_list=[]
  labeled_phones=content_dict["items"]["1"]["intervals"]
  #for t,spec0 in zip(times,spectrogram):
  for t in t_list:
    cur_phoneme="-"
    for k,v in labeled_phones.items():
      phoneme=v["text"]
      min_time=float (v["xmin"])
      max_time=float (v["xmax"])
      if(t>=min_time and t<=max_time):
        cur_phoneme=phoneme
        break
    aligned_phones_list.append((t,cur_phoneme))
  
  tsv_fopen=open(tsv_fpath,"w")
  for t,ph in aligned_phones_list:
    line="%s\t%s\n"%(t,ph)
    tsv_fopen.write(line)
  tsv_fopen.close()


#features_extraction()

0




50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800


##3- Preparing Data 
Now aligning the tsv data with the features extracted from wav files to start the training. 

In [32]:
#Loading Data/extracting features - Processing labels - NEW
#This data loading pipeline is specific to each project
#and it should be written every time
#What is important is that it should give a list of data items
#each item consists of the feature tensor
#and the actual outcome list of labels, which is processed from raw inputs
#The actual outcome will be processed later during the training process
import torch
half_window_size=2
wav_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/"
#json_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/"
tsv_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/tsv2/"
wav_files=os.listdir(wav_dir)


data_max_size=500

all_data=[]
for i,fname in enumerate(wav_files[:data_max_size]):
  if i%50==0: print(i)
  phones_list=[]
  wav_fpath=os.path.join(wav_dir,fname)
  file_id=fname.split('.')[0] #file name without extension
  tsv_fpath=os.path.join(tsv_dir,file_id+'.tsv') #now creating the tsv file for the phoneme-time table
  t_list,ft_array=extract_audio_features(wav_fpath,"logfbank")
  cur_tsv_list=read_tsv(tsv_fpath)
  cur_phone_list=[buck2ipa(v[1])  for v in cur_tsv_list] #convert raw labels to IPA labels
  ft_tensor=torch.tensor(ft_array,dtype=torch.float32)
  all_data.append((ft_tensor,cur_phone_list))

print("data loaded",len(all_data))

0




50
100
150
200
250
300
350
400
450
data loaded 500


In [None]:
#Loading Data - OLD
half_window_size=2
wav_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/"
#json_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/"
tsv_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/tsv2/"
wav_files=os.listdir(wav_dir)

all_data=[]
for i,fname in enumerate(wav_files[:20]):
  if i%50==0: print(i)
  #if i==10: break
  phones_list=[]
  wav_fpath=os.path.join(wav_dir,fname)
  file_id=fname.split('.')[0] #file name without extension
  tsv_fpath=os.path.join(tsv_dir,file_id+'.tsv') #now creating the tsv file for the phoneme-time table
  t_list,ft_list=extract_audio_features(wav_fpath,"logfbank")
  cur_tsv_list=read_tsv(tsv_fpath)
  for pair_i,pair0  in enumerate(zip(cur_tsv_list,ft_list)):
    tsv_item,ft_item=pair0
    cur_time,cur_phone=tsv_item
    tmp=[round(v,1) for v in ft_item]
    slice_i0=max(0,pair_i-half_window_size)
    slice_i1=min(len(ft_list),pair_i+half_window_size)
    cur_ft_slice=ft_list[slice_i0:slice_i1]
    #print("cur_phone",cur_phone,"cur_ft_slice",cur_ft_slice.shape)
    #print(tsv_item[1], tmp)
    all_data.append((ft_item,cur_phone))
print("data loaded",len(all_data))

0


  # Remove the CWD from sys.path while we load stuff.


data loaded 19515


##Starting Training
Running data through pipeline


In [None]:
#NEW
#preparing data
import torch
import numpy as np
import os,json, random, sys
from scipy import signal
from scipy.io import wavfile
from random import shuffle

torch.manual_seed(1)
random.seed(1)

ft_vec,cur_label_list=all_data[0]
print("ft_vec", ft_vec.shape)
print("cur_label_list",len(cur_label_list),cur_label_list[:50])
#sys.exit()

raw_output_labels=['$', '$$', '*', '**', '-', '<', '<<', 'A', "A'", 'AA', "AA'", 'AH', 'Ah', 'D', 'DD', 'E', 'E ', 'EE', 'G', 'H', 'HH', 'I0', "I0'", 'I1', "I1'", 'II0', "II0'", 'J', 'S', 'SS', 'T', 'TT', 'U0', "U0'", 'U1', 'UU0', "UU0'", "UU1'", 'Z', 'ZZ', '^', '^^', 'a', 'a ', "a'", 'aa', "aa'", 'b', 'bb', 'd', 'dd', 'dist', 'f', 'ff', 'g', 'gg', 'h', 'hh', 'i0', "i0'", 'i1', 'i1 ', "i1'", 'ii0', "ii0'", 'ii1', "ii1'", 'j', 'jj', 'k', 'kk', 'l', 'll', 'm', 'mm', 'n', 'nn', 'p', 'pp', 'q', 'qq', 'r', 'rr', 's', 'sil', 'ss', 't', 'tt', 'u', 'u0', "u0'", 'u1', "u1'", 'uu0', "uu0'", 'uu1', "uu1'", 'v', 'w', 'ww', 'x', 'xx', 'y', 'yy', 'z', 'zz']
def sp_extract_features(input_data0,input_labels0=[]):
  return np.array(input_data0).astype("float32")
def sp_extract_labels(item_label_list0,output_labels0=[]):
  #item_label_list0=[buck2ipa(v) for v in item_label_list0]
  final_out=[]
  if not type(item_label_list0) is list: item_label_list0=[item_label_list0] 
  for item_label0 in item_label_list0:
    cur_one_hot=[0.]*len(output_labels0)
    if item_label0 in output_labels0: 
      out_i=output_labels0.index(item_label0)
      cur_one_hot[out_i]=1.
    final_out.append(cur_one_hot)
  return np.array(final_out).astype("float32")

# def sp_extract_labels_OLD(output_data0,output_labels0=[]):
#   cur_one_hot=[0.]*len(output_labels0)
#   if output_data0 in output_labels0: 
#     out_i=output_labels0.index(output_data0)
#     cur_one_hot[out_i]=1.
#   return np.array(cur_one_hot).astype("float32")
shuffle(all_data)
cur_params={}
cur_params["all_data"]=all_data[:5]
cur_params["extract_features_fn"]=sp_extract_features
cur_params["extract_labels_fn"]=sp_extract_labels
cur_params["output_labels"]=ipa_symbol_list
cur_params["LR"]=0.001
cur_params["train_batch_size"]=20
cur_params["n_epochs"]=50
cur_params["n_layers"]=2
cur_params["n_hidden"]=64
cur_params["matching_in_out"]=True
cur_params["apply_sigmoid"]=False
cur_params["exp_name"]="exp1-overfit-1234"
#self.train_batch_size

#print("train_data,test_data",len(train_data),len(test_data))
cur_pipeline=pipeline(cur_params,print_training=True)
cur_pipeline.train()

ft_vec (630, 26)
cur_label_list 630 ['sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
(array([[-5.71581187, -5.65496072, -5.12895127, ...,  6.70264585,
         8.77550162,  9.00060369],
       [-2.78800835, -2.5773205 , -2.32563599, ...,  6.66628439,
         8.69488404,  9.09041071],
       [-4.74397559, -5.58807817, -4.72889798, ...,  6.84312843,
         8.77673203,  9.17543196],
       ...,
       [-3.10310789, -3.295515  , -3.47783139, ...,  6.77332765,
         8.80241399,  9.15558248],
       [-3.30247107, -3.66207443, -3.36597546, ...,  6.65968296,
         8.59559159,  9.11266708],
       [-2.91724091, -2.98255945, -3.07060904, ...,  6.24409302,
         7.96756937,  8.59143228]]), ['sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'j', 'j', 'j', '



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
actual: a predictions: [('i', 0.151), ('a', 0.147), ('ʔ', 0.09), ('t', 0.089), ('sil', 0.061)]
actual: a predictions: [('i', 0.152), ('a', 0.147), ('ʔ', 0.09), ('t', 0.089), ('sil', 0.06)]
actual: a predictions: [('i', 0.152), ('a', 0.147), ('ʔ', 0.09), ('t', 0.089), ('sil', 0.06)]
actual: l predictions: [('i', 0.152), ('a', 0.147), ('ʔ', 0.09), ('t', 0.089), ('sil', 0.06)]
actual: l predictions: [('i', 0.152), ('a', 0.147), ('ʔ', 0.09), ('t', 0.089), ('sil', 0.06)]
actual: l predictions: [('i', 0.152), ('a', 0.147), ('ʔ', 0.09), ('t', 0.089), ('sil', 0.06)]
actual: l predictions: [('i', 0.152), ('a', 0.147), ('ʔ', 0.09), ('t', 0.089), ('sil', 0.06)]
actual: q predictions: [('i', 0.153), ('a', 0.147), ('ʔ', 0.09), ('t', 0.089), ('sil', 0.06)]
actual: q predictions: [('i', 0.154), ('a', 0.147), ('ʔ', 0.09), ('t', 0.089), ('sil', 0.059)]
actual: q predictions: [('i', 0.155), ('a', 0.146), ('ʔ', 0.09), ('t', 0.089), ('sil', 

KeyboardInterrupt: ignored

In [None]:
#OLD
#preparing data
import torch
import numpy as np
import os,json, random
from scipy import signal
from scipy.io import wavfile
from random import shuffle

torch.manual_seed(1)
random.seed(1)

output_labels=['$', '$$', '*', '**', '-', '<', '<<', 'A', "A'", 'AA', "AA'", 'AH', 'Ah', 'D', 'DD', 'E', 'E ', 'EE', 'G', 'H', 'HH', 'I0', "I0'", 'I1', "I1'", 'II0', "II0'", 'J', 'S', 'SS', 'T', 'TT', 'U0', "U0'", 'U1', 'UU0', "UU0'", "UU1'", 'Z', 'ZZ', '^', '^^', 'a', 'a ', "a'", 'aa', "aa'", 'b', 'bb', 'd', 'dd', 'dist', 'f', 'ff', 'g', 'gg', 'h', 'hh', 'i0', "i0'", 'i1', 'i1 ', "i1'", 'ii0', "ii0'", 'ii1', "ii1'", 'j', 'jj', 'k', 'kk', 'l', 'll', 'm', 'mm', 'n', 'nn', 'p', 'pp', 'q', 'qq', 'r', 'rr', 's', 'sil', 'ss', 't', 'tt', 'u', 'u0', "u0'", 'u1', "u1'", 'uu0', "uu0'", 'uu1', "uu1'", 'v', 'w', 'ww', 'x', 'xx', 'y', 'yy', 'z', 'zz']
def sp_extract_features(input_data0,input_labels0=[]):
  return np.array(input_data0).astype("float32")
def sp_extract_labels(item_label_list0,output_labels0=[]):
  final_out=[]
  if not type(item_label_list0) is list: item_label_list0=[item_label_list0] 
  for item_label0 in item_label_list0:
    cur_one_hot=[0.]*len(output_labels0)
    if item_label0 in output_labels0: 
      out_i=output_labels0.index(item_label0)
      cur_one_hot[out_i]=1.
    final_out.append(cur_one_hot)
  return np.array(final_out).astype("float32")

def sp_extract_labels_OLD(output_data0,output_labels0=[]):
  cur_one_hot=[0.]*len(output_labels0)
  if output_data0 in output_labels0: 
    out_i=output_labels0.index(output_data0)
    cur_one_hot[out_i]=1.
  return np.array(cur_one_hot).astype("float32")
shuffle(all_data)
cur_params={}
cur_params["all_data"]=all_data[:100]
cur_params["extract_features_fn"]=sp_extract_features
cur_params["extract_labels_fn"]=sp_extract_labels
cur_params["output_labels"]=output_labels
cur_params["LR"]=0.001
cur_params["train_batch_size"]=20
cur_params["n_epochs"]=50
cur_params["n_layers"]=2
cur_params["n_hidden"]=16
cur_params["matching_in_out"]=True
cur_params["exp_name"]="exp1-overfit6"
#self.train_batch_size

#print("train_data,test_data",len(train_data),len(test_data))
cur_pipeline=pipeline(cur_params)
cur_pipeline.train()

NameError: ignored

##Testing through actual data

In [None]:
#tmp_dir=cur_pipeline.tmp_model_dir
def get_offset(label_item0,label_preds):
  cur_offset=None
  for l_i,pred_item in enumerate(label_preds):
    #print("pred_item", pred_item)
    pred_str,pred_val=pred_item
    if pred_item[0]==label_item0: 
      cur_offset=l_i
      break
  return cur_offset
#model_fpath=os.path.join(tmp_dir,"model-batch-36.model")
cur_dir=cur_pipeline.model_dir_path
epoch=15
model_fpath=os.path.join(cur_dir,"model-%s.model"%epoch)
#model_fpath=os.path.join(model_fpath=os.path.join(cur_dir,"model-46.model")#,"model-batch-36.model")
cur_testset=cur_pipeline.test_set
cur_trainset=cur_pipeline.train_set
cur_pipeline.load_model(model_fpath)
eval_output=cur_pipeline.eval(cur_trainset[:20]) #trainset for debugging only
all_offsets=[]
phone_offset_dict={}
for e_i, ev0 in enumerate(eval_output):
  actual_in0,actual_out0,loss0,rnn_out0=ev0
  preds=out2labels(rnn_out0,cur_pipeline.output_labels)
  #preds=[(v[0],round(v[1].item(),3)) for v in preds[0]]
  for i0,ac_pr in enumerate(zip(actual_out0,preds)):
    ac0,pr0=ac_pr
    pr0=[(v[0],round(v[1].item(),3)) for v in pr0]
    print("actual:",ac0,"predictions:",pr0[:5])
    if i0>50: break
  print("--------")
  # print(actual_in0.shape)
  # print("actual_out0",len(actual_out0),actual_out0[:10])
  # print("rnn_out0",rnn_out0.shape)
  continue
  #print(actual_out0)
  
  preds=[(v[0],round(v[1].item(),3)) for v in preds[0]]
  cur_pred_i=get_offset(actual_out0,preds)
  if cur_pred_i!=None: 
    all_offsets.append(cur_pred_i)
    phone_offset_dict[actual_out0]=phone_offset_dict.get(actual_out0,[])+[cur_pred_i]
    #phone_offset_dict
  # for p0 in preds[:10]:
  #   print([(v[0],round(v[1].item(),2)) for v in p0])
  if e_i>20: continue
  print("offset:",cur_pred_i, preds[cur_pred_i], "predictions", preds[:10])
  print("--------")
# avg_offset=round(sum(all_offsets)/len(all_offsets),1)
# print("Average offset=",avg_offset)

# for a,b in phone_offset_dict.items():
#   avg_phone_offset=round(sum(b)/len(b),2)
#   n_firsts=[v for v in b if v==0]
#   print(a,avg_phone_offset,n_firsts)

test_i 0




actual: sil predictions: [('sil', 0.376), ('a', 0.269), ('t', 0.194), ('s', 0.174), ('A', 0.171)]
actual: sil predictions: [('sil', 0.302), ('a', 0.15), ('t', 0.078), ('A', 0.06), ('s', 0.059)]
actual: sil predictions: [('sil', 0.28), ('a', 0.126), ('t', 0.06), ('S', 0.044), ('A', 0.044)]
actual: sil predictions: [('sil', 0.275), ('a', 0.121), ('t', 0.057), ('S', 0.041), ('A', 0.041)]
actual: sil predictions: [('sil', 0.274), ('a', 0.12), ('t', 0.056), ('S', 0.041), ('A', 0.04)]
actual: sil predictions: [('sil', 0.274), ('a', 0.12), ('t', 0.056), ('S', 0.041), ('A', 0.04)]
actual: sil predictions: [('sil', 0.274), ('a', 0.12), ('t', 0.056), ('S', 0.041), ('A', 0.04)]
actual: sil predictions: [('sil', 0.274), ('a', 0.119), ('t', 0.056), ('S', 0.041), ('A', 0.04)]
actual: sil predictions: [('sil', 0.273), ('a', 0.119), ('t', 0.056), ('S', 0.041), ('A', 0.04)]
actual: sil predictions: [('sil', 0.273), ('a', 0.119), ('t', 0.056), ('S', 0.04), ('A', 0.04)]
actual: sil predictions: [('sil', 

##Overfitting debugging

In [None]:
#Overfitting experiments
import string
torch.manual_seed(1)
random.seed(1)

def simplify_labels(lables_list0):
  out_list=[]
  for lb0 in lables_list0:
    if lb0.lower() in ["sil"]: simplified=lb0.lower()
    elif lb0[0].lower() in string.punctuation: simplified="-"
    else: simplified=lb0[0]
    out_list.append(simplified)
  return out_list
  
data0=all_data[0]
ft_vec,label_list=data0
simplified_cur_labels=simplify_labels(label_list)
simplified_possible_labels=sorted(list(set(simplify_labels(output_labels))))
label_vec=sp_extract_labels(simplified_cur_labels,simplified_possible_labels)

print("label_list",label_list)
print("simplified_cur_labels",simplified_cur_labels)
print("possible_labels", len(simplified_possible_labels), simplified_possible_labels)

print("label_vec",label_vec.shape)
print("ft_vec",ft_vec.shape)
loss_func = nn.MSELoss()
n_hidden=64
n_layers=2
LR=0.001
n_input=ft_vec.shape[1]
n_output=label_vec.shape[1]

rnn = RNN(n_input, n_hidden, n_output,n_layers,matching_in_out=True,apply_sigmoid=False)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all cnn parameters

train_data=all_data[:5]
test_data=all_data[5:10]
for i in range(200):
  print("epoch:",i)
  rnn.zero_grad()
  #print("label_tensor",label_tensor)
#data0=all_data[0]
  for data0 in train_data:
    ft_vec,label_list=data0
    simplified_cur_labels=simplify_labels(label_list)
    simplified_possible_labels=sorted(list(set(simplify_labels(output_labels))))
    label_vec=sp_extract_labels(simplified_cur_labels,simplified_possible_labels)  
    feature_tensor=torch.tensor(ft_vec,dtype=torch.float32)
    label_tensor=torch.tensor(label_vec,dtype=torch.float32)  
    
    rnn_out=rnn(feature_tensor)
    print("rnn_out", rnn_out.shape)
    flat_rnn_out=rnn_out.ravel()
    loss = loss_func(flat_rnn_out,label_tensor.ravel()) #calculate the loss, difference between the output and the desired outcome tensors
    loss.backward()
    optimizer.step()  
    if i<180: continue

    preds=out2labels(flat_rnn_out,simplified_possible_labels)
    for i0,ac_pr in enumerate(zip(simplified_cur_labels,preds)):
      ac0,pr0=ac_pr
      pr0=[(v[0],round(v[1].item(),3)) for v in pr0]
      print("actual:",ac0,"predictions:",pr0[:5])
      if i0>100: break  
    print("------")
#print(rnn)

NameError: ignored

##New Pipeline

In [None]:
#Overfitting experiments - this one works, don't touch it
import string
import numpy as np

torch.manual_seed(1)
random.seed(1)

def simplify_labels_old(lables_list0):
  out_list=[]
  for lb0 in lables_list0:
    if lb0.lower() in ["sil"]: simplified=lb0.lower()
    elif lb0[0].lower() in string.punctuation: simplified="-"
    else: simplified=lb0[0]
    out_list.append(simplified)
  return out_list

def simplify_labels(cur_buck_list0):
  return [buck2ipa(v) for v in cur_buck_list0]  

def sp_extract_features(input_data0,input_labels0=[]):
  return np.array(input_data0).astype("float32")


def sp_extract_labels(item_label_list0,output_labels0=[]):
  #item_label_list0=[buck2ipa(v) for v in item_label_list0]
  final_out=[]
  if not type(item_label_list0) is list: item_label_list0=[item_label_list0] 
  for item_label0 in item_label_list0:
    cur_one_hot=[0.]*len(output_labels0)
    if item_label0 in output_labels0: 
      out_i=output_labels0.index(item_label0)
      cur_one_hot[out_i]=1.
    final_out.append(cur_one_hot)
  return np.array(final_out).astype("float32")

data0=all_data[0]
ft_vec,label_list=data0
print("label_list",label_list)
simplified_cur_labels=label_list
#simplified_cur_labels=simplify_labels(label_list)
#simplified_possible_labels=sorted(list(set(simplify_labels(output_labels))))
label_vec=sp_extract_labels(simplified_cur_labels,ipa_symbol_list)
print("ipa_symbol_list",ipa_symbol_list)
print("simplified_cur_labels",simplified_cur_labels)

# print("label_list",label_list)
# print("simplified_cur_labels",simplified_cur_labels)
# print("possible_labels", len(simplified_possible_labels), simplified_possible_labels)

print("label_vec",label_vec.shape)
print("ft_vec",ft_vec.shape)

loss_func = nn.MSELoss()
n_epochs=200
n_hidden=64
n_layers=2
LR=0.001#0.0001
n_input=ft_vec.shape[1]
n_output=label_vec.shape[1]

rnn = RNN(n_input, n_hidden, n_output,n_layers,matching_in_out=True,apply_sigmoid=False)
#rnn = RNN(n_input, n_hidden, n_output,n_layers,matching_in_out=True,apply_sigmoid=True)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all cnn parameters
print(rnn)
train_data=all_data[:2]
test_data=all_data[2:4]
print("test_data", len(test_data))
for epoch_i in range(n_epochs):
  #print("epoch:",epoch_i)
  rnn.zero_grad()
  train_counter,test_counter=0,0
  total_train_loss,total_test_loss=0,0
  for data0 in train_data:
    ft_vec,cur_label_list=data0
    #label_vec=sp_extract_labels(simplified_cur_labels,simplified_possible_labels) 
    #simplified_cur_labels=simplify_labels(cur_label_list) 
    simplified_cur_labels=cur_label_list #simplify_labels(cur_label_list)
    label_vec=sp_extract_labels(simplified_cur_labels,ipa_symbol_list)
    feature_tensor=torch.tensor(ft_vec,dtype=torch.float32)
    label_tensor=torch.tensor(label_vec,dtype=torch.float32)  
    print("feature_tensor",feature_tensor.shape)
    #print(feature_tensor)
    rnn_out=rnn(feature_tensor)
    flat_rnn_out=rnn_out.ravel()
    loss = loss_func(flat_rnn_out,label_tensor.ravel()) #calculate the loss, difference between the output and the desired outcome tensors
    loss.backward()
    optimizer.step() 
    total_train_loss+= loss.item()
    train_counter+=1
    if epoch_i<180: continue #check if overfitting works
    preds=out2labels(flat_rnn_out,ipa_symbol_list)
    for i0,ac_pr in enumerate(zip(simplified_cur_labels,preds)):
      ac0,pr0=ac_pr
      pr0=[(v[0],round(v[1].item(),3)) for v in pr0]
      print("actual:",ac0,"predictions:",pr0[:5])
      if i0>100: break  
    print("------")    
  for data1 in test_data:
    continue
    ft_vec,label_list=data1
    #simplified_cur_labels=simplify_labels(label_list)
    simplified_cur_labels=cur_label_list
    print("simplified_cur_labels",simplified_cur_labels)
    label_vec=sp_extract_labels(simplified_cur_labels,ipa_symbol_list)
    feature_tensor=torch.tensor(ft_vec,dtype=torch.float32)
    label_tensor=torch.tensor(label_vec,dtype=torch.float32) 
    print("label_tensor",label_tensor.shape)
    with torch.no_grad(): 
      rnn_out=rnn(feature_tensor)
    flat_rnn_out=rnn_out.ravel()
    loss = loss_func(flat_rnn_out,label_tensor.ravel()) #calculate the loss, difference between the output and the desired outcome tensors
    total_test_loss+= loss.item()
    test_counter+=1
    if epoch_i<n_epochs-20: continue
    preds=out2labels(flat_rnn_out,ipa_symbol_list)
    # for i0,ac_pr in enumerate(zip(simplified_cur_labels,preds)):
    #   ac0,pr0=ac_pr
    #   pr0=[(v[0],round(v[1].item(),3)) for v in pr0]
    #   print("actual:",ac0,"predictions:",pr0[:5])
    #   if i0>100: break  
    # print("------")
  
  avg_train_loss=0
  if train_counter>0: avg_train_loss=round(total_train_loss/train_counter,6)
  avg_test_loss=0
  if test_counter>0: avg_test_loss=round(total_test_loss/test_counter,6)
  print("epoch_i",epoch_i,"avg_train_loss",avg_train_loss,"avg_test_loss",avg_test_loss)
#print(rnn)

label_list ['sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'l', 'l', 'l', 'l', 'l', 'l', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'l', 'l', 'l', 'l', 'l', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 't', 't', 't', 't', 't', 't', 't', 't', 't', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'l', 'l', 'l', 'l', 'l', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'j', 'j', 'j', 'j', 'j', 'j', 'j', 'j', 'j', 'ʔ', 'ʔ', 'ʔ', 'ʔ', 



epoch_i 0 avg_train_loss 0.03599 avg_test_loss 0
feature_tensor torch.Size([630, 26])
tensor([[-2.8950, -2.6621, -2.9220,  ...,  6.5206,  8.8138,  9.1983],
        [-6.0987, -5.1331, -5.1924,  ...,  6.7616,  8.7908,  9.1477],
        [-6.4724, -5.3593, -5.4370,  ...,  6.9577,  8.7238,  9.0852],
        ...,
        [-2.8484, -2.9857, -2.8784,  ...,  6.6678,  8.6817,  9.2256],
        [-3.1341, -3.0441, -3.0161,  ...,  6.5964,  8.6887,  9.1634],
        [-2.8993, -3.0911, -2.5866,  ...,  6.4991,  8.4029,  8.8139]])
feature_tensor torch.Size([860, 26])
tensor([[-6.4985, -6.1256, -5.1909,  ...,  6.6780,  8.7649,  9.0835],
        [-3.7923, -3.4317, -3.4056,  ...,  6.5609,  8.6520,  9.0208],
        [-1.7310, -1.6933, -1.7100,  ...,  6.6044,  8.5756,  9.0831],
        ...,
        [-2.9675, -4.0395, -4.1246,  ...,  6.6345,  8.6964,  9.2206],
        [-2.6020, -3.4066, -3.2198,  ...,  6.8047,  8.7878,  9.2763],
        [-2.6470, -3.6645, -3.8103,  ...,  6.5742,  8.4489,  8.8715]])
epoch_i 1

KeyboardInterrupt: ignored

##Completely new pipeline

In [None]:
#NEW - let's integrate the one that works with our pipeline
import time, math, random
from random import shuffle
import numpy as np



torch.manual_seed(1)
random.seed(1)


class pipeline2:
  def __init__(self,params={},model_dir="models", train_ratio=0.8,logging=True,print_training=False):
    self.n_input=params.get("n_input")
    self.n_output=params.get("n_output")
    self.n_hidden=params.get("n_hidden",16)
    self.n_layers=params.get("n_layers",2)
    self.LR=params.get("LR",0.001)
    self.n_epochs=params.get("n_epochs",20)
    self.matching_in_out=params.get("matching_in_out",False)
    self.apply_sigmoid=params.get("apply_sigmoid",False)
    self.output_labels=params.get("output_labels",[]) #output_labels
    self.input_labels=params.get("input_labels",[]) #input_labels
    self.extract_features_fn=params.get("extract_features_fn") #extract_features_fn
    self.extract_labels_fn=params.get("extract_labels_fn") #extract_labels_fn
    self.train_batch_size=params.get("train_batch_size")
    self.eval_fn=params.get("eval_fn") #eval_fn
    self.exp_name=params.get("exp_name") #exp_name
    self.all_data=params.get("all_data") #exp_name
    self.print_training=print_training
    if self.all_data==[]: return #if the class is initialized with empty data 

    # if output_labels!=[]:self.n_output=len(self.output_labels)
    # if input_labels!=[]:self.n_input=len(self.input_labels)
    first_item=self.all_data[0]
    in0,out0=first_item
    print(first_item)
    in_vector0=self.extract_features_fn(in0,self.input_labels)
    out_vector0=self.extract_labels_fn(out0,self.output_labels)
    #print("in_vector0",in_vector0.shape,"out_vector0",out_vector0.shape)
    #if len(in_vector0.shape)<2: in_vector0=in_vector0.unsqueeze(0)
    if len(in_vector0.shape)<2: in_vector0 = np.expand_dims(in_vector0, axis=0)
    self.n_input=in_vector0.shape[1]
    if len(out_vector0.shape)<2: out_vector0 = np.expand_dims(out_vector0, axis=0)
    #self.n_output=out_vector0.shape[0]
    self.n_output=out_vector0.shape[1]
    # print("in_vector0",in_vector0.shape,"out_vector0",out_vector0)
    # print("self.n_input",self.n_input,"self.n_output",self.n_output)


    self.rnn = RNN(self.n_input, self.n_hidden, self.n_output,self.n_layers,self.matching_in_out,apply_sigmoid=self.apply_sigmoid).to(device)
    self.loss_func = nn.MSELoss()
    self.optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.LR)   # optimize all cnn parameters
    #self.train_batch_size=train_batch_size
    train_size=int(len(self.all_data)*train_ratio)
    self.train_set,self.test_set=self.all_data[:train_size],self.all_data[train_size:]
    self.n_batches=math.floor(len(self.train_set)/self.train_batch_size)
    if self.n_batches==0: self.test_batch_size=len(self.test_set)
    else: self.test_batch_size=math.floor(len(self.test_set)/self.n_batches)
    print(self.rnn)    

    if self.exp_name!=None:
      self.model_dir_path=os.path.join(model_dir,self.exp_name)
      if not os.path.exists(self.model_dir_path): os.makedirs(self.model_dir_path) 
      self.tmp_model_dir=os.path.join(self.model_dir_path,"tmp") 
      if not os.path.exists(self.tmp_model_dir): os.makedirs(self.tmp_model_dir)
      self.log_fpath=os.path.join(self.model_dir_path,"log.txt")
      log_fopen=open(self.log_fpath,"a")
      log_fopen.write(str(self.rnn)+"\n")

  def train(self):
    for epoch0 in range(self.n_epochs):
      self.rnn.zero_grad()
      PATH=os.path.join(self.model_dir_path, "model-%s.model"%epoch0)
      if os.path.exists(PATH):
        checkpoint = torch.load(PATH)
        self.rnn.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.rnn.train()
        print("loaded model for this epoch",PATH)
        for a,b in  checkpoint.items():
          if "loss" in a.lower(): print(a,round(b,6))
        continue  
      print("epoch0",epoch0)
      for batch_i0 in range(self.n_batches+1):
        t0=time.time()
        
        #pred_count,correct_count=0,0
        batch_i1=batch_i0+1
        cur_train_items=self.train_set[batch_i0*self.train_batch_size:batch_i1*self.train_batch_size]
        cur_test_items=self.test_set[batch_i0*self.test_batch_size:batch_i1*self.test_batch_size]
        #if len(cur_train_items)==0 or len(cur_test_items)==0: continue
        print("batch_i0",batch_i0, "cur_train_items",len(cur_train_items),"cur_test_items",len(cur_test_items))
        tmp_path=os.path.join(self.tmp_model_dir, "model-batch-%s.model"%batch_i0)
        if os.path.exists(tmp_path):
          checkpoint = torch.load(tmp_path)
          self.rnn.load_state_dict(checkpoint['model_state_dict'])
          self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
          print("loaded model for this epoch",tmp_path)
          continue  

        total_train_loss,total_test_loss=0,0
        train_counter,test_counter=0,0
        test_eval_items=[]
        #test_pred_counter,test_correct_counter=0,0 #how many test items reach the cutoff val for prediction, how many are correctly predicted
        #for train_i in range(1500):
        for train_i, train_item in enumerate(cur_train_items):
          if train_i%200==0: print("train_i",train_i)
          #print("train_item",train_item)
          train_item_input,train_item_output=train_item
          print("train_item_output",train_item_output)
          cur_feature_vector=self.extract_features_fn(train_item_input,self.input_labels)
          cur_label_vector=self.extract_labels_fn(train_item_output,self.output_labels)
          feature_tensor=torch.tensor(cur_feature_vector,dtype=torch.float32)
          label_tensor=torch.tensor(cur_label_vector,dtype=torch.float32) 
          rnn_output=rnn(feature_tensor)
          print("label_tensor",label_tensor.shape)
          print("rnn_output",rnn_output)

          flat_rnn_output=rnn_output.ravel()
          flat_label_vector=label_tensor.ravel()
          print("flat_label_vector",flat_label_vector.shape)
          #print("flat_rnn_output", flat_rnn_output.shape,"flat_label_vector",flat_label_vector.shape)
          loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
          loss.backward()
          self.optimizer.step()
          total_train_loss+=loss.item()
          print("Loss:",loss.item())
          train_counter+=1
          if epoch0>self.n_epochs-10 and train_i>len(cur_train_items)-20 and self.print_training:
            preds=out2labels(flat_rnn_out,self.output_labels)
            for i0,ac_pr in enumerate(zip(train_item_output,preds)):
              ac0,pr0=ac_pr
              pr0=[(v[0],round(v[1].item(),3)) for v in pr0]
              print("actual:",ac0,"predictions:",pr0[:5])
              if i0>100: break 
            print("-------") 

        for test_i, test_item in enumerate(cur_test_items):
          if test_i%200==0: print("test_i",test_i)
          test_item_input,test_item_output=test_item
          cur_feature_vector=self.extract_features_fn(test_item_input,self.input_labels)
          cur_label_vector=self.extract_labels_fn(test_item_output,self.output_labels)
          feature_tensor=torch.tensor(cur_feature_vector)
          if len(feature_tensor.shape)==1: feature_tensor=feature_tensor.unsqueeze(0)
          label_tensor=torch.tensor(cur_label_vector).to(device)
          #self.rnn.zero_grad()
          #with torch.no_grad():
          rnn_output = self.rnn(feature_tensor).to(device)
          flat_rnn_output=rnn_output.ravel()
          flat_label_vector=label_tensor.ravel()
          loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
          total_test_loss+=loss.item()
          test_counter+=1
        avg_train_loss=0
        if train_counter>0: avg_train_loss=round(total_train_loss/train_counter,6)
        avg_test_loss=0
        if test_counter>0: avg_test_loss=round(total_test_loss/test_counter,6)
        
        t1=time.time()
        elapsed=round(t1-t0,2) 
        t0=time.time()    
        #line="Epoch # %s - Batch: %s -  train loss: %s - test loss: %s - Correctness :%s/%s (ratio: %s) - elapsed: %s"%(epoch0, batch_i0, avg_train_loss,avg_test_loss,  correct_count, pred_count, correct_ratio, elapsed)
        line="Epoch # %s  - Batch: %s -  train loss: %s - test loss: %s - elapsed: %s"%(epoch0, batch_i0, avg_train_loss,avg_test_loss, elapsed)
        print(line)
        log_fopen=open(self.log_fpath,"a")
        log_fopen.write(line+"\n")
        log_fopen.close() 
        cur_checkpoint={
                'epoch': epoch0,
                'n_input': self.n_input,
                'n_hidden': self.n_hidden,
                'n_layers': self.n_layers,
                'n_output': self.n_output,
                'output_labels': self.output_labels,
                'input_labels': self.input_labels,
                'LR': self.LR,
                'model_state_dict': self.rnn.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'matching_in_out':self.matching_in_out,
                'train_loss': avg_train_loss,
                'test_loss': avg_test_loss
                }
        torch.save(cur_checkpoint, tmp_path)
      
      torch.save(cur_checkpoint, PATH)  
      print("model saved")
      for f in os.listdir(self.tmp_model_dir):
        tmp_fpath=os.path.join(self.tmp_model_dir,f)
        os.remove(tmp_fpath)
      print("deleted temporary files")
      print("-----------")


  def load_model(self,model_fpath0):
    checkpoint = torch.load(model_fpath0)
    self.output_labels=checkpoint.get("output_labels",[])
    self.rnn = RNN(checkpoint["n_input"], checkpoint["n_hidden"] , checkpoint["n_output"] , checkpoint["n_layers"] , matching_in_out=checkpoint["n_layers"]).to(device)
    self.rnn.load_state_dict(checkpoint['model_state_dict'])
    self.rnn.eval()
    #return rnn0

  def eval(self,eval_dataset0): #can be just the testset
    output=[]
    total_test_loss,test_counter=0,0
    for test_i, test_item in enumerate(eval_dataset0):
      if test_i%200==0: print("test_i",test_i)
      test_item_input,test_item_output=test_item
      cur_feature_vector=self.extract_features_fn(test_item_input,self.input_labels)
      cur_label_vector=self.extract_labels_fn(test_item_output,self.output_labels)
      feature_tensor=torch.tensor(cur_feature_vector)
      if len(feature_tensor.shape)==1: feature_tensor=feature_tensor.unsqueeze(0)
      label_tensor=torch.tensor(cur_label_vector).to(device)
      
      self.rnn.zero_grad()
      rnn_output = self.rnn(feature_tensor).to(device)
      flat_rnn_output=rnn_output.ravel()
      flat_label_vector=label_tensor.ravel()
      loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
      total_test_loss+=loss.item()
      test_counter+=1
      output.append((test_item_input,test_item_output,loss.item(),flat_rnn_output))
    return output

def sp_extract_features_OLD(input_data0,input_labels0=[]):
  return np.array(input_data0).astype("float32")

def sp_extract_features(input_data0,input_labels0=[]):
  return torch.tensor(input_data0,dtype=torch.float32) 

def sp_extract_labels(item_label_list0,output_labels0=[]):
  #item_label_list0=[buck2ipa(v) for v in item_label_list0]
  final_out=[]
  #if not type(item_label_list0) is list: item_label_list0=[item_label_list0] 
  for item_label0 in item_label_list0:
    cur_one_hot=[0.]*len(output_labels0)
    if item_label0 in output_labels0: 
      out_i=output_labels0.index(item_label0)
      cur_one_hot[out_i]=1.
    final_out.append(cur_one_hot)
  return np.array(final_out).astype("float32")

print("loaded pipeline")
cur_params={}
cur_params["all_data"]=all_data[:2]
cur_params["extract_features_fn"]=sp_extract_features
cur_params["extract_labels_fn"]=sp_extract_labels
cur_params["output_labels"]=ipa_symbol_list
cur_params["LR"]=0.0001
cur_params["train_batch_size"]=5
cur_params["n_epochs"]=150
cur_params["n_layers"]=3
cur_params["n_hidden"]=128
cur_params["matching_in_out"]=True
cur_params["apply_sigmoid"]=False
cur_params["exp_name"]="exp1-super-overfit-abcx"
#self.train_batch_size

#print("train_data,test_data",len(train_data),len(test_data))
cur_pipeline=pipeline2(cur_params,print_training=True)
cur_pipeline.train()

loaded pipeline
(array([[-2.89504669, -2.66212688, -2.92198218, ...,  6.52055047,
         8.81381628,  9.1982796 ],
       [-6.09874313, -5.13313349, -5.19236626, ...,  6.76164058,
         8.79081565,  9.14772333],
       [-6.47241727, -5.35925464, -5.4370104 , ...,  6.95766664,
         8.72375441,  9.08521148],
       ...,
       [-2.84840834, -2.98568798, -2.87839712, ...,  6.66784772,
         8.68172392,  9.22564885],
       [-3.13411662, -3.04408228, -3.01613402, ...,  6.59635365,
         8.68869736,  9.16337432],
       [-2.8993495 , -3.09110927, -2.58661135, ...,  6.49908145,
         8.40294408,  8.81389957]]), ['sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'l', 'l', 'l', 'l', 'l', 'l', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'k', 'k', 'k', 'k', 'k', 'k', 



Loss: 0.006752672139555216
test_i 0
Epoch # 0  - Batch: 0 -  train loss: 0.006753 - test loss: 0.028719 - elapsed: 0.35
model saved
deleted temporary files
-----------
epoch0 1
batch_i0 0 cur_train_items 1 cur_test_items 1
train_i 0
train_item_output ['sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'l', 'l', 'l', 'l', 'l', 'l', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'l', 'l', 'l', 'l', 'l', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 't', 't', 't', 't', 't', 't', 't', 't', 't', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'ħ', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'l', 



label_tensor torch.Size([630, 41])
rnn_output tensor([[-0.0202,  0.0375, -0.0082,  ..., -0.0487, -0.0735, -0.0243],
        [ 0.0079,  0.0227,  0.0009,  ..., -0.0226, -0.0510, -0.0022],
        [ 0.0105,  0.0297, -0.0011,  ...,  0.0019, -0.0204, -0.0008],
        ...,
        [ 0.0049,  0.0394,  0.0229,  ..., -0.0123,  0.0220,  0.0115],
        [ 0.0052,  0.0434,  0.0244,  ..., -0.0158,  0.0248,  0.0098],
        [ 0.0050,  0.0470,  0.0279,  ..., -0.0171,  0.0282,  0.0072]],
       grad_fn=<AddmmBackward0>)
flat_label_vector torch.Size([25830])
Loss: 0.006752672139555216
test_i 0
Epoch # 1  - Batch: 0 -  train loss: 0.006753 - test loss: 0.028719 - elapsed: 0.39
model saved
deleted temporary files
-----------
epoch0 2
batch_i0 0 cur_train_items 1 cur_test_items 1
train_i 0
train_item_output ['sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'a', 'a',

KeyboardInterrupt: ignored

##4- Just the training part of the pipeline

In [None]:
import torch
import sys, math, os, time
import numpy as np

exp_name="test09_3layer_128_500_lr1e05"
exp_name="test09_3layer_256_500_lr1e05"
n_layers=3
n_hidden=256#128#64
LR=0.00001
n_epochs=50
n_data=None
train_batch_size=20
matching_in_out=True
train_ratio=0.8
model_dir="models"
output_labels=standard_labels=ipa_symbol_list

def sp_extract_labels(item_label_list0,output_labels0=[]):
  final_out=[]
  for item_label0 in item_label_list0:
    cur_one_hot=[0.]*len(output_labels0)
    if item_label0 in output_labels0: 
      out_i=output_labels0.index(item_label0)
      cur_one_hot[out_i]=1.
    final_out.append(cur_one_hot)
  return torch.tensor(final_out,dtype=torch.float32) #np.array(final_out).astype("float32")

def get_offset(label_item0,label_preds): #a function to check if the main outocome of the training is achieved
  cur_offset=None
  for l_i,pred_item in enumerate(label_preds):
    pred_str,pred_val=pred_item
    if pred_item[0]==label_item0: 
      cur_offset=l_i
      break
  return cur_offset

def eval_function(rnn_out0,actual_out_list0,standard_labels0):
  list_offsets=[]
  preds0=out2labels(rnn_out0.ravel(),standard_labels0)
  for i0,ac_pr in enumerate(zip(actual_out_list0,preds0)):
    ac0,pr0=ac_pr
    pr0=[(v[0],round(v[1].item(),3)) for v in pr0]
    offset_i=get_offset(ac0,pr0)
    list_offsets.append(offset_i) 
  return list_offsets 

input_labels=[]
cur_item=all_data[0]
ft_vec,lb_list=cur_item
lb_vec=sp_extract_labels(lb_list,standard_labels)

if n_data==None: cur_data=all_data
else: cur_data=all_data[:n_data]
train_size=int(len(cur_data)*train_ratio)
train_set,test_set=cur_data[:train_size],cur_data[train_size:]
n_batches=math.floor(len(train_set)/train_batch_size)
if n_batches==0: test_batch_size=len(test_set)
else: test_batch_size=math.floor(len(test_set)/n_batches)
   

print("ft_vec", ft_vec.shape)
print("lb_list",len(lb_list),lb_list[:50])
print("lb_vec",lb_vec.shape)
n_input=ft_vec.shape[1]
n_output=lb_vec.shape[1]
print("n_input",n_input,"n_output",n_output)
#sys.exit()
rnn = RNN(n_input, n_hidden, n_output,n_layers,matching_in_out,apply_sigmoid=False).to(device)
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all cnn parameters

if exp_name!=None:
  model_dir_path=os.path.join(model_dir,exp_name)
  if not os.path.exists(model_dir_path): os.makedirs(model_dir_path) 
  tmp_model_dir=os.path.join(model_dir_path,"tmp") 
  if not os.path.exists(tmp_model_dir): os.makedirs(tmp_model_dir)
  log_fpath=os.path.join(model_dir_path,"log.txt")
  log_fopen=open(log_fpath,"a")
  log_fopen.write(str(rnn)+"\n")
  log_fopen.close()

for epoch0 in range(n_epochs):
  rnn.zero_grad()
  PATH=os.path.join(model_dir_path, "model-%s.model"%epoch0)
  if os.path.exists(PATH):
    checkpoint = torch.load(PATH)
    rnn.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    rnn.train()
    print("loaded model for this epoch",PATH)
    for a,b in  checkpoint.items():
      if "loss" in a.lower(): print(a,round(b,6))
    continue  
  print("epoch0",epoch0)
  for batch_i0 in range(n_batches+1):
    t0=time.time()
    
    #pred_count,correct_count=0,0
    batch_i1=batch_i0+1
    cur_train_items=train_set[batch_i0*train_batch_size:batch_i1*train_batch_size]
    cur_test_items=test_set[batch_i0*test_batch_size:batch_i1*test_batch_size]
    print("batch_i0",batch_i0, "cur_train_items",len(cur_train_items),"cur_test_items",len(cur_test_items))
    tmp_path=os.path.join(tmp_model_dir, "model-batch-%s.model"%batch_i0)
    if os.path.exists(tmp_path):
      checkpoint = torch.load(tmp_path)
      rnn.load_state_dict(checkpoint['model_state_dict'])
      optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
      print("loaded model for this epoch",tmp_path)
      rnn.train()
      continue  

    total_train_loss,total_test_loss=0,0
    train_counter,test_counter=0,0
    train_eval_items,test_eval_items=[],[]
    for train_i, train_item in enumerate(cur_train_items):
      if train_i%200==0: print("train_i",train_i)
      cur_feature_tensor,train_item_output=train_item
      cur_feature_tensor=cur_feature_tensor.to(device)
      cur_label_tensor=sp_extract_labels(train_item_output,standard_labels).to(device)
      rnn_output=rnn(cur_feature_tensor).to(device)
      loss = loss_func(rnn_output.ravel(), cur_label_tensor.ravel()) #calculate the loss, difference between the output and the desired outcome tensors
      loss.backward()
      optimizer.step()
      total_train_loss+=loss.item()
      train_counter+=1
      eval_out=eval_function(rnn_output,train_item_output,standard_labels)
      train_eval_items.extend(eval_out)
      #eval_avg=sum()
      #if epoch0>n_epochs-10 and train_i>len(cur_train_items)-20:
      # if epoch0>n_epochs-10:
      #   preds=out2labels(rnn_output.ravel(),standard_labels)
      #   for i0,ac_pr in enumerate(zip(train_item_output,preds)):
      #     ac0,pr0=ac_pr
      #     pr0=[(v[0],round(v[1].item(),3)) for v in pr0]
      #     offset_i=get_offset(ac0,pr0)
      #     print("actual:",ac0, "offset_i",offset_i, "predictions:",pr0[:5])
      #     if i0>50: break 
      #   print("-------") 

    for test_i, test_item in enumerate(cur_test_items):
      #continue
      if test_i%200==0: print("test_i",test_i)

      cur_feature_tensor,test_item_output=test_item
      cur_feature_tensor=cur_feature_tensor.to(device)
      cur_label_tensor=sp_extract_labels(test_item_output,standard_labels).to(device)
      with torch.no_grad():
        rnn_output=rnn(cur_feature_tensor).to(device)
      loss = loss_func(rnn_output.ravel(), cur_label_tensor.ravel()) #calculate the loss, difference between the output and the desired outcome tensors
      total_test_loss+=loss.item()
      test_counter+=1
      eval_out=eval_function(rnn_output,test_item_output,standard_labels)
      test_eval_items.extend(eval_out)
      continue



      # test_item_input,test_item_output=test_item
      # cur_feature_vector=self.extract_features_fn(test_item_input,self.input_labels)
      # cur_label_vector=self.extract_labels_fn(test_item_output,self.output_labels)
      # feature_tensor=torch.tensor(cur_feature_vector)
      # if len(feature_tensor.shape)==1: feature_tensor=feature_tensor.unsqueeze(0)
      # label_tensor=torch.tensor(cur_label_vector).to(device)
      # #self.rnn.zero_grad()
      # #with torch.no_grad():
      # rnn_output = self.rnn(feature_tensor).to(device)
      # flat_rnn_output=rnn_output.ravel()
      # flat_label_vector=label_tensor.ravel()
      # loss = self.loss_func(flat_rnn_output, flat_label_vector) #calculate the loss, difference between the output and the desired outcome tensors
      # total_test_loss+=loss.item()
      # test_counter+=1
    avg_train_loss=0
    if train_counter>0: avg_train_loss=round(total_train_loss/train_counter,6)
    avg_train_eval=-1
    if len(train_eval_items)>0: avg_train_eval=round(sum(train_eval_items)/len(train_eval_items),6)
    avg_test_loss=0
    if test_counter>0: avg_test_loss=round(total_test_loss/test_counter,6)
    avg_test_eval=-1
    if len(test_eval_items)>0: avg_test_eval=round(sum(test_eval_items)/len(test_eval_items),6)
    
    t1=time.time()
    elapsed=round(t1-t0,2) 
    t0=time.time()    
    #line="Epoch # %s - Batch: %s -  train loss: %s - test loss: %s - Correctness :%s/%s (ratio: %s) - elapsed: %s"%(epoch0, batch_i0, avg_train_loss,avg_test_loss,  correct_count, pred_count, correct_ratio, elapsed)
    #line="Epoch # %s  - Batch: %s -  train loss: %s - test loss: %s - elapsed: %s"%(epoch0, batch_i0, avg_train_loss,avg_test_loss, elapsed)
    line="Epoch # %s  - Batch: %s / %s -  train loss: %s - test loss: %s - train eval: %s - test eval: %s - elapsed: %s"%(epoch0, batch_i0, n_batches, avg_train_loss,avg_test_loss, avg_train_eval,avg_test_eval, elapsed)
    print(line)
    log_fopen=open(log_fpath,"a")
    log_fopen.write(line+"\n")
    log_fopen.close() 
    cur_checkpoint={
            'epoch': epoch0,
            'n_input': n_input,
            'n_hidden': n_hidden,
            'n_layers': n_layers,
            'n_output': n_output,
            'output_labels': standard_labels,
            'input_labels': input_labels,
            'LR': LR,
            'model_state_dict': rnn.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'matching_in_out':matching_in_out,
            'train_loss': avg_train_loss,
            'test_loss': avg_test_loss,
            'train_eval': avg_train_eval,
            'test_eval': avg_test_eval

            }
    torch.save(cur_checkpoint, tmp_path)
  #continue
  torch.save(cur_checkpoint, PATH)  
  print("model saved")
  for f in os.listdir(tmp_model_dir):
    tmp_fpath=os.path.join(tmp_model_dir,f)
    os.remove(tmp_fpath)
  print("deleted temporary files")
  print("-----------")

ft_vec torch.Size([630, 26])
lb_list 630 ['sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'ð', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
lb_vec torch.Size([630, 41])
n_input 26 n_output 41
epoch0 0
batch_i0 0 cur_train_items 20 cur_test_items 5
train_i 0




test_i 0
Epoch # 0  - Batch: 0 / 20 -  train loss: 0.02537 - test loss: 0.025305 - train eval: 18.040892 - test eval: 19.531263 - elapsed: 58.53
batch_i0 1 cur_train_items 20 cur_test_items 5
train_i 0
test_i 0
Epoch # 0  - Batch: 1 / 20 -  train loss: 0.024141 - test loss: 0.024445 - train eval: 14.38956 - test eval: 14.734985 - elapsed: 59.44
batch_i0 2 cur_train_items 20 cur_test_items 5
train_i 0
test_i 0
Epoch # 0  - Batch: 2 / 20 -  train loss: 0.023231 - test loss: 0.02386 - train eval: 11.130768 - test eval: 11.43594 - elapsed: 52.47
batch_i0 3 cur_train_items 20 cur_test_items 5
train_i 0
test_i 0
Epoch # 0  - Batch: 3 / 20 -  train loss: 0.022982 - test loss: 0.023385 - train eval: 9.659116 - test eval: 7.97098 - elapsed: 60.31
batch_i0 4 cur_train_items 20 cur_test_items 5
train_i 0
test_i 0
Epoch # 0  - Batch: 4 / 20 -  train loss: 0.022859 - test loss: 0.023468 - train eval: 7.421913 - test eval: 7.821895 - elapsed: 63.43
batch_i0 5 cur_train_items 20 cur_test_items 5
trai

##5- Check the results

In [76]:
# epoch0=0
# exp_name="test09_3layer_500_lr1e05"
#exp_name="test09_3layer_128_500_lr1e05"
# epoch0=2
# exp_name="test09_3layer_500_lr1e05" >> 2.13
# epoch0=10
# exp_name="test09_3layer" #"test03" >> 3.2
#exp_name="test09_3layer" #"test03" >> 3.2 # test_i=450 >> 2.35
#epoch0=3; exp_name="test09_3layer_128_500_lr1e05"; test_i=450 >> 1.59
epoch0=14
#exp_name="test09_3layer" #"test03" >> 3.2 # test_i=450 >> 2.35
#exp_name="test09_3layer_128_500_lr1e05"; epoch0=14; test_i=450 >> 0.67
exp_name="test09_3layer_128_500_lr1e05"

test_i=450

model_dir="models"
model_dir_path=os.path.join(model_dir,exp_name)
PATH=os.path.join(model_dir_path, "model-%s.model"%epoch0)
rnn=load_model(PATH)
data0=all_data[test_i]
ft_tensor,actual_labels=data0
print("ft_tensor",ft_tensor.shape)
print("actual_labels",len(actual_labels),actual_labels[:50])
rnn_out=rnn(ft_tensor)
list_offsets=[]
preds0=out2labels(rnn_out.ravel(),standard_labels)
for i0,ac_pr in enumerate(zip(actual_labels,preds0)):
  ac0,pr0=ac_pr
  pr0=[(v[0],round(v[1].item(),3)) for v in pr0]
  offset_i=get_offset(ac0,pr0)
  list_offsets.append(offset_i) 
  print("actual",ac0, "offset_i",offset_i,"predicted:",pr0[:5])
avg_offset=round(sum(list_offsets)/len(list_offsets),2)
print("avg_offset",avg_offset)

ft_tensor torch.Size([469, 26])
actual_labels 469 ['sil', 'sil', 'sil', 'sil', 'sil', 'sil', 'sil', 't', 't', 't', 't', 't', 'a', 'a', 'a', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'ʔ', 'ʔ', 'ʔ', 'ʔ', 'ʔ', 'sˤ', 'sˤ', 'sˤ', 'sˤ', 'sˤ', 'sˤ', 'sˤ', 'sˤ', 'sˤ', 'sˤ']




actual sil offset_i 0 predicted: [('sil', 0.598), ('g', 0.514), ('ʌ', 0.512), ('b', 0.509), ('aː', 0.508)]
actual sil offset_i 0 predicted: [('sil', 0.672), ('g', 0.508), ('r', 0.508), ('f', 0.507), ('q', 0.506)]
actual sil offset_i 0 predicted: [('sil', 0.704), ('r', 0.508), ('q', 0.507), ('f', 0.507), ('n', 0.506)]
actual sil offset_i 0 predicted: [('sil', 0.712), ('t', 0.508), ('r', 0.507), ('q', 0.506), ('sˤ', 0.506)]
actual sil offset_i 0 predicted: [('sil', 0.715), ('t', 0.511), ('r', 0.506), ('q', 0.506), ('sˤ', 0.506)]
actual sil offset_i 0 predicted: [('sil', 0.716), ('t', 0.511), ('r', 0.506), ('sˤ', 0.506), ('q', 0.506)]
actual sil offset_i 0 predicted: [('sil', 0.714), ('t', 0.513), ('r', 0.506), ('sˤ', 0.506), ('q', 0.505)]
actual t offset_i 1 predicted: [('sil', 0.637), ('t', 0.585), ('w', 0.522), ('r', 0.512), ('q', 0.509)]
actual t offset_i 0 predicted: [('t', 0.639), ('sil', 0.565), ('w', 0.539), ('r', 0.515), ('q', 0.514)]
actual t offset_i 0 predicted: [('t', 0.661),

In [None]:
vec1=torch.rand(5)
print(vec1,vec1.shape)
vec2=vec1.unsqueeze(0)
print(vec2,vec2.shape)

tensor([0.5239, 0.7981, 0.7718, 0.0112, 0.8100]) torch.Size([5])
tensor([[0.5239, 0.7981, 0.7718, 0.0112, 0.8100]]) torch.Size([1, 5])


In [None]:
#preparing data - OLD
wav_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/"
#json_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/"
tsv_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/tsv2/"
wav_files=os.listdir(wav_dir)
all_data=[]
for i,fname in enumerate(wav_files[:50]):
  if i%10==0: print(i)
  #if i==10: break
  phones_list=[]
  wav_fpath=os.path.join(wav_dir,fname)
  file_id=fname.split('.')[0] #file name without extension
  tsv_fpath=os.path.join(tsv_dir,file_id+'.tsv') #now creating the tsv file for the phoneme-time table
  t_list,ft_list=extract_audio_features(wav_fpath,"mfcc")
  cur_tsv_list=read_tsv(tsv_fpath)
  for tsv_item,ft_item in zip(cur_tsv_list,ft_list):
    cur_time,cur_phone=tsv_item
    tmp=[round(v,1) for v in ft_item]
    #print(tsv_item[1], tmp)
    all_data.append((cur_phone,ft_item))

  # print("ft_list", ft_list.shape)
  # print("cur_tsv_list", len(cur_tsv_list))
  # print("-----")

  # continue
  
  # sample_rate, samples = wavfile.read(wav_fpath)
  # frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)
  # file_duration=len(samples)/sample_rate
  # spectrogram=spectrogram.transpose()
  # tsv_fopen=open(tsv_fpath)
  # tsv_list=[]
  # for tsv0 in tsv_fopen: 
  #   line_split=tsv0.strip().split("\t")
  #   if len(line_split)!=2: continue
  #   a,b=line_split
  #   tsv_list.append([float(a),b])
  # tsv_fopen.close()
  # #print("tsv_list",len(tsv_list),"spectrogram",spectrogram.shape)
  # all_data.append((tsv_list,spectrogram))
  #print("tsv_list", len(tsv_list),tsv_list[:10])
print(len(all_data))
print(all_data[0])
train_size=int(len(all_data)*0.8)
train_data,test_data=all_data[:train_size],all_data[train_size:]
print("train_data,test_data",len(train_data),len(test_data))


0


  # Remove the CWD from sys.path while we load stuff.


10
20
30
40
48899
('sil', array([ 10.02398013, -36.82826074,  22.39296318, -23.55537289,
        21.1721557 , -23.41151604,  17.30218381, -15.92378065,
         8.9656957 ,  -2.37825806,   0.791876  ,   6.55471386,
        -3.83212697]))
train_data,test_data 39119 9780


In [None]:
#test json files
import json
json_fpath='arabic-speech-corpus/json/ARA NORM  0980.json'
fopen=open(json_fpath)
content=fopen.read()
fopen.close()
content_dict=json.loads(content)
#print(content_dict.keys())
phones=content_dict["items"]["1"]["intervals"]
for k,v in phones.items():
  print(k,v)


1 {'xmin': 0.0, 'xmax': 0.09, 'text': 'sil', 'id': '1'}
2 {'xmin': 0.09, 'xmax': 0.13, 'text': '<', 'id': '2'}
3 {'xmin': 0.13, 'xmax': 0.19, 'text': "a'", 'id': '3'}
4 {'xmin': 0.19, 'xmax': 0.38, 'text': 'kk', 'id': '4'}
5 {'xmin': 0.38, 'xmax': 0.43, 'text': 'a', 'id': '5'}
6 {'xmin': 0.43, 'xmax': 0.49, 'text': 'd', 'id': '6'}
7 {'xmin': 0.49, 'xmax': 0.56, 'text': 'a', 'id': '7'}
8 {'xmin': 0.56, 'xmax': 0.61, 'text': 'l', 'id': '8'}
9 {'xmin': 0.61, 'xmax': 0.75, 'text': 'k', 'id': '9'}
10 {'xmin': 0.75, 'xmax': 0.89, 'text': "aa'", 'id': '10'}
11 {'xmin': 0.89, 'xmax': 1.0, 'text': 't', 'id': '11'}
12 {'xmin': 1.0, 'xmax': 1.04, 'text': 'i0', 'id': '12'}
13 {'xmin': 1.04, 'xmax': 1.1, 'text': 'b', 'id': '13'}
14 {'xmin': 1.1, 'xmax': 1.13, 'text': 'u0', 'id': '14'}
15 {'xmin': 1.13, 'xmax': 1.32, 'text': 'w', 'id': '15'}
16 {'xmin': 1.32, 'xmax': 1.35, 'text': 'a', 'id': '16'}
17 {'xmin': 1.35, 'xmax': 1.49, 'text': 'nn', 'id': '17'}
18 {'xmin': 1.49, 'xmax': 1.64, 'text': "AA'"

In [None]:
#Now extracting frequency features from wav files
import numpy as np
from scipy import signal
from scipy.io import wavfile
wav_fpath="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/ARA NORM  0002.wav"
sample_rate, samples = wavfile.read(wav_fpath)
frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)
file_duration=len(samples)/sample_rate
#print(len(samples))
print("times:", len(times), times[:20])
print("Frequencies:", frequencies)
print("Frequencies:", frequencies.shape)
print("spectrogram:", spectrogram.shape)
mfcc_out=get_mfcc(wav_fpath)
print("mfcc_out",mfcc_out.shape)


  


times: 3273 [0.00266667 0.00733333 0.012      0.01666667 0.02133333 0.026
 0.03066667 0.03533333 0.04       0.04466667 0.04933333 0.054
 0.05866667 0.06333333 0.068      0.07266667 0.07733333 0.082
 0.08666667 0.09133333]
Frequencies: [    0.    187.5   375.    562.5   750.    937.5  1125.   1312.5  1500.
  1687.5  1875.   2062.5  2250.   2437.5  2625.   2812.5  3000.   3187.5
  3375.   3562.5  3750.   3937.5  4125.   4312.5  4500.   4687.5  4875.
  5062.5  5250.   5437.5  5625.   5812.5  6000.   6187.5  6375.   6562.5
  6750.   6937.5  7125.   7312.5  7500.   7687.5  7875.   8062.5  8250.
  8437.5  8625.   8812.5  9000.   9187.5  9375.   9562.5  9750.   9937.5
 10125.  10312.5 10500.  10687.5 10875.  11062.5 11250.  11437.5 11625.
 11812.5 12000.  12187.5 12375.  12562.5 12750.  12937.5 13125.  13312.5
 13500.  13687.5 13875.  14062.5 14250.  14437.5 14625.  14812.5 15000.
 15187.5 15375.  15562.5 15750.  15937.5 16125.  16312.5 16500.  16687.5
 16875.  17062.5 17250.  17437.5 17625. 

In [None]:
#Function to extract features and labels from json and wav files
#accept wav file path, and json file path

In [None]:
#identify all the labels in all the json files [40-50 phoneme] 
import json, os
json_dir='arabic-speech-corpus/json'
json_files=os.listdir(json_dir)
print("number of json files:", len(json_files))
phoneme_counter={}
phoneme_duration_dict={}
for i,fname in enumerate(json_files): #iterate over files and count how many times each phoneme occurs
  #print(fname)
  if i%50==0: print(i)
  json_fpath=os.path.join(json_dir,fname)
# json_fpath='arabic-speech-corpus/json/ARA NORM  0980.json'
  fopen=open(json_fpath)
  content=fopen.read()
  fopen.close()
  content_dict=json.loads(content)
  #print(content_dict.keys())
  phones=content_dict["items"]["1"]["intervals"]
  for k,v in phones.items():
    phoneme=v["text"]
    dur=v["xmax"]-v["xmin"]
    phoneme_counter[phoneme]=phoneme_counter.get(phoneme,0)+1
    phoneme_duration_dict[phoneme]=phoneme_duration_dict.get(phoneme,0)+dur


    #print(k,v)
for a,b in phoneme_counter.items():
  print(a,b)

number of json files: 1813
0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
sil 8221
t 7754
a 18379
gg 83
A 3361
w 4783
s 3256
a' 4141
ww 602
r 4596
A' 1495
u0 3442
i0 7253
uu0 572
y 2771
ii0 848
ff 113
uu0' 805
i0' 1477
ii0' 2134
qq 120
U0 679
I0' 342
I0 1021
kk 137
u0' 577
aa 1767
aa' 2557
ll 411
mm 206
nn 450
hh 95
< 2952
S 2354
b 2556
SS 423
u1 620
n 5166
<< 78
rr 733
bb 136
tt 363
^ 533
^^ 173
j 818
jj 131
AA' 675
H 1079
HH 110
x 576
II0' 324
U1 134
xx 95
* 401
** 114
U0' 241
ss 482
$ 614
$$ 198
D 486
DD 110
T 569
TT 159
Z 204
UU0' 281
ZZ 112
E 2164
EE 95
g 329
f 1621
q 1213
k 998
h 1330
i1 1171
AA 334
I1 171
z 456
zz 114
m 3699
l 5424
yy 159
d 1424
dd 302
II0 117
UU0 83
i1' 10
Ah 5
G 3
J 3
ii1 3
uu1 15
v 6
AH 6
dist 10
E  1
- 4
uu1' 19
p 3
pp 3
ii1' 6
i1  1
a  1
u 3
I1' 1
UU1' 2
u1' 1


In [None]:
#all_phonemes=sorted(list(phoneme_counter.keys()))
all_phonemes=['$', '$$', '*', '**', '-', '<', '<<', 'A', "A'", 'AA', "AA'", 'AH', 'Ah', 'D', 'DD', 'E', 'E ', 'EE', 'G', 'H', 'HH', 'I0', "I0'", 'I1', "I1'", 'II0', "II0'", 'J', 'S', 'SS', 'T', 'TT', 'U0', "U0'", 'U1', 'UU0', "UU0'", "UU1'", 'Z', 'ZZ', '^', '^^', 'a', 'a ', "a'", 'aa', "aa'", 'b', 'bb', 'd', 'dd', 'dist', 'f', 'ff', 'g', 'gg', 'h', 'hh', 'i0', "i0'", 'i1', 'i1 ', "i1'", 'ii0', "ii0'", 'ii1', "ii1'", 'j', 'jj', 'k', 'kk', 'l', 'll', 'm', 'mm', 'n', 'nn', 'p', 'pp', 'q', 'qq', 'r', 'rr', 's', 'sil', 'ss', 't', 'tt', 'u', 'u0', "u0'", 'u1', "u1'", 'uu0', "uu0'", 'uu1', "uu1'", 'v', 'w', 'ww', 'x', 'xx', 'y', 'yy', 'z', 'zz']
print(len(all_phonemes))
#Analyzing the average duration of each phoneme
for ph in all_phonemes:
  phoneme_count=phoneme_counter[ph]
  avg_phoneme_duration=round(phoneme_duration_dict[ph]/phoneme_count,2)
  print(ph, phoneme_count,avg_phoneme_duration)


106
$ 614 0.13
$$ 198 0.21
* 401 0.09
** 114 0.17
- 4 0.05
< 2952 0.08
<< 78 0.19
A 3361 0.08
A' 1495 0.07
AA 334 0.16
AA' 675 0.16
AH 6 0.09
Ah 5 0.09
D 486 0.08
DD 110 0.19
E 2164 0.13
E  1 0.08
EE 95 0.06
G 3 0.06
H 1079 0.14
HH 110 0.2
I0 1021 0.08
I0' 342 0.07
I1 171 0.1
I1' 1 0.1
II0 117 0.17
II0' 324 0.16
J 3 0.07
S 2354 0.12
SS 423 0.21
T 569 0.1
TT 159 0.18
U0 679 0.08
U0' 241 0.08
U1 134 0.07
UU0 83 0.17
UU0' 281 0.16
UU1' 2 0.13
Z 204 0.11
ZZ 112 0.19
^ 533 0.12
^^ 173 0.19
a 18379 0.07
a  1 0.06
a' 4141 0.06
aa 1767 0.15
aa' 2557 0.15
b 2556 0.08
bb 136 0.18
d 1424 0.08
dd 302 0.17
dist 10 0.07
f 1621 0.12
ff 113 0.19
g 329 0.09
gg 83 0.18
h 1330 0.08
hh 95 0.13
i0 7253 0.09
i0' 1477 0.07
i1 1171 0.08
i1  1 0.06
i1' 10 0.08
ii0 848 0.15
ii0' 2134 0.15
ii1 3 0.14
ii1' 6 0.17
j 818 0.11
jj 131 0.19
k 998 0.12
kk 137 0.19
l 5424 0.07
ll 411 0.16
m 3699 0.09
mm 206 0.18
n 5166 0.09
nn 450 0.16
p 3 0.09
pp 3 0.16
q 1213 0.11
qq 120 0.18
r 4596 0.05
rr 733 0.19
s 3256 0.13
sil 82

In [None]:
#Mona
!ls

arabic-speech-corpus  arabic-speech-corpus.zip


##Extracting features - get phoneme at time stamps and corresponding features - OLD

In [None]:
import numpy as np
import os,json
from scipy import signal
from scipy.io import wavfile
wav_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/"
json_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/"
tsv_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/tsv/"
combined_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/combined/"
if not os.path.exists(combined_dir): os.makedirs(combined_dir)


def features_extraction():
  if not os.path.exists(tsv_dir): os.makedirs(tsv_dir)
  wav_files=os.listdir(wav_dir)
  for i,fname in enumerate(wav_files):
    if i%100==0: print(i)
    #if i==10: break
    phones_list=[]
    wav_fpath=os.path.join(wav_dir,fname)
    file_id=fname.split('.')[0] #file name without extension
    json_fpath=os.path.join(json_dir,file_id+'.json')
    tsv_fpath=os.path.join(tsv_dir,file_id+'.tsv') #now creating the tsv file for the phoneme-time table
    combined_fpath=os.path.join(combined_dir,file_id+'.txt') #now creating the tsv file for the phoneme-time table
    #if os.path.exists(combined_fpath): continue
    
    #print(json_fpath)
    #extract audio features
    sample_rate, samples = wavfile.read(wav_fpath)
    frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)
    file_duration=len(samples)/sample_rate
    spectrogram=spectrogram.transpose()
    #continue
    
    #read json content
    fopen=open(json_fpath)
    print(json_fpath)
    content=fopen.read()
    fopen.close()
    content_dict=json.loads(content)
    #continue
    #print(content_dict.keys())
    feature_labels_list=[]
     
    phones=content_dict["items"]["1"]["intervals"]
    for t,spec0 in zip(times,spectrogram):
      cur_phoneme="-"
      for k,v in phones.items():
        phoneme=v["text"]
        min_time=float (v["xmin"])
        max_time=float (v["xmax"])
        if(t>=min_time and t<=max_time):
          cur_phoneme=phoneme
          break
      phones_list.append((t,cur_phoneme))
        
          # phones_list.append((t,phoneme))
          # tmp_dict={}
          # tmp_dict["time"]=t
          # tmp_dict["phoneme"]=phoneme
          # tmp_dict["spectrogram"]=list([float(v) for v in spec0])
          # feature_labels_list.append(tmp_dict)


          
    #print(len(samples))
    #print("phones_list",len(phones_list), "spectrogram",spectrogram.shape)
    
    tsv_fopen=open(tsv_fpath,"w")
    for t,ph in phones_list:
      line="%s\t%s\n"%(t,ph)
      tsv_fopen.write(line)
    tsv_fopen.close()

    # combined_fopen=open(combined_fpath,"w")
    # for item in feature_labels_list:
    #   #line="%s\t%s\n"%(t,ph)
    #   # print(item)
    #   # print("---")
    #   combined_fopen.write(json.dumps(item)+"\n")
    # combined_fopen.close()

    #print(phones_list)
    #print("times:", len(times), times[:20])
    #print("------")
    #print("Frequencies:", frequencies)
    #print("Frequencies:", frequencies.shape)
    #print("spectrogram:", spectrogram.shape)
    #break

features_extraction()

0
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1021.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1029.json




/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1037.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1036.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1033.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1022.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1038.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1026.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1028.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1020.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1040.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1019.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1039.json
/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  10

##Starting training - OLD

In [None]:
import os, json
import numpy as np
n_epochs=5
n_input=129
n_hidden =64
n_layers=2
#n_output=40
LR=0.00001
output_labels=['$', '$$', '*', '**', '-', '<', '<<', 'A', "A'", 'AA', "AA'", 'AH', 'Ah', 'D', 'DD', 'E', 'E ', 'EE', 'G', 'H', 'HH', 'I0', "I0'", 'I1', "I1'", 'II0', "II0'", 'J', 'S', 'SS', 'T', 'TT', 'U0', "U0'", 'U1', 'UU0', "UU0'", "UU1'", 'Z', 'ZZ', '^', '^^', 'a', 'a ', "a'", 'aa', "aa'", 'b', 'bb', 'd', 'dd', 'dist', 'f', 'ff', 'g', 'gg', 'h', 'hh', 'i0', "i0'", 'i1', 'i1 ', "i1'", 'ii0', "ii0'", 'ii1', "ii1'", 'j', 'jj', 'k', 'kk', 'l', 'll', 'm', 'mm', 'n', 'nn', 'p', 'pp', 'q', 'qq', 'r', 'rr', 's', 'sil', 'ss', 't', 'tt', 'u', 'u0', "u0'", 'u1', "u1'", 'uu0', "uu0'", 'uu1', "uu1'", 'v', 'w', 'ww', 'x', 'xx', 'y', 'yy', 'z', 'zz']
n_output=len(output_labels)
loss_func = nn.MSELoss()
rnn = RNN(n_input, n_hidden, n_output,n_layers,matching_in_out=True)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all cnn parameters
# combined_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/combined/"
# all_files=[os.path.join(combined_dir,v) for v in os.listdir(combined_dir)]
# train_size=int(len(all_files)*0.8)
# train_files,test_files=all_files[:train_size],all_files[train_size:]
# def process_combined(combined_fpath0):
#   out_list=[]
#   combined_open=open(combined_fpath0)
#   for line in combined_open:
#     tmp_json=json.loads(line)
#     if tmp_json==None: continue
#     out_list.append((tmp_json["time"],tmp_json["phoneme"],tmp_json["spectrogram"]))
#   combined_open.close()
#   return out_list

for epoch0 in range(n_epochs):
  PATH=os.path.join(model_dir, "model-%s.model"%epoch0)
  if os.path.exists(PATH):
    checkpoint = torch.load(PATH)
    rnn.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    print("loaded model for this epoch",PATH)
    for a,b in  checkpoint.items():
      if "loss" in a.lower(): print(a,round(b,6))
    continue  

  total_train_loss,total_test_loss=0,0
  train_counter,test_counter=0,0
  for train_item in train_data:
    if train_counter%10==0: print(train_counter)
    train_counter+=1
    #tmp_combined_list=process_combined(trf)
    tsv_list,feature_array=train_item
    tmp_labels=[v[1] for v in tsv_list]
    actual_one_hot_labels=one_hot(tmp_labels,output_labels).astype("float32")
    actual_labels_tensor_flat=torch.tensor(actual_one_hot_labels).ravel()
    input_tensor=torch.tensor(feature_array.astype("float32"))
    rnn_out=rnn(input_tensor)
    rnn_flat_out=rnn_out.ravel()
    loss = loss_func(rnn_flat_out, actual_labels_tensor_flat) #calculate the loss, difference between the output and the desired outcome tensors
    total_train_loss+=loss.item()
    loss.backward()
    optimizer.step()  
  print("testing")
  for test_item in test_data:
    if test_counter%10==0: print(test_counter)
    test_counter+=1
    #tmp_combined_list=process_combined(trf)
    tsv_list,feature_array=test_item
    tmp_labels=[v[1] for v in tsv_list]
    actual_one_hot_labels=one_hot(tmp_labels,output_labels).astype("float32")
    actual_labels_tensor_flat=torch.tensor(actual_one_hot_labels).ravel()
    input_tensor=torch.tensor(feature_array.astype("float32"))
    rnn_out=rnn(input_tensor)
    rnn_flat_out=rnn_out.ravel()
    loss = loss_func(rnn_flat_out, actual_labels_tensor_flat) #calculate the loss, difference between the output and the desired outcome tensors
    total_test_loss+=loss.item()
  avg_train_loss=round(total_train_loss/train_counter,6)
  avg_test_loss=round(total_test_loss/test_counter,6)
  print("epoch0:",epoch0, "avg_train_loss",avg_train_loss, "avg_test_loss",avg_test_loss)
  print("-------")


    
  # print("feature_list",feature_array.shape)
  # print("actual_labels",actual_one_hot_labels.shape)
  # print("input_tensor",input_tensor.shape)
  # print("rnn_out", rnn_out.shape)
  # print("rnn_flat_out",rnn_flat_out.shape)
  # print("actual_labels_tensor_flat",actual_labels_tensor_flat.shape)
  # print("-------")
  #print("tmp_combined_list",len(tmp_combined_list))
  # continue
  # if tmp_combined_list==[]: continue
  # tmp_times=[v[0] for v in tmp_combined_list]
  # tmp_labels=[v[1] for v in tmp_combined_list]
  # tmp_features=[v[-1] for v in tmp_combined_list]
  # print("tmp_features",len(tmp_features), "tmp_labels",len(tmp_labels), "output_labels",len(output_labels))
  # actual_one_hot_labels=one_hot(tmp_labels,output_labels)
  # actual_labels_tensor=torch.tensor(actual_one_hot_labels)#.ravel()
  
  # input_array=np.array(tmp_features).transpose().astype("float32")
  # #input_array=np.array(tmp_features).astype("float32")
  # input_tensor=torch.tensor(input_array)
  # print(tmp_labels[:5], input_tensor.shape) #one_hot(el_vec,val_list)  out2labels(rnn_flat_out,label_list)
  
  # rnn_out=rnn(input_tensor)
  # print("rnn_out", rnn_out.shape)
  # rnn_flat_out=rnn_out.ravel()
  # rnn_out_labels=out2labels(rnn_flat_out,output_labels)
  # print("input_tensor",input_tensor.shape)
  # print("actual_labels_tensor",actual_labels_tensor.shape)
  # continue

  # loss = loss_func(rnn_flat_out, actual_labels_tensor) #calculate the loss, difference between the output and the desired outcome tensors
  # loss.backward()
  # optimizer.step()  

  # print(len(list_json))
  # print(list_json[0])
  # print(list_json[-1])
  
#print(len(train_files), len(test_files) )


0




10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
930
940
950
960
970
980
990
1000
1010
1020
1030
1040
1050
1060
1070
1080
1090
1100
1110
1120
1130
1140
1150
1160
1170
1180
1190
1200
1210
1220
1230
1240
1250
1260
1270
1280
1290
1300
1310
1320
1330
1340
1350
1360
1370
1380
1390
1400
1410
1420
1430
1440
testing
0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
epoch0: 0 avg_train_loss 0.16215 avg_test_loss 0.048243
-------
0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
47

KeyboardInterrupt: ignored

##Testing on actual data - OLD

In [None]:
output_labels=['$', '$$', '*', '**', '-', '<', '<<', 'A', "A'", 'AA', "AA'", 'AH', 'Ah', 'D', 'DD', 'E', 'E ', 'EE', 'G', 'H', 'HH', 'I0', "I0'", 'I1', "I1'", 'II0', "II0'", 'J', 'S', 'SS', 'T', 'TT', 'U0', "U0'", 'U1', 'UU0', "UU0'", "UU1'", 'Z', 'ZZ', '^', '^^', 'a', 'a ', "a'", 'aa', "aa'", 'b', 'bb', 'd', 'dd', 'dist', 'f', 'ff', 'g', 'gg', 'h', 'hh', 'i0', "i0'", 'i1', 'i1 ', "i1'", 'ii0', "ii0'", 'ii1', "ii1'", 'j', 'jj', 'k', 'kk', 'l', 'll', 'm', 'mm', 'n', 'nn', 'p', 'pp', 'q', 'qq', 'r', 'rr', 's', 'sil', 'ss', 't', 'tt', 'u', 'u0', "u0'", 'u1', "u1'", 'uu0', "uu0'", 'uu1', "uu1'", 'v', 'w', 'ww', 'x', 'xx', 'y', 'yy', 'z', 'zz']
for test_item in test_data[:5]:
  if test_counter%10==0: print(test_counter)
  test_counter+=1
  #tmp_combined_list=process_combined(trf)
  tsv_list,feature_array=test_item
  tmp_labels=[v[1] for v in tsv_list]
  actual_one_hot_labels=one_hot(tmp_labels,output_labels).astype("float32")
  actual_labels_tensor_flat=torch.tensor(actual_one_hot_labels).ravel()
  input_tensor=torch.tensor(feature_array.astype("float32"))
  rnn_out=rnn(input_tensor)
  rnn_flat_out=rnn_out.ravel()
  preds=out2labels(rnn_flat_out,output_labels)
  for i0 in range(100):
    actual=tmp_labels[i0]
    cur_preds=[(v[0],round(v[1].item(),3)) for v in preds[i0]]
    print("actual",actual)
    print(cur_preds[:10])
    print("---")
  # print(len(preds))
  # print(len(tmp_labels))
  print("====")




actual sil
[('sil', 0.45), ('S', 0.446), ('Z', 0.445), ('g', 0.445), ('dist', 0.442), ('a', 0.44), ("i1'", 0.44), ('TT', 0.439), ('jj', 0.436), ("uu1'", 0.435)]
---
actual sil
[('sil', 0.279), ('S', 0.258), ('Z', 0.244), ('r', 0.243), ('q', 0.241), ('^^', 0.241), ('g', 0.238), ('dist', 0.237), ('TT', 0.236), ('jj', 0.229)]
---
actual sil
[('sil', 0.1), ('S', 0.079), ('^^', 0.078), ('r', 0.075), ('q', 0.072), ('Z', 0.068), ('TT', 0.066), ('dist', 0.062), ('dd', 0.061), ('A', 0.059)]
---
actual sil
[('sil', 0.063), ('^^', 0.048), ('S', 0.048), ('r', 0.045), ('q', 0.043), ('Z', 0.04), ('TT', 0.039), ('dd', 0.035), ('dist', 0.035), ('A', 0.033)]
---
actual sil
[('sil', 0.06), ('^^', 0.045), ('S', 0.045), ('r', 0.042), ('q', 0.04), ('Z', 0.038), ('TT', 0.036), ('dd', 0.032), ('dist', 0.032), ('A', 0.031)]
---
actual sil
[('sil', 0.059), ('^^', 0.045), ('S', 0.045), ('r', 0.041), ('q', 0.04), ('Z', 0.037), ('TT', 0.035), ('dd', 0.032), ('dist', 0.032), ('A', 0.031)]
---
actual sil
[('sil', 0

In [None]:
import os
print(len(os.listdir(tsv_dir)))
print(len(os.listdir(json_dir)))

NameError: ignored

In [None]:
features_extraction()

  from ipykernel import kernelapp as app


/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/ARA NORM  1021.json
dict_keys(['File type', 'Object class', 'xmin', 'xmax', 'size', 'items'])
['t', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', 't', '

In [None]:
wav_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/"
json_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/json/"
tsv_dir="/content/drive/MyDrive/speech_project/arabic-speech-corpus/tsv/"

wav_files=os.listdir(wav_dir)
for i,fname in enumerate(wav_files[:5]):
  if i%100==0: print(i)
  wav_fpath=os.path.join(wav_dir,fname)
  file_id=fname.split('.')[0] #file name without extension
  json_fpath=os.path.join(json_dir,file_id+'.json')
  tsv_fpath=os.path.join(tsv_dir,file_id+'.tsv') #now creating the tsv file for the phoneme-time table

  tsv_fopen=open(tsv_fpath)
  
  tsv_content_dict={} #identifying the corresponding label to each point in time
  for line in tsv_fopen:
    line_split=line.strip("\n\r").split("\t")
    if len(line_split)!=2: continue
    time_,label_=line_split
    tsv_content_dict[float(time_)]=label_
  tsv_fopen.close()

  print(wav_fpath)
  print(tsv_fpath)
  print(tsv_content_2d[:5])

  sample_rate, samples = wavfile.read(wav_fpath)
  frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)
  file_duration=len(samples)/sample_rate
  spectrogram=spectrogram.transpose()
  #print(len(spectrogram),len(spectrogram[0]))
  for i in range(20,40):
    cur_t=times[i]
    cur_spectogram=spectrogram[i]
    cur_label=tsv_content_dict[cur_t]
    #print(cur_t,cur_label)
    print(cur_t,cur_label,cur_spectogram[:5],len(cur_spectogram))
  #   for cs in cur_spectogram[:5]:
  #     print(cs)

  #acoustic_features=extract_features(wav_file_path)
  print("----")



0
/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/ARA NORM  1021.wav
/content/drive/MyDrive/speech_project/arabic-speech-corpus/tsv/ARA NORM  1021.tsv
[['0.0026666666666666666', 'sil'], ['0.007333333333333333', 'sil'], ['0.012', 'sil'], ['0.016666666666666666', 'sil'], ['0.021333333333333333', 'sil']]
0.096 w [  2.399953  763.7001     25.487423    3.7621925   1.1779009] 129
0.10066666666666667 w [   8.058664 4520.782     263.07797    12.313553    6.413483] 129
0.10533333333333333 w [ 303.41858 3711.4333   275.32486  278.62384  133.61092] 129
0.11 w [   77.49511 12006.17     2820.4958     64.77254   133.38329] 129
0.11466666666666667 w [  56.584137 3004.2769    393.09253   169.96492   212.23615 ] 129
0.11933333333333333 w [ 151.46663  7079.605    2468.7944    713.6378     11.106361] 129
0.124 w [  350.77103 10196.743     227.88739  1440.384     316.55746] 129
0.12866666666666668 w [6.7500465e+01 7.3917773e+03 3.2512361e+04 9.7674960e-01 1.0418031e+03] 129
0.13333333333333



In [None]:
!unzip basic.zip

Archive:  basic.zip
   creating: wav/
  inflating: wav/tesa3-2oTTat.wav    
  inflating: wav/thiqah.wav          
  inflating: wav/mabsooT.wav         
  inflating: wav/khamseen.wav        
  inflating: wav/waa7ed-we-3eshreen.wav  
  inflating: wav/min.wav             
  inflating: wav/el-dail-betaa3-el-kalb.wav  
  inflating: wav/3alaa.wav           
  inflating: wav/el-genainah-di.wav  
  inflating: wav/2arait-ketaab-3an-el-3uloom.wav  
  inflating: wav/saba3taashar.wav    
  inflating: wav/3andek.wav          
  inflating: wav/heyya-bent-gameelah.wav  
  inflating: wav/metDaay2ah.wav      
  inflating: wav/dah-el-kitaab-el-mufeed.wav  
  inflating: wav/3ayyaaneen.wav      
  inflating: wav/kalb-waa7ed.wav     
  inflating: wav/metDaaye2.wav       
  inflating: wav/haa.wav             
  inflating: wav/3andenaa-3arabeyyah.wav  
  inflating: wav/el-mudeer-etkallem-3an-el-mushkelah.wav  
  inflating: wav/saafartu-ma3-aS7aabee.wav  
  inflating: wav/dah-baba.wav        
  inflating: wav

In [None]:
preds[0]

[('sil', tensor(0.0058, grad_fn=<UnbindBackward0>)),
 ('gg', tensor(0.0051, grad_fn=<UnbindBackward0>)),
 ('mm', tensor(0.0047, grad_fn=<UnbindBackward0>)),
 ('tt', tensor(0.0038, grad_fn=<UnbindBackward0>)),
 ('w', tensor(0.0032, grad_fn=<UnbindBackward0>)),
 ('m', tensor(0.0032, grad_fn=<UnbindBackward0>)),
 ('D', tensor(0.0030, grad_fn=<UnbindBackward0>)),
 ('ff', tensor(0.0029, grad_fn=<UnbindBackward0>)),
 ('$$', tensor(0.0028, grad_fn=<UnbindBackward0>)),
 ('Z', tensor(0.0027, grad_fn=<UnbindBackward0>)),
 ('y', tensor(0.0026, grad_fn=<UnbindBackward0>)),
 ('**', tensor(0.0025, grad_fn=<UnbindBackward0>)),
 ('q', tensor(0.0025, grad_fn=<UnbindBackward0>)),
 ('r', tensor(0.0023, grad_fn=<UnbindBackward0>)),
 ('jj', tensor(0.0023, grad_fn=<UnbindBackward0>)),
 ('f', tensor(0.0023, grad_fn=<UnbindBackward0>)),
 ('k', tensor(0.0022, grad_fn=<UnbindBackward0>)),
 ("aa'", tensor(0.0022, grad_fn=<UnbindBackward0>)),
 ('v', tensor(0.0022, grad_fn=<UnbindBackward0>)),
 ('ll', tensor(0.002

#Egyptian Arabic Recordings - Extract features and labels

In [None]:
!pip install python_speech_features

Collecting python_speech_features
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
Building wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone
  Created wheel for python-speech-features: filename=python_speech_features-0.6-py3-none-any.whl size=5888 sha256=1c4fd6bbe91ceb9dccd63dddf76d5ff4f14d72f544778574a5a8fc02764526e8
  Stored in directory: /root/.cache/pip/wheels/b0/0e/94/28cd6afa3cd5998a63eef99fe31777acd7d758f59cf24839eb
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


#Basic Functions

In [None]:
import numpy as np
import os,json
from scipy import signal
from scipy.io import wavfile
import re

from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav
#combined_letters=["sh","gh","dh","kh","DH"]

def get_mfcc(wav_fpath0):
  (rate,sig) = wav.read(wav_fpath0)
  if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
  return mfcc(sig,rate)

# def get_fbank(wav_fpath0):
#   (rate,sig) = wav.read(wav_fpath0)
#   if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
#   return logfbank(sig,rate)


def get_wav_spect(wav_fpath):
  sample_rate, samples = wavfile.read(wav_fpath)
  if len(samples.shape)>1: samples= samples.sum(axis=1) / 2 #handle mono/sterio
  frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)
  file_duration=len(samples)/sample_rate
  spectrogram=spectrogram.transpose()
  return spectrogram
def get_phones(tmp_text):
  combined_letters=["sh","gh","dh","kh","th", "DH"]
  tmp_text=re.sub("\-q$","-?",tmp_text)
  tmp_text=tmp_text.replace(" "," + ")
  tmp_text=tmp_text.replace("-"," + ")
  tmp_text=re.sub("([aeiou]+)",r" \1 ",tmp_text)
  for a0 in combined_letters:
    tmp_text=tmp_text.replace(a0, " %s "%a0)
  tmp_list_phones=re.split("\s+",tmp_text.strip())
  list_phones=[]
  for tp0 in tmp_list_phones:
    if len(tp0)==1: 
      if tp0 in "DST": list_phones.append(tp0)
      elif tp0.lower()=="p": list_phones.append("b")
      elif tp0.lower()=="c": list_phones.append("k")
      else: list_phones.append(tp0.lower())
    elif tp0[0] in "aeiou": list_phones.append(tp0)
    elif tp0 in combined_letters: list_phones.append(tp0)
    else: 
      for chr0 in tp0:
        if chr0 in "DST": list_phones.append(chr0)
        elif chr0.lower()=="p": list_phones.append("b")
        elif chr0.lower()=="c": list_phones.append("k")
        else: list_phones.append(chr0.lower())
      #list_phones.extend(tp0)
  return list_phones

def one_hot(el_vec,val_list):
  final_one_hot=[]
  for el0 in el_vec:
    tmp_list=[0.]*len(val_list)
    if el0 in val_list:
      found_i=val_list.index(el0)
      tmp_list[found_i]=1.
    final_one_hot.append(tmp_list)
  return np.array(final_one_hot) 

def get_label_from_tensor(unflattened_tensor,val_list):
  all_dicts=[]
  for row in unflattened_tensor:
    tmp_list=[]
    for i,v0 in enumerate(val_list):
      pred_val=row[i].item()
      tmp_list.append((v0,pred_val))
      #local_dict[v0]=row[i]
    tmp_list.sort(key=lambda x:-x[1])
    all_dicts.append(tmp_list)
  return all_dicts

def out2labels(rnn_flat_out,label_list): #a flat rnn output to split into slices, and get the label weights for each slice
  final_list=[]
  n_slices=int(len(rnn_flat_out)/len(label_list))
  for i0 in range(n_slices):
    i1=i0+1
    cur_slice=rnn_flat_out[i0*len(label_list):i1*len(label_list)]
    tmp_list=[]
    for lb0,cs0 in zip(label_list,cur_slice): tmp_list.append((lb0,cs0))
    tmp_list.sort(key=lambda x:-x[-1])
    final_list.append(tmp_list)
  return final_list


def get_fbank(wav_fpath0):
  (rate,sig) = wav.read(wav_fpath0)
  if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
  return logfbank(sig,rate)

def fname2phones(fname0,fixed_n_phones=100): #create a list of phones (and padding) from a file name (in fraco)
  #cur_text=fname0.replace(".wav","")
  cur_text=fname0.split(".")[0] #need to make sure there is no period within the file name text
  cur_phones0=get_phones(cur_text)
  cur_phones0=[""]+cur_phones0
  cur_phones0 = cur_phones0 + [''] * (fixed_n_phones - len(cur_phones0))
  return cur_phones0


def process_input(wav_fpath0): return get_fbank(wav_fpath0)

def get_dir_files(dir_path0,train_ratio=0.8):
  all_wav_files=[]
  for root0,dir0,files0 in os.walk(dir_path0):
    for fname in files0:
      cur_fpath=os.path.join(root0,fname)
      if not fname.endswith(".wav"): continue
      all_wav_files.append(cur_fpath)
  train_size=int(len(all_wav_files)*train_ratio)
  train_data,test_date=all_wav_files[:train_size],all_wav_files[train_size:]
  return train_data,test_date

print("loaded the needed functions")

loaded the needed functions


#Preparing Data

In [None]:
cur_dir="basic"
wav_file_list=os.listdir(cur_dir)

full_list=[]
all_phones=[]
fixed_n_phones=100
for fname in os.listdir(cur_dir):
  tmp_fpath=os.path.join(cur_dir,fname)
  cur_text=fname.replace(".wav","")
  cur_phones=get_phones(cur_text)
  cur_phones=[""]+cur_phones
  cur_phones = cur_phones + [''] * (fixed_n_phones - len(cur_phones))
  #print(len(cur_phones))
  full_list.append((tmp_fpath,cur_phones))
  all_phones.extend(cur_phones)

all_phones=sorted(list(set(all_phones)))  
print(all_phones)
full_set=[]
for fpath0,ph0 in full_list:
  oh_labels=one_hot(ph0,all_phones)
  #sp_features=get_wav_spect(fpath0)
  #sp_features=get_mfcc(fpath0)
  sp_features=get_fbank(fpath0)
  #get_fbank

  full_set.append((sp_features,oh_labels,ph0))
train_size=int(0.9*len(full_set))
train_set=full_set[:train_size]
test_set=full_set[train_size:]
print(len(train_set),len(test_set))
print(len(all_phones))
print(all_phones)



['', '+', '2', '3', '7', '?', 'D', 'S', 'T', 'a', 'aa', 'ai', 'b', 'd', 'dh', 'e', 'ee', 'ei', 'f', 'g', 'gh', 'h', 'i', 'ia', 'k', 'kh', 'l', 'm', 'n', 'o', 'oa', 'oo', 'q', 'r', 's', 'sh', 't', 'th', 'u', 'w', 'y', 'z']




373 42
42
['', '+', '2', '3', '7', '?', 'D', 'S', 'T', 'a', 'aa', 'ai', 'b', 'd', 'dh', 'e', 'ee', 'ei', 'f', 'g', 'gh', 'h', 'i', 'ia', 'k', 'kh', 'l', 'm', 'n', 'o', 'oa', 'oo', 'q', 'r', 's', 'sh', 't', 'th', 'u', 'w', 'y', 'z']


#New Data Preparation

In [None]:
alphabet_dir="ar-alphabet"
alphabet_classification_dir="cleaned_classification_data"
basic_dir="basic"
alphabet_train,alphabet_test=get_dir_files(alphabet_dir)
alphabet_class_train,alphabet_class_test=get_dir_files(alphabet_classification_dir)
basic_train,basic_test=get_dir_files(basic_dir)
print("alphabet_train",len(alphabet_train),"alphabet_test",len(alphabet_test))
print("basic_train",len(basic_train),"basic_test",len(basic_test))

#combined_train=alphabet_train+basic_train
combined_train=alphabet_train+alphabet_class_train
# combined_test=alphabet_test+basic_test
#combined_train=alphabet_train#+basic_train
combined_test=alphabet_test#+basic_test
combined_all=combined_train+combined_test
all_phones=[]
for fpath0 in combined_all:
  r0,fname1=os.path.split(fpath0)
  cur_phones=fname2phones(fname1,fixed_n_phones=100)
  all_phones.extend(cur_phones)  
all_phones=sorted(list(set(all_phones)))
print(len(all_phones), all_phones)

train_set,test_set=[],[]
for fpath0 in combined_train:
  r0,fname1=os.path.split(fpath0)
  cur_phones=fname2phones(fname1,fixed_n_phones=100)
  oh_labels=one_hot(cur_phones,all_phones)
  try: sp_features=process_input(fpath0)
  except: continue
  train_set.append((sp_features,oh_labels,cur_phones))
for fpath0 in combined_test:
  r0,fname1=os.path.split(fpath0)
  cur_phones=fname2phones(fname1,fixed_n_phones=100)
  oh_labels=one_hot(cur_phones,all_phones)
  try: sp_features=process_input(fpath0)
  except: continue
  test_set.append((sp_features,oh_labels,cur_phones))
# train_size=int(0.9*len(full_set))
# train_set=full_set[:train_size]
# test_set=full_set[train_size:]
print(len(train_set),len(test_set))
print(len(all_phones))
print(all_phones)

alphabet_train 189 alphabet_test 48
basic_train 332 basic_test 83
35 ['', '+', '2', '3', '7', 'D', 'DH', 'S', 'T', 'a', 'aa', 'b', 'd', 'dh', 'ee', 'f', 'gh', 'h', 'i', 'j', 'k', 'kh', 'l', 'm', 'n', 'oo', 'q', 'r', 's', 'sh', 't', 'th', 'w', 'y', 'z']




3946 48
35
['', '+', '2', '3', '7', 'D', 'DH', 'S', 'T', 'a', 'aa', 'b', 'd', 'dh', 'ee', 'f', 'gh', 'h', 'i', 'j', 'k', 'kh', 'l', 'm', 'n', 'oo', 'q', 'r', 's', 'sh', 't', 'th', 'w', 'y', 'z']


In [None]:
all_phones=[]
for fpath0 in combined_all:
  r0,fname1=os.path.split(fpath0)
  cur_phones=fname2phones(fname1,fixed_n_phones=100)
  print(cur_phones)
  all_phones.extend(cur_phones)  
all_phones=sorted(list(set(all_phones)))
print(len(all_phones), all_phones)


['', 'S', 'aa', 'd', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', 'r', 'aa', '2', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', 'b', 'aa', '2', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '

In [None]:
print(len(train_set),len(test_set))
for a in test_set[:5]:
  print(a[0].shape,a[1].shape,a[2])
print(all_phones)

3946 48
(101, 26) (100, 35) ['', 'a', 'l', 'i', 'f', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
(101, 26) (100, 35) ['', 'kh', 'aa', '2', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
(101, 26) (100, 35) ['', '7', 'aa', '2', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 

In [None]:
#all_phones=['', '+', '2', '3', '7', '?', 'D', 'S', 'T', 'a', 'aa', 'ai', 'b', 'd', 'dh', 'e', 'ee', 'ei', 'f', 'g', 'gh', 'h', 'i', 'ia', 'k', 'kh', 'l', 'm', 'n', 'o', 'oa', 'oo', 'q', 'r', 's', 'sh', 't', 'th', 'u', 'w', 'y', 'z']

#Network Definition
https://github.com/hmghaly/word_align/blob/master/rnn_utils.py

In [None]:
#Let's build the network - here is a small cheat sheet for possible RNN classes based on input and output size
#https://github.com/hmghaly/rnn/blob/master/classes.py
#https://github.com/hmghaly/word_align/blob/master/rnn_utils.py

#here the size of the output is the same as the size of the input
#the depth of the output depends on the number of possible outcome categories (e.g. different phonemes)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import pickle

torch.manual_seed(1)
random.seed(1)
device = torch.device('cpu')

class RNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size,num_layers, matching_in_out=False, batch_size=1):
    super(RNN, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.num_layers = num_layers
    self.batch_size = batch_size
    self.matching_in_out = matching_in_out #length of input vector matches the length of output vector 
    self.lstm = nn.LSTM(input_size, hidden_size,num_layers)
    self.hidden2out = nn.Linear(hidden_size, output_size)
    self.hidden = self.init_hidden()
  def forward(self, feature_list):
    feature_list=torch.tensor(feature_list)
    
    feature_list.to(device) #### <<<<<<<<<<<<<<<<< 
    if self.matching_in_out:
      lstm_out, _ = self.lstm( feature_list.view(len( feature_list), 1, -1))
      output_space = self.hidden2out(lstm_out.view(len( feature_list), -1))
      output_scores = torch.sigmoid(output_space) #we'll need to check if we need this sigmoid
      return output_scores #output_scores
    else:
      for i in range(len(feature_list)):
        cur_ft_tensor=feature_list[i]#.view([1,1,self.input_size])
        cur_ft_tensor=cur_ft_tensor.view([1,1,self.input_size])
        lstm_out, self.hidden = self.lstm(cur_ft_tensor, self.hidden)
        outs=self.hidden2out(lstm_out)
      return outs
  def init_hidden(self):
    #return torch.rand(self.num_layers, self.batch_size, self.hidden_size)
    return (torch.rand(self.num_layers, self.batch_size, self.hidden_size).to(device),
            torch.rand(self.num_layers, self.batch_size, self.hidden_size).to(device))

n_input=13
n_hidden =64
n_layers=2
depth=40
LR=0.00001

feature_vec0,label_vec0,phones0=train_set[0]
print(feature_vec0.shape)
print(label_vec0.shape)
#depth=flattened_out.shape[0]
n_output=len(label_vec0.ravel())
n_input=feature_vec0.shape[1]
print("n_input",n_input,"n_output",n_output)
rnn = RNN(n_input, n_hidden, n_output,n_layers, matching_in_out=False)

n_data_points=3000
input_tensor=torch.rand((n_data_points, n_input))
input_tensor=train_set[1][0]
input_tensor=input_tensor.astype(np.float32)

output = rnn(input_tensor)
print(output.shape)

(110, 26)
(100, 35)
n_input 26 n_output 3500
torch.Size([1, 1, 3500])


#Then run the training


In [None]:
import time, os
model_name="alpha_combined_with_classification_data_models_64_6_lr1e7"
# n_input=13
# n_output=40
n_hidden =64
n_layers=6 #3
n_epochs=100
LR=0.0000001
loss_func = nn.MSELoss()
feature_vec0,label_vec0,phones0=train_set[0] #need to assign the n_input and output according to the shape of input/output
n_output=len(label_vec0.ravel())
n_input=feature_vec0.shape[1]
model_dir=os.path.join(cwd,"models", model_name) 
if not os.path.exists(model_dir): os.makedirs(model_dir)
rnn = RNN(n_input, n_hidden, n_output,n_layers, matching_in_out=False)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all cnn parameters

log_fpath=os.path.join(model_dir,"log.txt")
log_fopen=open(log_fpath,"a")
log_fopen.write(str(rnn)+"\n")
log_fopen.close()

for epoch0 in range(n_epochs):
  t0=time.time()
  PATH=os.path.join(model_dir, "model-%s.model"%epoch0)
  if os.path.exists(PATH):
    checkpoint = torch.load(PATH)
    rnn.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    print("loaded model for this epoch",PATH)
    for a,b in  checkpoint.items():
      if "loss" in a.lower(): print(a,round(b,6))
    continue  

  total_train_loss=0
  total_test_loss=0
  total_test_offset=0
  print("epoch #",epoch0)
  print("train_set size",len(train_set))
  for i0,tr0 in enumerate(train_set):
    if i0%100==0: print(i0)
    feature_vec0,label_vec0,list_phones=tr0
    rnn.hidden = rnn.init_hidden()
    rnn.zero_grad()
    actual_output=torch.tensor(label_vec0.astype(np.float32)).ravel()
    input_tensor=feature_vec0 #torch.rand((n_data_points, n_input)) #n_input = 129
    input_tensor=input_tensor.astype(np.float32)
    rnn_output = rnn(input_tensor)
    loss = loss_func(rnn_output.ravel(), actual_output) #calculate the loss, difference between the output and the desired outcome tensors
    loss.backward()
    optimizer.step()  
    total_train_loss+=loss.item()
  print("now working on test set")
  all_pred_offsets=[]
  for i0,tr0 in enumerate(test_set):
    #continue
    if i0%50==0: print(i0)
    feature_vec0,label_vec0,list_phones=tr0
    rnn.hidden = rnn.init_hidden()
    rnn.zero_grad()
    actual_output=torch.tensor(label_vec0.astype(np.float32)).ravel()
    input_tensor=feature_vec0 #torch.rand((n_data_points, n_input)) #n_input = 129
    input_tensor=input_tensor.astype(np.float32)
    rnn_output = rnn(input_tensor)
    rnn_flat_out=rnn_output.ravel()
    loss = loss_func(rnn_flat_out, actual_output) #calculate the loss, difference between the output and the desired outcome tensors
    #calculate the loss, difference between the output and the desired outcome tensors
    total_test_loss+=loss.item()
    cur_preds0=out2labels(rnn_flat_out,all_phones)
    actual_phones=[""]+[v for v in list_phones if v]
    #actual_phones=[v for v in list_phones if v]
    #print("Actual phones:",actual_phones)
    tmp_pred_offsets=[]
    for ac_i,actual_label in enumerate(actual_phones):
      cp0=cur_preds0[ac_i]
      cur_pred_vals=[(v[0],round(v[1].item(),2)) for v in cp0]
      pred_just_labels=[v[0] for v in cur_pred_vals]
      correct_label_pred_offset=pred_just_labels.index(actual_label)
      tmp_pred_offsets.append(correct_label_pred_offset)
      #print(ac_i,"actual:",actual_label,"offset:", correct_label_pred_offset,"predicted",cur_pred_vals[:5])

    
    cur_avg_pred_offsets=sum(tmp_pred_offsets)/len(tmp_pred_offsets)
    #print(">>> cur_avg_pred_offsets",cur_avg_pred_offsets)
    all_pred_offsets.append(cur_avg_pred_offsets)
    #total_test_offset+=cur_avg_pred_offsets
    #print("----------")
  
  avg_train_loss=round(total_train_loss/len(train_set),4)
  avg_test_loss=round(total_test_loss/len(test_set), 4) 
  #avg_test_offsets=round(total_test_offset/len(test_set), 4) 
  avg_test_pred_offset=round(sum(all_pred_offsets)/len(all_pred_offsets) ,4)
  
  print("avg_train_loss",avg_train_loss,"avg_test_loss",avg_test_loss, "avg_test_offsets",avg_test_pred_offset)
  t1=time.time()
  elapsed=round(t1-t0,2) 
  t0=time.time()    
  #line="Epoch # %s - Batch: %s -  train loss: %s - test loss: %s - elapsed: %s"%(epoch0, batch_i0, avg_train_loss,avg_test_loss, elapsed)
  line="Epoch # %s -   train loss: %s - test loss: %s - elapsed: %s"%(epoch0, avg_train_loss,avg_test_loss, elapsed)
  #line="Epoch # %s  -  train loss: %s - test loss: %s - elapsed: %s"%(epoch0, avg_train_loss,avg_test_loss, elapsed)
  print(line)
  log_fopen=open(log_fpath,"a")
  log_fopen.write(line+"\n")
  log_fopen.close() 
  cur_checkpoint={
          'epoch': epoch0,
          'n_input': n_input,
          'n_hidden': n_hidden,
          'n_layers': n_layers,
          'n_output': n_output,
          'LR': LR,
          'model_state_dict': rnn.state_dict(),
          'optimizer_state_dict': optimizer.state_dict(),
          'train_loss': avg_train_loss,
          'test_loss': avg_test_loss
          }
  torch.save(cur_checkpoint, PATH) 

  


loaded model for this epoch /content/drive/MyDrive/speech_project/models/alpha_combined_with_classification_data_models_64_6_lr1e7/model-0.model
train_loss 0.0337
test_loss 0.0335
loaded model for this epoch /content/drive/MyDrive/speech_project/models/alpha_combined_with_classification_data_models_64_6_lr1e7/model-1.model
train_loss 0.0333
test_loss 0.0332
loaded model for this epoch /content/drive/MyDrive/speech_project/models/alpha_combined_with_classification_data_models_64_6_lr1e7/model-2.model
train_loss 0.033
test_loss 0.0329
loaded model for this epoch /content/drive/MyDrive/speech_project/models/alpha_combined_with_classification_data_models_64_6_lr1e7/model-3.model
train_loss 0.0327
test_loss 0.0326
loaded model for this epoch /content/drive/MyDrive/speech_project/models/alpha_combined_with_classification_data_models_64_6_lr1e7/model-4.model
train_loss 0.0324
test_loss 0.0322
loaded model for this epoch /content/drive/MyDrive/speech_project/models/alpha_combined_with_classifi

KeyboardInterrupt: ignored

In [None]:
pred_just_labels

['u',
 'm',
 'gh',
 'h',
 'i',
 '',
 'th',
 '+',
 'a',
 'kh',
 '7',
 '2',
 'T',
 'ia',
 'aa',
 '?',
 'l',
 'b',
 'ee',
 't',
 'k',
 'g',
 'q',
 'ai',
 'r',
 'n',
 'f',
 'oo',
 's',
 'sh',
 'z',
 'oa',
 'o',
 'w',
 'd',
 'S',
 'dh',
 'ei',
 'y',
 '3',
 'e',
 'D']

#Testing on actual data

In [None]:
import torch, random

model_name="alpha_combined_with_classification_data_models_64_6_lr1e7"
e0=50

torch.manual_seed(1)
random.seed(1)

model_dir=os.path.join(cwd,"models", model_name) 
PATH=os.path.join(model_dir, "model-%s.model"%e0)
checkpoint = torch.load(PATH)
rnn = RNN(checkpoint["n_input"], checkpoint["n_hidden"] , checkpoint["n_output"] , checkpoint["n_layers"] , matching_in_out=False).to(device)
rnn.load_state_dict(checkpoint['model_state_dict'])
rnn.eval()


print("now working on test set")
all_pred_offsets=[]
total_test_offset=0
for i0,tr0 in enumerate(test_set):
  #continue
  if i0%50==0: print(i0)
  feature_vec0,label_vec0,list_phones=tr0
  rnn.hidden = rnn.init_hidden()
  rnn.zero_grad()
  actual_output=torch.tensor(label_vec0.astype(np.float32)).ravel()
  input_tensor=feature_vec0 #torch.rand((n_data_points, n_input)) #n_input = 129
  input_tensor=input_tensor.astype(np.float32)
  rnn_output = rnn(input_tensor)
  rnn_flat_out=rnn_output.ravel()
  #loss = loss_func(rnn_flat_out, actual_output) #calculate the loss, difference between the output and the desired outcome tensors
  #calculate the loss, difference between the output and the desired outcome tensors
  #total_test_loss+=loss.item()
  cur_preds0=out2labels(rnn_flat_out,all_phones)
  #actual_phones=[v for v in list_phones if v]
  actual_phones=[""]+[v for v in list_phones if v]
  print("actual_phones",actual_phones)
  tmp_pred_offsets=[]
  for ac_i,actual_label in enumerate(actual_phones):
    cp0=cur_preds0[ac_i]
    cur_pred_vals=[(v[0],round(v[1].item(),2)) for v in cp0]
    pred_just_labels=[v[0] for v in cur_pred_vals]
    correct_label_pred_offset=pred_just_labels.index(actual_label)
    tmp_pred_offsets.append(correct_label_pred_offset)
    print(ac_i,"actual:",actual_label,"offset:", correct_label_pred_offset,"predicted",cur_pred_vals[:5])
  cur_avg_pred_offsets=sum(tmp_pred_offsets)/len(tmp_pred_offsets)
  print(">>> cur_avg_pred_offsets",cur_avg_pred_offsets)
  all_pred_offsets.append(cur_avg_pred_offsets)
  total_test_offset+=cur_avg_pred_offsets
  print("----------")


avg_test_pred_offset=round(sum(all_pred_offsets)/len(all_pred_offsets) ,4)
print("avg_test_pred_offset",avg_test_pred_offset)

now working on test set
0
actual_phones ['', 'w', 'aa', 'w']
0 actual:  offset: 0 predicted [('', 1.01), ('d', 0.05), ('n', 0.0), ('t', 0.0), ('w', 0.0)]
1 actual: w offset: 14 predicted [('m', 0.41), ('T', 0.24), ('t', 0.19), ('j', 0.16), ('z', 0.16)]
2 actual: aa offset: 0 predicted [('aa', 0.37), ('ee', 0.25), ('+', 0.08), ('oo', 0.06), ('l', 0.02)]
3 actual: w offset: 6 predicted [('n', 0.21), ('2', 0.21), ('m', 0.17), ('7', 0.11), ('i', 0.09)]
>>> cur_avg_pred_offsets 5.0
----------
actual_phones ['', 'y', 'aa', '2']
0 actual:  offset: 0 predicted [('', 1.01), ('d', 0.05), ('n', 0.0), ('t', 0.0), ('w', 0.0)]
1 actual: y offset: 5 predicted [('m', 0.41), ('T', 0.24), ('t', 0.19), ('j', 0.16), ('z', 0.16)]
2 actual: aa offset: 0 predicted [('aa', 0.37), ('ee', 0.25), ('+', 0.08), ('oo', 0.06), ('l', 0.02)]
3 actual: 2 offset: 1 predicted [('n', 0.21), ('2', 0.21), ('m', 0.17), ('7', 0.11), ('i', 0.09)]
>>> cur_avg_pred_offsets 1.5
----------
actual_phones ['', 'h', 'a', 'm', 'z', 'a

#Test on the alphabet

In [None]:
cur_root_dir="ar-alphabet"
all_alphabet_files=[]
for root0,dir0,files0 in os.walk(cur_root_dir):
  for fname in files0:
    cur_fpath=os.path.join(root0,fname)
    if not fname.endswith(".wav"): continue
    all_alphabet_files.append(cur_fpath)
print(len(all_alphabet_files))

for fpath0 in all_alphabet_files[:10]:
  #print(fpath0)
  r0,fname1=os.path.split(fpath0)
  print(fname1)
  cur_phones=fname2phones(fname1,fixed_n_phones=100)
  oh_labels0=one_hot(cur_phones,all_phones)
  actual_output=oh_labels0.ravel()

  feature_vec0=get_fbank(fpath0)
  # print(cur_phones)
  # print("-------")
  input_tensor=feature_vec0 #torch.rand((n_data_points, n_input)) #n_input = 129
  input_tensor=input_tensor.astype(np.float32)
  rnn_output = rnn(input_tensor)
  rnn_flat_out=rnn_output.ravel()
  print(rnn_output.shape)
  #loss = loss_func(rnn_flat_out, actual_output) #calculate the loss, difference between the output and the desired outcome tensors
  #calculate the loss, difference between the output and the desired outcome tensors
  total_test_loss+=loss.item()
  cur_preds0=out2labels(rnn_flat_out,all_phones)
  #actual_phones=[v for v in list_phones if v]
  actual_phones=[""]+[v for v in cur_phones if v]
  print("actual_phones",actual_phones)
  tmp_pred_offsets=[]
  for ac_i,actual_label in enumerate(actual_phones):
    cp0=cur_preds0[ac_i]
    cur_pred_vals=[(v[0],round(v[1].item(),2)) for v in cp0]
    pred_just_labels=[v[0] for v in cur_pred_vals]
    correct_label_pred_offset=pred_just_labels.index(actual_label)
    tmp_pred_offsets.append(correct_label_pred_offset)
    print(ac_i,"actual:",actual_label,"offset:", correct_label_pred_offset,"predicted",cur_pred_vals[:5])
  cur_avg_pred_offsets=sum(tmp_pred_offsets)/len(tmp_pred_offsets)
  print(">>> cur_avg_pred_offsets",cur_avg_pred_offsets)
  all_pred_offsets.append(cur_avg_pred_offsets)
  total_test_offset+=cur_avg_pred_offsets
  print("----------")




237
Saad.wav
torch.Size([1, 1, 4200])




actual_phones ['', 'S', 'aa', 'd']
0 actual:  offset: 0 predicted [('', 1.0), ('+', 0.0), ('q', 0.0), ('t', 0.0), ('aa', 0.0)]
1 actual: S offset: 16 predicted [('e', 0.23), ('3', 0.11), ('m', 0.09), ('d', 0.09), ('a', 0.07)]
2 actual: aa offset: 8 predicted [('a', 0.35), ('l', 0.15), ('e', 0.12), ('n', 0.08), ('o', 0.07)]
3 actual: d offset: 7 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
>>> cur_avg_pred_offsets 7.75
----------
raa2.wav
torch.Size([1, 1, 4200])




actual_phones ['', 'r', 'aa', '2']
0 actual:  offset: 0 predicted [('', 1.0), ('+', 0.0), ('q', 0.0), ('oo', 0.0), ('ei', 0.0)]
1 actual: r offset: 20 predicted [('e', 0.23), ('3', 0.11), ('d', 0.09), ('m', 0.09), ('a', 0.07)]
2 actual: aa offset: 8 predicted [('a', 0.35), ('l', 0.15), ('e', 0.12), ('n', 0.08), ('o', 0.07)]
3 actual: 2 offset: 24 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
>>> cur_avg_pred_offsets 13.0
----------
baa2.wav




torch.Size([1, 1, 4200])
actual_phones ['', 'b', 'aa', '2']
0 actual:  offset: 0 predicted [('', 1.0), ('+', 0.0), ('q', 0.0), ('S', 0.0), ('t', 0.0)]
1 actual: b offset: 6 predicted [('e', 0.23), ('3', 0.11), ('d', 0.09), ('m', 0.09), ('a', 0.07)]
2 actual: aa offset: 8 predicted [('a', 0.35), ('l', 0.15), ('e', 0.12), ('n', 0.08), ('o', 0.07)]
3 actual: 2 offset: 28 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
>>> cur_avg_pred_offsets 10.5
----------
waaw.wav




torch.Size([1, 1, 4200])
actual_phones ['', 'w', 'aa', 'w']
0 actual:  offset: 0 predicted [('', 1.0), ('+', 0.0), ('t', 0.0), ('g', 0.0), ('q', 0.0)]
1 actual: w offset: 14 predicted [('e', 0.23), ('3', 0.11), ('d', 0.09), ('m', 0.09), ('a', 0.07)]
2 actual: aa offset: 8 predicted [('a', 0.35), ('l', 0.15), ('e', 0.12), ('n', 0.08), ('o', 0.07)]
3 actual: w offset: 14 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
>>> cur_avg_pred_offsets 9.0
----------
alif.wav




torch.Size([1, 1, 4200])
actual_phones ['', 'a', 'l', 'i', 'f']
0 actual:  offset: 0 predicted [('', 1.0), ('+', 0.0), ('aa', 0.0), ('q', 0.0), ('th', 0.0)]
1 actual: a offset: 4 predicted [('e', 0.23), ('3', 0.11), ('d', 0.09), ('m', 0.09), ('a', 0.07)]
2 actual: l offset: 1 predicted [('a', 0.35), ('l', 0.15), ('e', 0.12), ('n', 0.08), ('o', 0.07)]
3 actual: i offset: 24 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
4 actual: f offset: 25 predicted [('+', 0.17), ('a', 0.13), ('aa', 0.11), ('k', 0.06), ('3', 0.06)]
>>> cur_avg_pred_offsets 10.8
----------
khaa2.wav




torch.Size([1, 1, 4200])
actual_phones ['', 'kh', 'aa', '2']
0 actual:  offset: 0 predicted [('', 1.0), ('+', 0.0), ('th', 0.0), ('t', 0.0), ('aa', 0.0)]
1 actual: kh offset: 10 predicted [('e', 0.23), ('3', 0.11), ('d', 0.09), ('m', 0.09), ('a', 0.07)]
2 actual: aa offset: 8 predicted [('a', 0.35), ('l', 0.15), ('e', 0.12), ('n', 0.08), ('o', 0.07)]
3 actual: 2 offset: 28 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
>>> cur_avg_pred_offsets 11.5
----------
7aa2.wav




torch.Size([1, 1, 4200])
actual_phones ['', '7', 'aa', '2']
0 actual:  offset: 0 predicted [('', 1.01), ('+', 0.0), ('oa', 0.0), ('th', 0.0), ('7', 0.0)]
1 actual: 7 offset: 15 predicted [('e', 0.23), ('3', 0.11), ('d', 0.09), ('m', 0.09), ('a', 0.07)]
2 actual: aa offset: 8 predicted [('a', 0.34), ('l', 0.15), ('e', 0.12), ('n', 0.08), ('o', 0.07)]
3 actual: 2 offset: 35 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
>>> cur_avg_pred_offsets 14.5
----------
taa2-marbooTah.wav




torch.Size([1, 1, 4200])
actual_phones ['', 't', 'aa', '2', '+', 'm', 'a', 'r', 'b', 'oo', 'T', 'a', 'h']
0 actual:  offset: 0 predicted [('', 1.0), ('+', 0.0), ('7', 0.0), ('oa', 0.0), ('sh', 0.0)]
1 actual: t offset: 5 predicted [('e', 0.23), ('3', 0.11), ('d', 0.09), ('m', 0.09), ('a', 0.07)]
2 actual: aa offset: 8 predicted [('a', 0.35), ('l', 0.15), ('e', 0.12), ('n', 0.08), ('o', 0.07)]
3 actual: 2 offset: 32 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
4 actual: + offset: 0 predicted [('+', 0.17), ('a', 0.13), ('aa', 0.11), ('k', 0.06), ('3', 0.06)]
5 actual: m offset: 9 predicted [('a', 0.15), ('aa', 0.1), ('e', 0.08), ('+', 0.08), ('o', 0.06)]
6 actual: a offset: 3 predicted [('+', 0.15), ('n', 0.13), ('', 0.13), ('a', 0.1), ('t', 0.08)]
7 actual: r offset: 10 predicted [('', 0.19), ('a', 0.11), ('aa', 0.08), ('+', 0.07), ('k', 0.05)]
8 actual: b offset: 1 predicted [('', 0.25), ('b', 0.08), ('a', 0.08), ('+', 0.07), ('e', 0.07)]
9 actual: oo off



torch.Size([1, 1, 4200])
actual_phones ['', 'T', 'aa', '2']
0 actual:  offset: 0 predicted [('', 1.0), ('+', 0.0), ('q', 0.0), ('g', 0.0), ('aa', 0.0)]
1 actual: T offset: 30 predicted [('e', 0.23), ('3', 0.11), ('m', 0.09), ('d', 0.09), ('a', 0.07)]
2 actual: aa offset: 8 predicted [('a', 0.35), ('l', 0.15), ('e', 0.12), ('n', 0.07), ('o', 0.07)]
3 actual: 2 offset: 23 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
>>> cur_avg_pred_offsets 15.25
----------
laam-alif.wav
torch.Size([1, 1, 4200])
actual_phones ['', 'l', 'aa', 'm', '+', 'a', 'l', 'i', 'f']
0 actual:  offset: 0 predicted [('', 1.0), ('+', 0.0), ('q', 0.0), ('oo', 0.0), ('aa', 0.0)]
1 actual: l offset: 18 predicted [('e', 0.23), ('3', 0.11), ('d', 0.09), ('m', 0.09), ('a', 0.07)]
2 actual: aa offset: 8 predicted [('a', 0.35), ('l', 0.15), ('e', 0.12), ('n', 0.08), ('o', 0.07)]
3 actual: m offset: 4 predicted [('+', 0.23), ('n', 0.11), ('t', 0.11), ('l', 0.08), ('m', 0.07)]
4 actual: + offset: 0

In [None]:
#Download zip file of recordings
!wget https://champolu.com/audio/ar-alphabet.zip
!unzip ar-alphabet.zip

--2022-03-20 17:49:57--  https://champolu.com/audio/ar-alphabet.zip
Resolving champolu.com (champolu.com)... 107.180.44.147
Connecting to champolu.com (champolu.com)|107.180.44.147|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 41776326 (40M) [application/zip]
Saving to: ‘ar-alphabet.zip’


2022-03-20 17:50:00 (15.4 MB/s) - ‘ar-alphabet.zip’ saved [41776326/41776326]



In [None]:

model_dir=os.path.join(cwd,"mfcc_models1") 
if not os.path.exists(model_dir): os.makedirs(model_dir)

#n_input=129
n_input=13
n_hidden =64
n_layers=2
depth=40
LR=0.00001

feature_vec0,label_vec0,phones0=train_set[0]
print(feature_vec0.shape)
# print(train_set[0][0].shape)
# print(train_set[0][1].shape)
flattened_out=label_vec0.ravel()
print(flattened_out.shape)
original_out_shape=label_vec0.shape
print("original_out_shape", original_out_shape)
depth=flattened_out.shape[0]
n_output=depth
n_input=feature_vec0.shape[1]
print("depth",depth)

rnn = RNN(n_input, n_hidden, depth,n_layers, matching_in_out=False)

n_data_points=3000
input_tensor=torch.rand((n_data_points, n_input))
input_tensor=train_set[1][0]
input_tensor=input_tensor.astype(np.float32)

output = rnn(input_tensor)
print("input tensor shape", input_tensor.shape)
#print(output)
print("output tensor shape", output.shape)

loss_func = nn.MSELoss()

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all cnn parameters
fopen=open("loss-analysis.txt","w")

for e0 in range(100):
  total_train_loss=0
  total_test_loss=0
  print("epoch #",e0)
  print("train_set size",len(train_set))
  for i0,tr0 in enumerate(train_set):
    if i0%50==0: print(i0)
    feature_vec0,label_vec0,list_phones=tr0
    flattened_out=label_vec0.ravel().astype(np.float32)
    flattened_out=torch.tensor(flattened_out)
    rnn.hidden = rnn.init_hidden()
    rnn.zero_grad()
    input_tensor=feature_vec0 #torch.rand((n_data_points, n_input)) #n_input = 129
    input_tensor=input_tensor.astype(np.float32)
    output = rnn(input_tensor)
    output=torch.flatten(output)
    loss = loss_func(output, flattened_out) #calculate the loss, difference between the output and the desired outcome tensors
    loss.backward()
    optimizer.step()  
    total_train_loss+=loss.item()
    unflattened_output=output.view(original_out_shape)
    test=get_label_from_tensor(unflattened_output,all_phones)
    # if e0>2:
    #   print("actual phones:",[v for v in list_phones if v])
    #   for t0 in test[:5]:
    #     print(t0[:3])
    #   print("----")
  print("now working on test set")
  for i0,tr0 in enumerate(test_set):
    if i0%50==0: print(i0)
    feature_vec0,label_vec0,list_phones=tr0
    flattened_out=label_vec0.ravel().astype(np.float32)
    flattened_out=torch.tensor(flattened_out)
    rnn.hidden = rnn.init_hidden()
    rnn.zero_grad()
    input_tensor=feature_vec0 #torch.rand((n_data_points, n_input)) #n_input = 129
    input_tensor=input_tensor.astype(np.float32)
    output = rnn(input_tensor)
    output=torch.flatten(output)
    loss = loss_func(output, flattened_out) #calculate the loss, difference between the output and the desired outcome tensors
    total_test_loss+=loss.item()
    # loss.backward()
    # optimizer.step()  
    unflattened_output=output.view(original_out_shape)
    test=get_label_from_tensor(unflattened_output,all_phones)
    all_pred_offsets=[]
    if e0>=0:
      #print("TEST SET")
      actual_phones=[v for v in list_phones if v]
      #print("actual phones:",actual_phones)
      tmp_pred_offsets=[]
      
      for t_i,t0 in enumerate(test[:len(actual_phones)+1]):
        correct_phone=list_phones[t_i]
        correct_found=[]
        for t_i_2,t0_2 in enumerate(t0):
          if t0_2[0]==correct_phone: 
            correct_found=(t_i_2,t0_2[0],t0_2[1])
            tmp_pred_offsets.append(t_i_2)
            break
        #print(correct_found,t0[:3])
      #print(correct_found_locs)
      pred_offset_avg=sum(tmp_pred_offsets)/len(tmp_pred_offsets)
      all_pred_offsets.append(pred_offset_avg)
      #print("average:",sum(correct_found_locs)/len(correct_found_locs))
      #print("----")
  
  avg_train_loss=round(total_train_loss/len(train_set),4)
  avg_test_loss=round(total_test_loss/len(test_set), 4) 
  avg_test_pred_offset=round(sum(all_pred_offsets)/len(all_pred_offsets) ,4)
  print("train loss: %s - test loss: %s - prediction offset: %s"%(avg_train_loss,avg_test_loss, avg_test_pred_offset))
  PATH=os.path.join(model_dir, "model-%s.model"%e0)
  pickle_path=os.path.join(model_dir, "model-%s.pickle"%e0)
  pickle_dict={}
  pickle_dict["n_output"]=n_output
  pickle_dict["n_input"]=n_input
  pickle_dict["n_hidden"]=n_hidden
  pickle_dict["n_layers"]=n_layers
  #pickle_dict["out2labels"]=out2labels
  pickle_dict["labels"]=all_phones
  pickle_dict["train_loss"]=avg_train_loss
  pickle_dict["test_loss"]=avg_test_loss
  pickle_dict["pred_offset"]=avg_test_pred_offset

  numpy_state_dict={}
  for a,b in rnn.state_dict().items():
    numpy_state_dict[a]=b.numpy()
  pickle_dict["state_dict"]=numpy_state_dict
  with open(pickle_path, 'wb') as f:
    pickle.dump(pickle_dict, f, pickle.HIGHEST_PROTOCOL)


  torch.save({
              'epoch': e0,
              'model_state_dict': rnn.state_dict(),
              'optimizer_state_dict': optimizer.state_dict(),
              'train_loss': avg_train_loss,
              'test_loss': avg_test_loss
              }, PATH)


# for fname in files:
#   json_file_path=""
#   wav_file_path=""
#   features=extract_features(wav_file_path)
#   labels=extract_labels(json_file_path)
#   labels_tensor=convert2tensor(labels)
#   n_data_points=len(labels)

#   rnn.hidden = rnn.init_hidden()
#   rnn.zero_grad()


#   input_tensor=torch.rand((n_data_points, n_input)) #n_input = 129
#   output = rnn(input_tensor)

#   loss = loss_func(output, labels_tensor) #calculate the loss, difference between the output and the desired outcome tensors

 
#   loss.backward()
#   optimizer.step()  


  # # a=random.randint(0,9) #start from a random number
  # # rand_tensor = 0.2*torch.rand((3, 4)) + a #generating input tensor from the random number, that consists of random numbers +/- 0.1 of the random number
  # # outcome=[0.]*n_output #initializing outcome tensor
  # # outcome[a]=1. #filling the index corresponding to the generated random number, which is the outcome
  # # outcome_tensor=torch.tensor(outcome).view([1,1,n_output]) #convert it to tensor with shape (1,1,size of outcome/output)
  # for i in range(len(rand_tensor)): #feed the network sequentially with the input tensors
  #   cur_tensor=rand_tensor[i].view([1,1,n_input])
  #   output = rnn(cur_tensor)

(195, 26)
(4200,)
original_out_shape (100, 42)
depth 4200
input tensor shape (153, 26)
output tensor shape torch.Size([1, 1, 4200])
epoch # 0
train_set size 373
0
50
100
150
200
250
300
350
now working on test set
0
train loss: 0.0369 - test loss: 0.0346 - prediction offset: 17.5926
epoch # 1
train_set size 373
0
50
100
150
200
250
300
350
now working on test set
0
train loss: 0.0329 - test loss: 0.0319 - prediction offset: 16.963
epoch # 2
train_set size 373
0
50
100
150
200
250
300
350
now working on test set
0
train loss: 0.0307 - test loss: 0.0297 - prediction offset: 17.0741
epoch # 3
train_set size 373
0
50
100
150
200
250
300
350
now working on test set
0
train loss: 0.0285 - test loss: 0.0273 - prediction offset: 17.2222
epoch # 4
train_set size 373
0
50
100
150
200
250
300
350
now working on test set
0
train loss: 0.0257 - test loss: 0.0243 - prediction offset: 16.5926
epoch # 5
train_set size 373
0
50
100
150
200
250
300
350
now working on test set
0
train loss: 0.0225 - test

KeyboardInterrupt: ignored

#NUMPY LSTM Model

In [None]:
import numpy as np

import torch
from torch import nn

def sigmoid(x): 
    return 1. / (1 + np.exp(-x))
def softmax(x):
    e_x = np.exp(x - np.max(x)) # max(x) subtracted for numerical stability
    return e_x / np.sum(e_x)

def get_params(state_dict0): #get network parameters from state dict/understand network architecture from state dict
  params={}
  all_layers=[]
  for a,b in state_dict0.items():
    cur_shape=b.shape
    a0=a.split(".")[-1]
    if a0=="weight_ih_l0": 
      params["n_hidden"],params["n_input"]=int(cur_shape[0]/4),cur_shape[1]
    last_a=a0.split("_")[-1]
    if last_a.startswith("l") and not last_a in all_layers: all_layers.append(last_a)
    if a0== "weight": params["fc_weight"]=b
    if a0== "bias": 
      params["fc_bias"]=b
      params["n_output"]=cur_shape[0]
  params["n_layers"]=len(all_layers)
  return params


def numpy_lstm(data_input,state_dict0):
  #we can get the number of layers and number of hidden from the state dict
  cur_params=get_params(state_dict0)
  n_hidden=cur_params["n_hidden"]
  new_stat_dict={}
  for a,b in state_dict0.items(): #just to handle whether th state dict has torch tensors or numpy arrays
    try: new_stat_dict[a]=b.numpy() #if torch tensors, convert to numpy
    except: new_stat_dict[a]=b #otherwise, keep it
  state_dict0=new_stat_dict
  for layer_i in range(cur_params["n_layers"]):
    layer="l%s"%layer_i
    #Event (x) Weights and Biases for all gates
    Weights_xi = state_dict0['lstm.weight_ih_'+layer][0:n_hidden]  # shape  [h, x]
    Weights_xf = state_dict0['lstm.weight_ih_'+layer][n_hidden:2*n_hidden]  # shape  [h, x]
    Weights_xl = state_dict0['lstm.weight_ih_'+layer][2*n_hidden:3*n_hidden]  # shape  [h, x]
    Weights_xo = state_dict0['lstm.weight_ih_'+layer][3*n_hidden:4*n_hidden] # shape  [h, x]

    Bias_xi = state_dict0['lstm.bias_ih_'+layer][0:n_hidden]  #shape is [h, 1]
    Bias_xf = state_dict0['lstm.bias_ih_'+layer][n_hidden:2*n_hidden]  #shape is [h, 1]
    Bias_xl = state_dict0['lstm.bias_ih_'+layer][2*n_hidden:3*n_hidden]  #shape is [h, 1]
    Bias_xo = state_dict0['lstm.bias_ih_'+layer][3*n_hidden:4*n_hidden] #shape is [h, 1]

    #Hidden state (h) Weights and Biases for all gates
    Weights_hi = state_dict0['lstm.weight_hh_'+layer][0:n_hidden]  #shape is [h, h]
    Weights_hf = state_dict0['lstm.weight_hh_'+layer][n_hidden:2*n_hidden]  #shape is [h, h]
    Weights_hl = state_dict0['lstm.weight_hh_'+layer][2*n_hidden:3*n_hidden]  #shape is [h, h]
    Weights_ho = state_dict0['lstm.weight_hh_'+layer][3*n_hidden:4*n_hidden] #shape is [h, h]

    Bias_hi = state_dict0['lstm.bias_hh_'+layer][0:n_hidden]  #shape is [h, 1]
    Bias_hf = state_dict0['lstm.bias_hh_'+layer][n_hidden:2*n_hidden]  #shape is [h, 1]
    Bias_hl = state_dict0['lstm.bias_hh_'+layer][2*n_hidden:3*n_hidden]  #shape is [h, 1]
    Bias_ho = state_dict0['lstm.bias_hh_'+layer][3*n_hidden:4*n_hidden] #shape is [h, 1]

    #Initialize cell and hidden states with zeroes
    h = np.zeros(n_hidden)
    c = np.zeros(n_hidden)

    #Loop through data, updating the hidden and cell states after each pass
    out_list=[]
    all_output=[]
    for eventx in data_input:
      f = forget_gate(eventx, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, c)
      i =  input_gate(eventx, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, 
                    Weights_hl, Bias_hl, Weights_xl, Bias_xl)
      c = cell_state(f,i)
      h = output_gate(eventx, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, c)
      out_list.append(h)
      #cur_output=model_output(h, fc_Weight, fc_Bias)
      #all_output.append(cur_output)
    data_input=np.array(out_list)
    #print(data_input)
  return  data_input
def fully_connected(lstm_out,state_dict0):
  cur_params=get_params(state_dict0)
  fc_wt,fc_bias=cur_params["fc_weight"],cur_params["fc_bias"]
  try: fc_wt,fc_bias=fc_wt.numpy(),fc_bias.numpy()
  except: pass
  all_output=[]
  for lstm_item in lstm_out:
    cur_output=np.dot(fc_wt, lstm_item) + fc_bias
    all_output.append(cur_output)
  return np.array(all_output)



# lstm_output=numpy_lstm(data,state)
# fc_output=fully_connected(lstm_output,state)
# fc_output_sigmoid=sigmoid(fc_output) 
# fc_output_softmax=softmax(fc_output) 


#Initialize an PyTorch LSTM for comparison to our Numpy LSTM
class LSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers=1):
        super(LSTM, self).__init__()
        self.hidden_dim=hidden_dim
        #LSTM Layer
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)
        #Final, fully-connected layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x, hidden):
        batch_size = 1
        # get LSTM outputs
        lstm_output, (h,c) = self.lstm(x, hidden)
        # shape output to be (batch_size*seq_length, hidden_dim)
        lstm_output = lstm_output.view(-1, self.hidden_dim)  
        
        # get final output 
        model_output = self.fc(lstm_output)
        
        return model_output, (h,c)
      
torch.manual_seed(5)
#rPyTorch expects an extra dimension for batch size:

data = np.array(
           [[1,1],
            [2,2],
            [3,3]])


input_size  = 2 # size of one 'event', or sample, in our batch of data
n_hidden = hidden_dim  = 16 # 3 cells in the LSTM layer
output_size = 6 # desired model output
num_layers=2


torch_lstm = LSTM(input_size = input_size, 
                 hidden_dim = hidden_dim,
                 output_size = output_size,
                  n_layers=num_layers
                 )
state = torch_lstm.state_dict()
# fc_Weight = state['fc.weight'].numpy() #shape is [h, output_size]
# fc_Bias = state['fc.bias'].numpy() #shape is [,output_size]

torch_batch = torch.Tensor(data).unsqueeze(0) 
torch_output, (torch_hidden, torch_cell) = torch_lstm(torch_batch, None)
print("torch_output:", torch_output)

lstm_output=numpy_lstm(data,state)
fc_output=fully_connected(lstm_output,state)
fc_output_sigmoid=sigmoid(fc_output) 
fc_output_softmax=softmax(fc_output) 
print("numpy LSTM output:", fc_output)
# print("fc_output_sigmoid",fc_output_sigmoid)
# print("fc_output_softmax",fc_output_softmax)


torch_output: tensor([[-0.1531,  0.0724, -0.0483,  0.2170, -0.2389, -0.1048],
        [-0.1804,  0.0846, -0.0497,  0.2260, -0.2425, -0.0755],
        [-0.2051,  0.0965, -0.0449,  0.2260, -0.2488, -0.0592]],
       grad_fn=<AddmmBackward0>)
numpy LSTM output: [[-0.15305544  0.07241841 -0.04832025  0.21703622 -0.23894376 -0.1047642 ]
 [-0.18038092  0.08460638 -0.0496739   0.22595658 -0.2425259  -0.07545674]
 [-0.20506505  0.09647481 -0.04485004  0.22601635 -0.24875245 -0.0591723 ]]


#Extracting features and applying it to the model

In [None]:
import os,pickle, random
import numpy as np
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav


#torch.manual_seed(1)
random.seed(1)


e0=21
model_dir=os.path.join(cwd,"mfcc_models") 
pickle_path=os.path.join(model_dir, "model-%s.pickle"%e0)

with open(pickle_path, 'rb') as f:
    data_dict = pickle.load(f)
print(data_dict.keys())
cur_state_dict=data_dict["state_dict"]
labels=data_dict["labels"]

for a,b in cur_state_dict.items():
  print(a,b.shape)

#Input processing/ feature extraction
def get_mfcc(wav_fpath0):
  (rate,sig) = wav.read(wav_fpath0)
  if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
  return mfcc(sig,rate)


def get_fbank(wav_fpath0):
  (rate,sig) = wav.read(wav_fpath0)
  if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
  return logfbank(sig,rate)

def extract_features(wav_fpath0): return get_fbank(wav_fpath0)

#Output processing
def out2labels(rnn_flat_out,label_list): #a flat rnn output to split into slices, and get the label weights for each slice
  final_list=[]
  n_slices=int(len(rnn_flat_out)/len(label_list))
  for i0 in range(n_slices):
    i1=i0+1
    cur_slice=rnn_flat_out[i0*len(label_list):i1*len(label_list)]
    tmp_list=[]
    for lb0,cs0 in zip(label_list,cur_slice): tmp_list.append((lb0,cs0))
    tmp_list.sort(key=lambda x:-x[-1])
    final_list.append(tmp_list)
  return final_list


cur_dir="basic"
wav_file_list=os.listdir(cur_dir)
test_i=47
for fname in wav_file_list[test_i:test_i+1]:
  wav_fpath=os.path.join(cur_dir,fname)
  print(wav_fpath)
  cur_feature_list=extract_features(wav_fpath)
  cur_feature_list=np.array(cur_feature_list)

  lstm_output=numpy_lstm(cur_feature_list,cur_state_dict)
  fc_output=fully_connected(lstm_output,cur_state_dict)
  fc_output_sigmoid=sigmoid(fc_output) 
  print(fc_output_sigmoid.shape)
  last_rnn_sigmoid_out=fc_output_sigmoid[-1]
  output_lable_list=out2labels(last_rnn_sigmoid_out,labels)
  for a in output_lable_list[:10]:
    print([(v[0],round(v[1],4)) for v in a[:10]])
  #print(last_rnn_sigmoid_out.shape)




dict_keys(['n_output', 'n_input', 'n_hidden', 'n_layers', 'labels', 'train_loss', 'test_loss', 'pred_offset', 'state_dict'])
lstm.weight_ih_l0 (128, 26)
lstm.weight_hh_l0 (128, 32)
lstm.bias_ih_l0 (128,)
lstm.bias_hh_l0 (128,)
lstm.weight_ih_l1 (128, 32)
lstm.weight_hh_l1 (128, 32)
lstm.bias_ih_l1 (128,)
lstm.bias_hh_l1 (128,)
hidden2out.weight (4200, 32)
hidden2out.bias (4200,)
basic/ga3aan.wav
(118, 4200)
[('', 0.7292), ('?', 0.5013), ('2', 0.5008), ('q', 0.5008), ('7', 0.5007), ('ee', 0.5006), ('3', 0.5005), ('n', 0.5004), ('h', 0.5004), ('w', 0.5004)]
[('e', 0.5669), ('3', 0.5272), ('m', 0.5261), ('d', 0.5207), ('a', 0.5198), ('b', 0.5133), ('t', 0.5114), ('f', 0.511), ('h', 0.5089), ('k', 0.5086)]
[('a', 0.5768), ('l', 0.5463), ('e', 0.528), ('o', 0.5168), ('n', 0.5147), ('i', 0.5139), ('ee', 0.5136), ('ai', 0.5109), ('oa', 0.5037), ('r', 0.5034)]
[('+', 0.5625), ('n', 0.5288), ('t', 0.5262), ('l', 0.5215), ('h', 0.5144), ('m', 0.5133), ('b', 0.5109), ('', 0.5088), ('aa', 0.5081),

#Alphabet data from clasification project

In [None]:
conversion_dict={}
conversion_dict["Aain"]="3ayn"
conversion_dict["Alif"]="alif"
conversion_dict["BA"]="baa2"
conversion_dict["Ba"]="baa2"
conversion_dict["Dal"]="daal"
conversion_dict["Duad"]="Daad"
conversion_dict["Faa"]="faa2"
conversion_dict["Ghain"]="ghayn"
conversion_dict["Haa"]="haa2"
conversion_dict["Hamzah"]="hamzah"
conversion_dict["Hha"]="7aa2"
conversion_dict["Jeem"]="jeem"
conversion_dict["Kaif"]="kaaf"
conversion_dict["Kha"]="khaa2"
conversion_dict["Laam"]="laam"
conversion_dict["Lam"]="laam"
conversion_dict["Meem"]="meem"
conversion_dict["Noon"]="noon"
conversion_dict["Qauf"]="qaaf"
conversion_dict["Quaf"]="qaaf"
conversion_dict["Raa"]="raa2"
conversion_dict["Sa"]="Saad"
conversion_dict["Saud"]="Saad"
conversion_dict["Suad"]="Saad"

conversion_dict["Seen"]="seen"
conversion_dict["Sheen"]="sheen"
conversion_dict["Ta"]="taa2"
conversion_dict["Tua"]="Taa2"
conversion_dict["Wao"]="waaw"
conversion_dict["Waow"]="waaw"

conversion_dict["Yaa"]="yaa2"
conversion_dict["Za"]="zaa"
conversion_dict["Zaa"]="zaa"
conversion_dict["Zaal"]="dhaal"
conversion_dict["Zua"]="DHaa2"

conversion_dict["Zuad"]="zaa"
conversion_dict["Zaa"]="zaa"
conversion_dict["Zaal"]="dhaal"
conversion_dict["Zua"]="DHaa2"

conversion_dict["aain"]="3ayn"
conversion_dict["dal"]="daal"
conversion_dict["duad"]="Daad"
conversion_dict["faa"]="faa2"

conversion_dict["ghain"]="ghayn"
conversion_dict["hamzah"]="hamzah"
conversion_dict["hha"]="7aa2"
conversion_dict["jeem"]="jeem"
conversion_dict["kaaf"]="kaaf"
conversion_dict["kaif"]="kaaf"

conversion_dict["qauf"]="qaaf"
conversion_dict["seen"]="seen"
conversion_dict["sheen"]="sheen"
conversion_dict["suad"]="Saad"

conversion_dict["tua"]="Taa2"
conversion_dict["wao"]="waaw"
conversion_dict["zhal"]="dhaal"
conversion_dict["Zhal"]="dhaal"
conversion_dict["zua"]="Dhaa2"


In [None]:
import shutil
cur_dir='ar_alphabet_classification_data'
copy_dir="cleaned_classification_data"
dir_path=os.path.join(cwd,copy_dir)
if not os.path.exists(dir_path): os.makedirs(dir_path)
files=os.listdir(cur_dir)
id_count_dict={}
tmp_list=[]
for fi in files:
  src_fpath=os.path.join(cur_dir,fi)
  if not fi.endswith(".wav"): continue
  corr_id=None
  fname_split=fi.split("_")
  for tmp_id in fname_split:
    corr_id=conversion_dict.get(tmp_id)
    if corr_id!=None: break
  if corr_id==None: continue
  cur_count=id_count_dict.get(corr_id,0)
  new_fname="%s.%s.wav"%(corr_id, cur_count)
  id_count_dict[corr_id]=cur_count+1
  new_fpath=os.path.join(copy_dir,new_fname)
  print(fi, corr_id)
  print(src_fpath, new_fpath)
  shutil.copy(src_fpath,new_fpath)
  #tmp_list.append(fi.split("_")[-2])
  #print()
tmp_list=sorted(list(set(tmp_list)))
print(tmp_list)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
ar_alphabet_classification_data/0_2_Taj_Ta_T (40).wav cleaned_classification_data/taa2.79.wav
0_6_Taj_Kha_T (19).wav khaa2
ar_alphabet_classification_data/0_6_Taj_Kha_T (19).wav cleaned_classification_data/khaa2.131.wav
0_6_Taj_Kha_T (30).wav khaa2
ar_alphabet_classification_data/0_6_Taj_Kha_T (30).wav cleaned_classification_data/khaa2.132.wav
0_2_Taj_Ta_T (5).wav taa2
ar_alphabet_classification_data/0_2_Taj_Ta_T (5).wav cleaned_classification_data/taa2.80.wav
0_6_Taj_Kha_T (31).wav khaa2
ar_alphabet_classification_data/0_6_Taj_Kha_T (31).wav cleaned_classification_data/khaa2.133.wav
0_6_Taj_Kha_T (72).wav khaa2
ar_alphabet_classification_data/0_6_Taj_Kha_T (72).wav cleaned_classification_data/khaa2.134.wav
0_6_Taj_Kha_T (26).wav khaa2
ar_alphabet_classification_data/0_6_Taj_Kha_T (26).wav cleaned_classification_data/khaa2.135.wav
0_6_Taj_Kha_T (73).wav khaa2
ar_alphabet_classification_data/0_6_Taj_Kha_T (73).wav cleaned_

#Old Trials

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
d = sp0.sum(axis=1) / 2

In [None]:
d.shape

(1, 73728)

In [None]:
d

array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [None]:
import numpy as np
import os,json
from scipy import signal
from scipy.io import wavfile
import pywt


wav_fpath="basic/ga3aan-aw-3aTshaan.wav"
#wav_fpath="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/ARA NORM  1803.wav"
#sample_rate, samples = wavfile.read(wav_fpath)


sample_rate, samples = wavfile.read(wav_fpath)
if len(samples.shape)>1: samples= samples.sum(axis=1) / 2 #handle mono/sterio
frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)
file_duration=len(samples)/sample_rate
spectrogram=spectrogram.transpose()

print(samples.shape)
print(len(samples.shape))
print(samples[:10])

scales = (1, len(samples))
coefficient, frequency = pywt.cwt(samples, scales, 'gaus1')

# a=samples.sum(axis=1) / 2
# print(a[:10])
# print(a.shape)



(73728,)
1
[215. 335. 219. 229. 280. 165. 279. 204. 264. 254.]


In [None]:
import scipy, pywt
wavefile = "basic/ga3aan-aw-3aTshaan.wav"
# read the wavefile
sampling_frequency, signal2 = scipy.io.wavfile.read(wavefile)
#
scales = (1, len(signal2))
#coefficient, frequency = pywt.cwt(signal2, scales, 'db1')
cA,cD=pywt.dwt(signal2, 'bior6.8', 'per')
print(cA.shape)
print(cD.shape)

(73728, 1)
(73728, 1)


In [None]:
pywt.wavelist()

['bior1.1',
 'bior1.3',
 'bior1.5',
 'bior2.2',
 'bior2.4',
 'bior2.6',
 'bior2.8',
 'bior3.1',
 'bior3.3',
 'bior3.5',
 'bior3.7',
 'bior3.9',
 'bior4.4',
 'bior5.5',
 'bior6.8',
 'cgau1',
 'cgau2',
 'cgau3',
 'cgau4',
 'cgau5',
 'cgau6',
 'cgau7',
 'cgau8',
 'cmor',
 'coif1',
 'coif2',
 'coif3',
 'coif4',
 'coif5',
 'coif6',
 'coif7',
 'coif8',
 'coif9',
 'coif10',
 'coif11',
 'coif12',
 'coif13',
 'coif14',
 'coif15',
 'coif16',
 'coif17',
 'db1',
 'db2',
 'db3',
 'db4',
 'db5',
 'db6',
 'db7',
 'db8',
 'db9',
 'db10',
 'db11',
 'db12',
 'db13',
 'db14',
 'db15',
 'db16',
 'db17',
 'db18',
 'db19',
 'db20',
 'db21',
 'db22',
 'db23',
 'db24',
 'db25',
 'db26',
 'db27',
 'db28',
 'db29',
 'db30',
 'db31',
 'db32',
 'db33',
 'db34',
 'db35',
 'db36',
 'db37',
 'db38',
 'dmey',
 'fbsp',
 'gaus1',
 'gaus2',
 'gaus3',
 'gaus4',
 'gaus5',
 'gaus6',
 'gaus7',
 'gaus8',
 'haar',
 'mexh',
 'morl',
 'rbio1.1',
 'rbio1.3',
 'rbio1.5',
 'rbio2.2',
 'rbio2.4',
 'rbio2.6',
 'rbio2.8',
 'rbio3.1',

In [None]:
print(pywt.wavelist(kind='continuous'))

['cgau1', 'cgau2', 'cgau3', 'cgau4', 'cgau5', 'cgau6', 'cgau7', 'cgau8', 'cmor', 'fbsp', 'gaus1', 'gaus2', 'gaus3', 'gaus4', 'gaus5', 'gaus6', 'gaus7', 'gaus8', 'mexh', 'morl', 'shan']


In [None]:
len(samples)
# print(samples[0:10][0])
# print(samples[0][1])

# print(samples[10][0])
# print(samples[10][1])


215
215
234
234


In [None]:
!pip install python_speech_features

Collecting python_speech_features
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
Building wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone
  Created wheel for python-speech-features: filename=python_speech_features-0.6-py3-none-any.whl size=5888 sha256=d00fb594f7b5daefb010dd485307fe862da6f6e84df4fb46825af6a61dd50d04
  Stored in directory: /root/.cache/pip/wheels/b0/0e/94/28cd6afa3cd5998a63eef99fe31777acd7d758f59cf24839eb
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


In [None]:
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav

def get_mfcc(wav_fpath0):
  (rate,sig) = wav.read(wav_fpath0)
  if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
  return mfcc(sig,rate)

def get_fbank(wav_fpath0):
  (rate,sig) = wav.read(wav_fpath0)
  if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
  return logfbank(sig,rate)


cur_wavefile = "basic/ga3aan-aw-3aTshaan.wav"
cur_wavefile="/content/drive/MyDrive/speech_project/arabic-speech-corpus/wav/ARA NORM  1803.wav"
#cur_wavefile="basic/3arabeyyetoh-el-3arabeyyah-beta3toh.wav"
# (rate,sig) = wav.read(cur_wavefile)
# mfcc_feat = mfcc(sig,rate)
# fbank_feat = logfbank(sig,rate)
# if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
# frequencies, times, spectrogram = signal.spectrogram(sig,rate)

# print(sig.shape)
# print(spectrogram.shape)
# print(fbank_feat[1:3,:])
# print(fbank_feat.shape)
# print("mfcc", mfcc_feat.shape)
# print(mfcc_feat)

mfcc0=get_mfcc(cur_wavefile)
#mfcc0=get_fbank(cur_wavefile)
print(mfcc0.shape)
print(mfcc0)

  # This is added back by InteractiveShellApp.init_path()


(679, 26)
[[-6.19746729 -5.99381617 -5.276773   ...  5.48201802  8.01270006
   8.04649634]
 [-2.60093577 -2.90259519 -2.34477365 ...  5.52567012  7.91797019
   8.30487067]
 [-1.85467273 -1.9583237  -1.31726376 ...  5.67965708  7.73016014
   7.91001147]
 ...
 [-3.87000319 -3.89797274 -3.88823127 ...  5.47847471  7.74722577
   8.39613656]
 [-4.29668513 -4.87024882 -5.14827547 ...  5.95626795  7.93570826
   8.35177519]
 [-2.72414675 -3.92755005 -4.46265847 ...  5.70874143  7.3752326
   7.79495378]]


In [None]:
!tar -xvzf cv-corpus-8.0-2022-01-19-ar.tar.gz

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764599.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764603.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764604.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764645.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764647.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764649.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764653.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764654.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764714.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764720.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764721.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764722.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764723.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_24764842.mp3
cv-corpus-8.0-2022-01-19/ar/clips/common_voice_ar_247

In [None]:
import torch
import torch.nn as nn
rnn = nn.LSTM(10, 20, 2)
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))
print(output.shape)

torch.Size([5, 3, 20])


In [None]:
import torch, random
import torch.nn as nn
torch.manual_seed(1)
random.seed(1)
# m = nn.Linear(20, 30)
# input = torch.randn(128, 20)
m = nn.Linear(2, 3,bias=True)
m2 = nn.Linear(2, 3,bias=False)
input = torch.randn(5, 2)
output = m(input)
output2 = m2(input)
print(output.size())
print(output)
print(output2)

torch.Size([5, 3])
tensor([[-0.1227, -0.1239,  0.3805],
        [-0.5428,  0.7932,  0.6307],
        [-0.3832,  0.5874,  0.4384],
        [-0.2714,  0.6250,  0.1799],
        [ 0.2295,  0.0254, -0.4550]], grad_fn=<AddmmBackward0>)
tensor([[-0.2755, -0.6360,  0.6198],
        [ 0.2559,  0.3435, -0.0803],
        [ 0.1351,  0.1629, -0.0055],
        [ 0.1538,  0.2806, -0.1969],
        [-0.1990, -0.2238, -0.0244]], grad_fn=<MmBackward0>)


In [None]:
import torch
import torchvision

dummy_input = torch.randn(10, 3, 224, 224, device="cpu")
model = torchvision.models.alexnet(pretrained=True)

# Providing input and output names sets the display names for values
# within the model's graph. Setting these does not change the semantics
# of the graph; it is only for readability.
#
# The inputs to the network consist of the flat list of inputs (i.e.
# the values you would pass to the forward() method) followed by the
# flat list of parameters. You can partially specify names, i.e. provide
# a list here shorter than the number of inputs to the model, and we will
# only set that subset of names, starting from the beginning.
input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ]
output_names = [ "output1" ]

torch.onnx.export(model, dummy_input, "alexnet.onnx", verbose=True, input_names=input_names, output_names=output_names)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth


  0%|          | 0.00/233M [00:00<?, ?B/s]

KeyboardInterrupt: ignored

In [None]:
import numpy as np
import sys
import pandas as pd
import datetime
import sys
import random
import time
import math
from matplotlib import pyplot as plt

class Optimizer:
    #USE SAME DEFAULTS AS KERAS ADAM OPTIMIZER
    def __init__(self, lr=.1, beta_1=0.9, beta_2=0.999,
                 epsilon=0, decay=0., **kwargs):
        
        allowed_kwargs = {'clipnorm', 'clipvalue'}
        for k in kwargs:
            if k not in allowed_kwargs:
                raise TypeError('Unexpected keyword argument '
                                'passed to optimizer: ' + str(k))
        self.__dict__.update(kwargs)
        self.iterations = 1
        self.lr = lr
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.decay = decay
        self.epsilon = epsilon
        self.initial_decay = decay

    def get_ADAM(self, params, grads):

        original_shapes = [x.shape for x in params]
        params = [x.flatten() for x in params]
        grads = [x.flatten() for x in grads]
        
        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        t = self.iterations + 1
        lr_t = lr * (np.sqrt(1. - np.power(self.beta_2, t)) /
                     (1. - np.power(self.beta_1, t)))

        if not hasattr(self, 'ms'):
            self.ms = [np.zeros(p.shape) for p in params]
            self.vs = [np.zeros(p.shape) for p in params]
    
        ret = [None] * len(params)
        for i, p, g, m, v in zip(range(len(params)), params, grads, self.ms, self.vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * np.square(g)
            p_t = p - lr_t * m_t / (np.sqrt(v_t) + self.epsilon)
            self.ms[i] = m_t
            self.vs[i] = v_t
            ret[i] = p_t
        self.iterations += 1
  
        for i in range(len(ret)):
            ret[i] = ret[i].reshape(original_shapes[i])

        return np.array(ret)


    def get_SGD(self, w,p):
        for x,y in zip(w,p):
                    x+=self.lr*y
        return w[0],w[1],w[2],w[3],w[4],w[5],w[6],w[7],w[8],w[9]

def sigmoid(x): 
    return 1. / (1 + np.exp(-x))

def sigmoid_derivative(values): 
    return values*(1-values)

def tanh_derivative(values): 
    return 1. - values ** 2

# createst uniform random array w/ values in [a,b) and shape args
def rand_arr(a, b, *args): 
    np.random.seed(0)
    return (np.random.rand(*args) * (b - a) + a)*.1

class LstmParam:
    def __init__(self, mem_cell_ct, x_dim,optimization):
        self.mem_cell_ct = mem_cell_ct
        self.x_dim = x_dim
        concat_len = x_dim + mem_cell_ct
        
        self.opt=Optimizer()
        self.optimization=optimization

        # weight matrices
        self.wg = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
        self.wi = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len) 
        self.wf = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
        self.wo = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)

        # bias terms
        self.bg = rand_arr(-0.1, 0.1, mem_cell_ct) 
        self.bi = rand_arr(-0.1, 0.1, mem_cell_ct) 
        self.bf = rand_arr(-0.1, 0.1, mem_cell_ct) 
        self.bo = rand_arr(-0.1, 0.1, mem_cell_ct)


        
        # diffs (derivative of loss function w.r.t. all parameters)
        self.wg_diff = np.zeros((mem_cell_ct, concat_len)) 
        self.wi_diff = np.zeros((mem_cell_ct, concat_len)) 
        self.wf_diff = np.zeros((mem_cell_ct, concat_len)) 
        self.wo_diff = np.zeros((mem_cell_ct, concat_len)) 
        self.bg_diff = np.zeros(mem_cell_ct) 
        self.bi_diff = np.zeros(mem_cell_ct) 
        self.bf_diff = np.zeros(mem_cell_ct) 
        self.bo_diff = np.zeros(mem_cell_ct) 

    def apply_diff(self, lr = .1):
        if(self.optimization=='adam'):
            self.wg=self.opt.get_ADAM(self.wg,self.wg_diff)
            self.wi=self.opt.get_ADAM(np.array(self.wi),np.array(self.wi_diff))
            self.wf=self.opt.get_ADAM(np.array(self.wf),np.array(self.wf_diff))
            self.wo=self.opt.get_ADAM(np.array(self.wo),np.array(self.wo_diff))

        else:
            #This is the stochastic gradient descent code
            self.wg -= lr * self.wg_diff
            self.wi -= lr * self.wi_diff
            self.wf -= lr * self.wf_diff
            self.wo -= lr * self.wo_diff


        
        self.bg -= lr * self.bg_diff
        self.bi -= lr * self.bi_diff
        self.bf -= lr * self.bf_diff
        self.bo -= lr * self.bo_diff
        
        # reset diffs to zero
        self.wg_diff = np.zeros_like(self.wg)
        self.wi_diff = np.zeros_like(self.wi) 
        self.wf_diff = np.zeros_like(self.wf) 
        self.wo_diff = np.zeros_like(self.wo) 
        self.bg_diff = np.zeros_like(self.bg)
        self.bi_diff = np.zeros_like(self.bi) 
        self.bf_diff = np.zeros_like(self.bf) 
        self.bo_diff = np.zeros_like(self.bo) 

class LstmState:
    def __init__(self, mem_cell_ct, x_dim):
        self.g = np.zeros(mem_cell_ct)
        self.i = np.zeros(mem_cell_ct)
        self.f = np.zeros(mem_cell_ct)
        self.o = np.zeros(mem_cell_ct)
        self.s = np.zeros(mem_cell_ct)
        self.h = np.zeros(mem_cell_ct)
        self.bottom_diff_h = np.zeros_like(self.h)
        self.bottom_diff_s = np.zeros_like(self.s)
    
class LstmNode:
    def __init__(self, lstm_param, lstm_state):
        # store reference to parameters and to activations
        self.state = lstm_state
        self.param = lstm_param

        # non-recurrent input concatenated with recurrent input
        self.xc = None

    def bottom_data_is(self, x, s_prev = None, h_prev = None):
        # if this is the first lstm node in the network
        if s_prev is None: s_prev = np.zeros_like(self.state.s)
        if h_prev is None: h_prev = np.zeros_like(self.state.h)
        # save data for use in backprop
        self.s_prev = s_prev
        self.h_prev = h_prev

        # concatenate x(t) and h(t-1)
        xc = np.hstack((x,  h_prev))
        self.state.g = np.tanh(np.dot(self.param.wg, xc) + self.param.bg)
        self.state.i = sigmoid(np.dot(self.param.wi, xc) + self.param.bi)
        self.state.f = sigmoid(np.dot(self.param.wf, xc) + self.param.bf)
        self.state.o = sigmoid(np.dot(self.param.wo, xc) + self.param.bo)
        self.state.s = self.state.g * self.state.i + s_prev * self.state.f
        self.state.h = self.state.s * self.state.o

        self.xc = xc

    
    def top_diff_is(self, top_diff_h, top_diff_s):
        # notice that top_diff_s is carried along the constant error carousel
        ds = self.state.o * top_diff_h + top_diff_s
        do = self.state.s * top_diff_h
        di = self.state.g * ds
        dg = self.state.i * ds
        df = self.s_prev * ds

        # diffs w.r.t. vector inside sigma / tanh function
        di_input = sigmoid_derivative(self.state.i) * di 
        df_input = sigmoid_derivative(self.state.f) * df 
        do_input = sigmoid_derivative(self.state.o) * do 
        dg_input = tanh_derivative(self.state.g) * dg

        # diffs w.r.t. inputs
        self.param.wi_diff += np.outer(di_input, self.xc)
        self.param.wf_diff += np.outer(df_input, self.xc)
        self.param.wo_diff += np.outer(do_input, self.xc)
        self.param.wg_diff += np.outer(dg_input, self.xc)
        self.param.bi_diff += di_input
        self.param.bf_diff += df_input       
        self.param.bo_diff += do_input
        self.param.bg_diff += dg_input

        #for dparam in [self.param.wi_diff, self.param.wf_diff , self.param.wo_diff, self.param.wg_diff, self.param.bi_diff, self.param.bf_diff, self.param.bo_diff, self.param.bg_diff]:
        #    np.clip(dparam, -1, 1, out=dparam)

        # compute bottom diff
        dxc = np.zeros_like(self.xc)
        dxc += np.dot(self.param.wi.T, di_input)
        dxc += np.dot(self.param.wf.T, df_input)
        dxc += np.dot(self.param.wo.T, do_input)
        dxc += np.dot(self.param.wg.T, dg_input)

        # save bottom diffs
        self.state.bottom_diff_s = ds * self.state.f
        self.state.bottom_diff_h = dxc[self.param.x_dim:]

class LstmNetwork():
    def __init__(self, lstm_param, loss):
        self.lstm_param = lstm_param
        self.lstm_node_list = []
        # input sequence
        self.x_list = []
        self.loss=loss

    def y_list_is(self, y_list, loss_layer):
        """
        Updates diffs by setting target sequence 
        with corresponding loss layer. 
        Will *NOT* update parameters.  To update parameters,
        call self.lstm_param.apply_diff()
        """
        assert len(y_list) == len(self.x_list)
        idx = len(self.x_list) - 1
        # first node only gets diffs from label ...
        loss = loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx],self.loss)

        diff_h =loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])

        # here s is not affecting loss due to h(t+1), hence we set equal to zero
        diff_s = np.zeros(self.lstm_param.mem_cell_ct)
        self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
        idx -= 1

        ### ... following nodes also get diffs from next nodes, hence we add diffs to diff_h
        ### we also propagate error along constant error carousel using diff_s
        while idx >= 0:
            loss += loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx],self.loss)
            diff_h = loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])
            diff_h += self.lstm_node_list[idx + 1].state.bottom_diff_h
            diff_s = self.lstm_node_list[idx + 1].state.bottom_diff_s
            self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
            idx -= 1 

        return loss

    def x_list_clear(self):
        self.x_list = []

    def x_list_add(self, x):
        self.x_list.append(x)
       # print(self.x_list)
        if len(self.x_list) > len(self.lstm_node_list):
            # need to add new lstm node, create new state mem
            lstm_state = LstmState(self.lstm_param.mem_cell_ct, self.lstm_param.x_dim)
            self.lstm_node_list.append(LstmNode(self.lstm_param, lstm_state))

        # get index of most recent x input
        idx = len(self.x_list) - 1
        if idx == 0:
            # no recurrent inputs yet
            self.lstm_node_list[idx].bottom_data_is(x)
        else:
            s_prev = self.lstm_node_list[idx - 1].state.s
            h_prev = self.lstm_node_list[idx - 1].state.h
            self.lstm_node_list[idx].bottom_data_is(x, s_prev, h_prev)



class LossLayer:
    """
    Computes square loss with first element of hidden layer array.
    MG-Attempted to add in mae loss for comparison, but RMSE and MAE loss performed the same.  
    """
    @classmethod
    def loss(self,pred, label,fn):
        if(fn=='mae'):
            return LossLayer.loss_mae(pred,label)
        else:
            return LossLayer.loss_rmse(pred,label)
    
    # MG added mean absolute error
    @classmethod
    def loss_mae(self, pred, label):
        return (np.abs(pred[0]-label))
        #return (pred[0] - label) ** 2
    
    @classmethod
    def loss_rmse(self, pred, label):
        return (pred[0] - label) ** 2

    @classmethod
    def bottom_diff(self, pred, label):
        diff = np.zeros_like(pred)
        diff[0] =2*(pred[0] - label)
        return diff



def train(loss, optimization):
    mem_cell_ct = 50
    x_dim = 4
    lstm_param = LstmParam(mem_cell_ct, x_dim,optimization)
    lstm_net = LstmNetwork(lstm_param,loss)
    losses=[]
    bestLoss=1e5
    print("Training...")
    for cur_iter in range(100):
       
        for ind in range(len(Y)):
            lstm_net.x_list_add(X[ind])

        if(cur_iter%50==0):
            print("iter", "%2s" % str(cur_iter), end=": ")
            print("y_pred = [" +
                  ", ".join(["% 2.5f" % lstm_net.lstm_node_list[ind].state.h[0] for ind in range(len(Y))]) +
                  "]", end=", ")

        loss = lstm_net.y_list_is(Y, LossLayer)
        losses.append(loss)
        if(loss<bestLoss):
            best_lstm_net = LstmNetwork(lstm_param,loss)
            
        lstm_param.apply_diff(lr=0.1)
        
        if(cur_iter%50==0):
            print("loss:", "%.3e" % loss)

        lstm_net.x_list_clear()
    
    for ind in range(len(Y)):
        best_lstm_net.x_list_add(X[ind])   
    loss = best_lstm_net.y_list_is(Y, LossLayer)
    return losses, [ best_lstm_net.lstm_node_list[ind].state.h[0] for ind in range(len(Y))],loss



def firstTurbineData():
	df = pd.read_csv('la-haute-borne-data-2013-2016.csv', sep=';')
	df['Date_time'] = df['Date_time'].astype(str).str[:-6] #remove timezone (caused me an hour of pain)
	df.Date_time=pd.to_datetime(df['Date_time'])
	df=df.fillna(method='ffill')

	df=df.sort_values(by='Date_time')
	df = df.reset_index()
	turbines=df.Wind_turbine_name.unique()
	print("Turbine name: "+str(turbines[0]))
	turbineData=df[df['Wind_turbine_name']==turbines[0]]
	return turbineData


def createGraph(losses, title):
	X = np.arange(0,len(losses))
	figure = plt.figure()
	tick_plot = figure.add_subplot(1, 1, 1)
	tick_plot.plot(X, losses,  color='green', linestyle='-', marker='*' )
	plt.xlabel('Iteration')
	plt.ylabel('Loss')
	plt.title(title)
	plt.show()


np.random.seed(0)
date_to_test=datetime.datetime(2016, 1, 1)
turbineData=np.sin(firstTurbineData().Wa_c_avg.values)[:10]
X=np.array([turbineData[:4],
                   turbineData[1:5],
                   turbineData[2:6],
                   turbineData[3:7],
                   turbineData[4:8],
                   turbineData[5:9]])
Y=np.array([turbineData[4],
                   turbineData[5],
                   turbineData[6],
                   turbineData[7],
                   turbineData[8],
                   turbineData[9]])


losses, predictions,loss=train('rmse','sgd')
print("Actual vs Predicted:")
print(Y)
print(predictions)
createGraph(losses,"SGD Optimization\nLoss="+str(loss))
losses, predictions,loss=train('rmse','adam')
print("Actual vs Predicted:")
print(Y)
print(predictions)
createGraph(losses,"Adam Optimization\nLoss="+str(loss))

FileNotFoundError: ignored

In [None]:
import re, html
import urllib.parse

google_form_txt='''
<span class="c91"><a class="c25" href="https://www.google.com/url?q=https://docs.google.com/forms/d/e/1FAIpQLSfmDHdlKoyJq8EPWOW933rsrbygjOVMs8uefbIcaWDsQr5NXg/viewform?usp%3Dsf_link&amp;sa=D&amp;source=editors&amp;ust=1645292050058291&amp;usg=AOvVaw1OFcRLELd7dNnX3Qfl-2pQ">Interactive Quiz</a></span>
'''

youtube_text='''
<p class="c24"><span class="c91"><a class="c25" href="https://www.google.com/url?q=https://www.youtube.com/watch?v%3DDddX_IdZxOg&amp;sa=D&amp;source=editors&amp;ust=1645292049854241&amp;usg=AOvVaw2fZP9cJjmmYiU0n_rcTK4X">https://www.youtube.com/watch?v=DddX_IdZxOg</a></span><span>&nbsp;</span></p>
'''
cur_text=youtube_text
print(html.unescape(cur_text))
#print(re.escape('<a '))
found_items=re.findall('(<a .+?href="(.+?)".*?>(.+?)</a>)',cur_text)
for outer,href,anchor in found_items:
  print("????", outer)
  print(href)
  href=html.unescape(href)
  href=urllib.parse.unquote(href)
  href=href.replace("https://www.google.com/url?q=","")
  print(">>>>", href)
  print(anchor)
  new_outer=""
  if "google.com" in href and "form" in href: #embedding google forms
    new_outer='<h6>%s</h6>'%anchor
    embedded_link=href.split("viewform?")[0]+"viewform?embedded=true"
    new_outer+='<iframe src="%s" width="640" height="1567" frameborder="0" marginheight="0" marginwidth="0">Loading…</iframe>'%embedded_link
  if "youtube.com" in href: #embedding youtube video
    #new_outer='<h6>%s</h6>'%anchor
    href=href.split("&")[0]
    embedded_link=href.replace('watch?v=','embed/')
    new_outer+='<iframe width="560" height="315" src="%s" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'%embedded_link

  if new_outer: cur_text=cur_text.replace(outer,new_outer)
  print(new_outer)
  print(cur_text)
  print("-----")
#print(found_items)


<p class="c24"><span class="c91"><a class="c25" href="https://www.google.com/url?q=https://www.youtube.com/watch?v%3DDddX_IdZxOg&sa=D&source=editors&ust=1645292049854241&usg=AOvVaw2fZP9cJjmmYiU0n_rcTK4X">https://www.youtube.com/watch?v=DddX_IdZxOg</a></span><span> </span></p>

???? <a class="c25" href="https://www.google.com/url?q=https://www.youtube.com/watch?v%3DDddX_IdZxOg&amp;sa=D&amp;source=editors&amp;ust=1645292049854241&amp;usg=AOvVaw2fZP9cJjmmYiU0n_rcTK4X">https://www.youtube.com/watch?v=DddX_IdZxOg</a>
https://www.google.com/url?q=https://www.youtube.com/watch?v%3DDddX_IdZxOg&amp;sa=D&amp;source=editors&amp;ust=1645292049854241&amp;usg=AOvVaw2fZP9cJjmmYiU0n_rcTK4X
>>>> https://www.youtube.com/watch?v=DddX_IdZxOg&sa=D&source=editors&ust=1645292049854241&usg=AOvVaw2fZP9cJjmmYiU0n_rcTK4X
https://www.youtube.com/watch?v=DddX_IdZxOg
<iframe width="560" height="315" src="https://www.youtube.com/embed/DddX_IdZxOg" title="YouTube video player" frameborder="0" allow="accelerometer; a

In [None]:
### NOT MY CODE
import numpy as np 
from scipy.special import expit as sigmoid

def forget_gate(x, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, prev_cell_state):
    forget_hidden  = np.dot(Weights_hf, h) + Bias_hf
    forget_eventx  = np.dot(Weights_xf, x) + Bias_xf
    return np.multiply( sigmoid(forget_hidden + forget_eventx), prev_cell_state )

def input_gate(x, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, Weights_hl, Bias_hl, Weights_xl, Bias_xl):
    ignore_hidden  = np.dot(Weights_hi, h) + Bias_hi
    ignore_eventx  = np.dot(Weights_xi, x) + Bias_xi
    learn_hidden   = np.dot(Weights_hl, h) + Bias_hl
    learn_eventx   = np.dot(Weights_xl, x) + Bias_xl
    return np.multiply( sigmoid(ignore_eventx + ignore_hidden), np.tanh(learn_eventx + learn_hidden) )


def cell_state(forget_gate_output, input_gate_output):
    return forget_gate_output + input_gate_output

  
def output_gate(x, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, cell_state):
    out_hidden = np.dot(Weights_ho, h) + Bias_ho
    out_eventx = np.dot(Weights_xo, x) + Bias_xo
    return np.multiply( sigmoid(out_eventx + out_hidden), np.tanh(cell_state) )
def sigmoid(x):
    return 1/(1 + np.exp(-x))

def get_slices(hidden_dim):
    slices=[]
    breaker=(hidden_dim*4)
    slices=[[i,i+3] for i in range(0, breaker, breaker//4)]
    return slices

class numpy_lstm:
    def __init__( self, layer_num=0, hidden_dim=1, matching_in_out=False):
        self.matching_in_out=matching_in_out
        self.layer_num=layer_num
        self.hidden_dim=hidden_dim
        
    def init_weights_from_pytorch(self, state):
        slices=get_slices(self.hidden_dim)
        print (slices)

        #Event (x) Weights and Biases for all gates
        
        lstm_weight_ih='lstm.weight_ih_l'+str(self.layer_num)
        self.Weights_xi = state[lstm_weight_ih][slices[0][0]:slices[0][1]].numpy()  # shape  [h, x]
        self.Weights_xf = state[lstm_weight_ih][slices[1][0]:slices[1][1]].numpy()  # shape  [h, x]
        self.Weights_xl = state[lstm_weight_ih][slices[2][0]:slices[2][1]].numpy()  # shape  [h, x]
        self.Weights_xo = state[lstm_weight_ih][slices[3][0]:slices[3][1]].numpy() # shape  [h, x]

        
        lstm_bias_ih='lstm.bias_ih_l'+str(self.layer_num)
        self.Bias_xi = state[lstm_bias_ih][slices[0][0]:slices[0][1]].numpy()  #shape is [h, 1]
        self.Bias_xf = state[lstm_bias_ih][slices[1][0]:slices[1][1]].numpy()  #shape is [h, 1]
        self.Bias_xl = state[lstm_bias_ih][slices[2][0]:slices[2][1]].numpy()  #shape is [h, 1]
        self.Bias_xo = state[lstm_bias_ih][slices[3][0]:slices[3][1]].numpy() #shape is [h, 1]
        
        
        lstm_weight_hh='lstm.weight_hh_l'+str(self.layer_num)

        #Hidden state (h) Weights and Biases for all gates
        self.Weights_hi = state[lstm_weight_hh][slices[0][0]:slices[0][1]].numpy()  #shape is [h, h]
        self.Weights_hf = state[lstm_weight_hh][slices[1][0]:slices[1][1]].numpy()  #shape is [h, h]
        self.Weights_hl = state[lstm_weight_hh][slices[2][0]:slices[2][1]].numpy()  #shape is [h, h]
        self.Weights_ho = state[lstm_weight_hh][slices[3][0]:slices[3][1]].numpy() #shape is [h, h]
        
        
        lstm_bias_hh='lstm.bias_hh_l'+str(self.layer_num)

        self.Bias_hi = state[lstm_bias_hh][slices[0][0]:slices[0][1]].numpy()  #shape is [h, 1]
        self.Bias_hf = state[lstm_bias_hh][slices[1][0]:slices[1][1]].numpy()  #shape is [h, 1]
        self.Bias_hl = state[lstm_bias_hh][slices[2][0]:slices[2][1]].numpy()  #shape is [h, 1]
        self.Bias_ho = state[lstm_bias_hh][slices[3][0]:slices[3][1]].numpy() #shape is [h, 1]
    def forward_lstm_pass(self,input_data):
        h = np.zeros(self.hidden_dim)
        c = np.zeros(self.hidden_dim)
        
        output_list=[]
        for eventx in input_data:
            f = forget_gate(eventx, h, self.Weights_hf, self.Bias_hf, self.Weights_xf, self.Bias_xf, c)
            i =  input_gate(eventx, h, self.Weights_hi, self.Bias_hi, self.Weights_xi, self.Bias_xi, 
                        self.Weights_hl, self.Bias_hl, self.Weights_xl, self.Bias_xl)
            c = cell_state(f,i)
            h = output_gate(eventx, h, self.Weights_ho, self.Bias_ho, self.Weights_xo, self.Bias_xo, c)
            if self.matching_in_out: # doesnt make sense but it was as it was in main code :(
                output_list.append(h)
        if self.matching_in_out:
            return output_list
        else:
            return h

class fully_connected_layer:
    def __init__(self,state, dict_name='fc', ):
        self.fc_Weight = state[dict_name+'.weight'][0].numpy()
        self.fc_Bias = state[dict_name+'.bias'][0].numpy() #shape is [,output_size]
        
    def forward(self,lstm_output, is_sigmoid=True):
        res=np.dot(self.fc_Weight, lstm_output)+self.fc_Bias
        print (res)
        if is_sigmoid:
            return sigmoid(res)
        else:
            return res
class RNN_model_Numpy:
    def __init__(self, state, input_size, hidden_dim, output_size, num_layers, matching_in_out=True):
        self.lstm_layers=[]
        for i in range(0, num_layers):
            lstm_layer_obj=numpy_lstm(layer_num=i, hidden_dim=hidden_dim, matching_in_out=True)
            lstm_layer_obj.init_weights_from_pytorch(state) 
            self.lstm_layers.append(lstm_layer_obj)
        
        self.hidden2out=fully_connected_layer(state, dict_name='hidden2out')
        
    def forward(self, feature_list):
        for x in self.lstm_layers:
            lstm_output=x.forward_lstm_pass(feature_list)
            feature_list=lstm_output
            
        return self.hidden2out.forward(feature_list, is_sigmoid=False)            

In [None]:
#Set Parameters for a small LSTM network
input_size  = 2 # size of one 'event', or sample, in our batch of data
hidden_dim  = 16 # 3 cells in the LSTM layer
output_size = 1 # desired model output

num_layers=3
torch_lstm = RNN( input_size, 
                 hidden_dim ,
                 output_size,
                 num_layers,
                 matching_in_out=True
                 )

state = torch_lstm.state_dict() # state will capture the weights of your model

data = np.array(
           [[1,1],
            [2,2],
            [3,3]])



check=RNN_model_Numpy(state, input_size, hidden_dim, output_size, num_layers)
check.forward(data)

[[0, 3], [16, 19], [32, 35], [48, 51]]
[[0, 3], [16, 19], [32, 35], [48, 51]]
[[0, 3], [16, 19], [32, 35], [48, 51]]


ValueError: ignored

In [None]:
len(state)

14

In [None]:
state

OrderedDict([('lstm.weight_ih_l0', tensor([[ 0.5055,  0.4953],
                      [ 0.4535, -0.4565],
                      [ 0.2675,  0.4622],
                      [-0.4010,  0.5181],
                      [ 0.2984, -0.4150],
                      [ 0.4699, -0.5353],
                      [ 0.3299,  0.0105],
                      [ 0.4356, -0.1924],
                      [ 0.3139, -0.5515],
                      [-0.5613,  0.0754],
                      [-0.1207,  0.2033],
                      [ 0.2983, -0.4862]])),
             ('lstm.weight_hh_l0', tensor([[ 0.2720,  0.4060, -0.4331],
                      [ 0.5397,  0.3111,  0.4486],
                      [ 0.4865, -0.3880, -0.5542],
                      [ 0.4646, -0.1364,  0.0443],
                      [-0.4577,  0.4739,  0.2935],
                      [ 0.3804,  0.1048, -0.2287],
                      [ 0.1187,  0.1550,  0.5243],
                      [ 0.0682,  0.4561,  0.0381],
                      [-0.1711,  0.5209,  0

In [None]:
rnn = nn.LSTM(10, 20, 2)
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))

In [None]:
output.shape

torch.Size([5, 3, 20])

In [None]:
m = nn.Linear(20, 30)
input = torch.randn(128, 20)
output = m(input)

In [None]:
output.shape

torch.Size([128, 30])

In [None]:
import torch
e0=20
model_dir=os.path.join(cwd,"mfcc_models") 
PATH=os.path.join(model_dir, "model-%s.model"%e0)
test=torch.load(PATH)
print(test.keys())
#print(test["model_state_dict"])
for k,v in test["model_state_dict"].items():
  print(k,v.shape)
  print("---")

dict_keys(['epoch', 'model_state_dict', 'optimizer_state_dict', 'train_loss', 'test_loss'])
lstm.weight_ih_l0 torch.Size([128, 26])
---
lstm.weight_hh_l0 torch.Size([128, 32])
---
lstm.bias_ih_l0 torch.Size([128])
---
lstm.bias_hh_l0 torch.Size([128])
---
lstm.weight_ih_l1 torch.Size([128, 32])
---
lstm.weight_hh_l1 torch.Size([128, 32])
---
lstm.bias_ih_l1 torch.Size([128])
---
lstm.bias_hh_l1 torch.Size([128])
---
hidden2out.weight torch.Size([4200, 32])
---
hidden2out.bias torch.Size([4200])
---


In [None]:
state=test["model_state_dict"]
print(state['lstm.weight_ih_l0'].shape)
Weights_xi = state['lstm.weight_ih_l0'][0:3].numpy()  # shape  [h, x]
Weights_xf = state['lstm.weight_ih_l0'][3:6].numpy()  # shape  [h, x]
Weights_xl = state['lstm.weight_ih_l0'][6:9].numpy()  # shape  [h, x]
Weights_xo = state['lstm.weight_ih_l0'][9:12].numpy() # shape  [h, x]

print(Weights_xi.shape)
print(Weights_xf.shape)
print(Weights_xl.shape)
print(Weights_xo.shape)

torch.Size([128, 26])
(3, 26)
(3, 26)
(3, 26)
(3, 26)


In [None]:
def get_slices(hidden_dim):
    slices=[]
    breaker=(hidden_dim*4)
    slices=[[i,i+3] for i in range(0, breaker, breaker//4)]
    return slices

In [None]:
tmp=list(range(16))
for a in range(tmp)
print(tmp)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]


In [None]:
test2=state['lstm.weight_ih_l0']
test2=state['lstm.weight_ih_l1']
print(test2)
def get_slices(hidden_dim):
    slices=[]
    breaker=(hidden_dim*4)
    slices=[[i,i+3] for i in range(0, breaker, breaker//4)]
    return slices
print(get_slices(4))
print(test2.shape[0])
def get_quarters(array0):
  all_qt_slices=[]
  q_size=int(array0.shape[0]/4) #the size of all hidden arrays/tensors is always a multiple of 4
  for i0 in range(0,4):
    i1=i0+1
    cur_qt_slice=array0[i0*q_size:i1*q_size] #current quarter/slice
    all_qt_slices.append(cur_qt_slice)
  return all_qt_slices

tmp_slices=get_quarters(test2)
for a0 in tmp_slices:
  print(a0, a0.shape)

tensor([[-0.3485, -0.1107, -0.3841, -0.3364],
        [-0.0716,  0.0967, -0.4097,  0.4860],
        [ 0.3677, -0.4653,  0.3125, -0.3000],
        [-0.4074,  0.4684, -0.2011, -0.1659],
        [ 0.0623,  0.2826,  0.0385, -0.1344],
        [ 0.0424, -0.2591, -0.3637,  0.1302],
        [ 0.0527,  0.2484, -0.1857, -0.4942],
        [-0.1241, -0.4838, -0.0394,  0.2443],
        [ 0.0463,  0.2869,  0.0933,  0.2144],
        [ 0.4501,  0.0744,  0.0832, -0.2029],
        [ 0.3118,  0.4194, -0.3473,  0.4853],
        [ 0.1508, -0.0698, -0.4576, -0.4997],
        [-0.3928, -0.4182, -0.3334, -0.1456],
        [-0.4150, -0.4180, -0.0556, -0.4207],
        [-0.0452,  0.0852,  0.0949,  0.3138],
        [ 0.2729, -0.2804, -0.1684,  0.0115]])
[[0, 3], [4, 7], [8, 11], [12, 15]]
16
tensor([[-0.3485, -0.1107, -0.3841, -0.3364],
        [-0.0716,  0.0967, -0.4097,  0.4860],
        [ 0.3677, -0.4653,  0.3125, -0.3000],
        [-0.4074,  0.4684, -0.2011, -0.1659]]) torch.Size([4, 4])
tensor([[ 0.0623,  0

#Numpy alternative - OLD


In [None]:
import torch
import random
from torch import nn
#Set Parameters for a small LSTM network
input_size  = 2 # size of one 'event', or sample, in our batch of data
hidden_dim  = 3 # 3 cells in the LSTM layer
output_size = 5 # desired model output
n_layers = 3

torch.manual_seed(1)
random.seed(1)

def model_output(lstm_output, fc_Weight, fc_Bias):
  '''Takes the LSTM output and transforms it to our desired 
  output size using a final, fully connected layer'''
  return np.dot(fc_Weight, lstm_output) + fc_Bias

#Initialize an PyTorch LSTM for comparison to our Numpy LSTM
class LSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers=1):
        super(LSTM, self).__init__()
        self.hidden_dim=hidden_dim
        #LSTM Layer
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)
        #Final, fully-connected layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x, hidden):
        batch_size = 1
        # get LSTM outputs
        lstm_output, (h,c) = self.lstm(x, hidden)
        print("lstm_output -1",lstm_output)
        # shape output to be (batch_size*seq_length, hidden_dim)
        lstm_output = lstm_output.view(-1, self.hidden_dim)  
        print("lstm_output -2",lstm_output)
        
        # get final output 
        model_output = self.fc(lstm_output)
        
        return model_output, (h,c)
      


torch_lstm = LSTM(input_size = input_size, 
                 hidden_dim = hidden_dim,
                 output_size = output_size,
                  n_layers = n_layers
                 )

state = torch_lstm.state_dict()
#print(state)
for k,v in state.items():
  print(k,v.shape)
  print("---")
      

lstm.weight_ih_l0 torch.Size([12, 2])
---
lstm.weight_hh_l0 torch.Size([12, 3])
---
lstm.bias_ih_l0 torch.Size([12])
---
lstm.bias_hh_l0 torch.Size([12])
---
lstm.weight_ih_l1 torch.Size([12, 3])
---
lstm.weight_hh_l1 torch.Size([12, 3])
---
lstm.bias_ih_l1 torch.Size([12])
---
lstm.bias_hh_l1 torch.Size([12])
---
lstm.weight_ih_l2 torch.Size([12, 3])
---
lstm.weight_hh_l2 torch.Size([12, 3])
---
lstm.bias_ih_l2 torch.Size([12])
---
lstm.bias_hh_l2 torch.Size([12])
---
fc.weight torch.Size([5, 3])
---
fc.bias torch.Size([5])
---


In [None]:
import numpy as np
#from scipy.special import expit as sigmoid
torch.manual_seed(1)
random.seed(1)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def forget_gate(x, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, prev_cell_state):
    forget_hidden  = np.dot(Weights_hf, h) + Bias_hf
    # print("Weights_xf",Weights_xf.shape)
    # print("x",x.shape)
    # print("Bias_xf",Bias_xf.shape)
    forget_eventx  = np.dot(Weights_xf, x) + Bias_xf
    return np.multiply( sigmoid(forget_hidden + forget_eventx), prev_cell_state )

def input_gate(x, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, Weights_hl, Bias_hl, Weights_xl, Bias_xl):
    ignore_hidden  = np.dot(Weights_hi, h) + Bias_hi
    ignore_eventx  = np.dot(Weights_xi, x) + Bias_xi
    learn_hidden   = np.dot(Weights_hl, h) + Bias_hl
    learn_eventx   = np.dot(Weights_xl, x) + Bias_xl
    return np.multiply( sigmoid(ignore_eventx + ignore_hidden), np.tanh(learn_eventx + learn_hidden) )


def cell_state(forget_gate_output, input_gate_output):
    return forget_gate_output + input_gate_output

  
def output_gate(x, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, cell_state):
    out_hidden = np.dot(Weights_ho, h) + Bias_ho
    out_eventx = np.dot(Weights_xo, x) + Bias_xo
    return np.multiply( sigmoid(out_eventx + out_hidden), np.tanh(cell_state) )


def get_quarters(array0): #split hidden arrays/tensors into quarters to get weights and biases for ignore, forget, learn, output
  all_qt_slices=[]
  q_size=int(array0.shape[0]/4) #the size of all hidden arrays/tensors is always a multiple of 4
  for i0 in range(0,4):
    i1=i0+1
    cur_qt_slice=array0[i0*q_size:i1*q_size] #current quarter/slice
    all_qt_slices.append(cur_qt_slice)
  return all_qt_slices

# class gate_cls: #get weights and biases for all gates for each layer
#   def __init__(self):
#     self.Weights_xi = None
#     self.Weights_xf = None
#     self.Weights_xl = None
#     self.Weights_xo = None
#     self.Bias_xi = None
#     self.Bias_xf = None
#     self.Bias_xl = None
#     self.Bias_xo = None
#     self.Weights_hi = None
#     self.Weights_hf = None
#     self.Weights_hl = None
#     self.Weights_ho = None
#     self.Bias_hi = None
#     self.Bias_hf = None
#     self.Bias_hl = None
#     self.Bias_ho = None


class state_dict_cls: #process state dict, extract weights and biases in a more organized way
  def __init__(self,state_dict_input):
    self.layer_list=[]
    self.layered_dict={}
    self.gate_dict={}
    self.layer_gate_dict={}
    self.n_hidden=None
    for key,val in state_dict_input.items():
      #a0,b0=key.split(".")
      tmp_split=key.split(".")
      b0=tmp_split[-1]

      b_split=b0.split("_")
      last_b=b_split[-1]
      if last_b.startswith("l"): #to get the weights/biases for each layer
        val=val.numpy()
        self.n_hidden=int(val.shape[0]/4)
        #cur_gate_class=self.layer_gate_dict.get(last_b,gate_cls())
        if not last_b in self.layer_list: self.layer_list.append(last_b) #just to collect all the layers
        tmp_dict0=self.layered_dict.get(last_b,{})
        tmp_gate_dict0=self.gate_dict.get(last_b,{})
        new_key="_".join(key.split("_")[:-1]) 
        new_key="_".join(b_split[:-1])
        new_key=new_key.replace("ih","x").replace("hh","h")
        tmp_dict0[new_key]=val
        self.layered_dict[last_b]=tmp_dict0
        val_slices=get_quarters(val)
        #tmp_gate_dict1={}
        gate_names=["i","f","l","o"]
        for gn0,gslice0 in zip(gate_names,val_slices):
          gate_key0=new_key+gn0
          tmp_gate_dict0[gate_key0]=gslice0
        #tmp_gate_dict0[new_key]=tmp_gate_dict1
        self.gate_dict[last_b]=tmp_gate_dict0
      if len(b_split)==1: #to get the fully connected weights/biases
        tmp_dict0=self.layered_dict.get("fc",{})
        tmp_dict0[b0]=val.numpy()
        self.layered_dict["fc"]=tmp_dict0
    self.n_layers=len(self.layer_list)

def lstm_fn(eventx,l0_dict,h,c):
  f = forget_gate(eventx, h, l0_dict["weight_hf"], l0_dict["bias_hf"], l0_dict["weight_xf"], l0_dict["bias_xf"], c)
  i =  input_gate(eventx, h, l0_dict["weight_hi"], l0_dict["bias_hi"], l0_dict["weight_xi"], l0_dict["bias_xi"], 
              l0_dict["weight_hl"], l0_dict["bias_hl"], l0_dict["weight_xl"], l0_dict["bias_xl"])
  c = cell_state(f,i)
  h = output_gate(eventx, h, l0_dict["weight_ho"], l0_dict["bias_ho"], l0_dict["weight_xo"], l0_dict["bias_xo"], c)
  return c,h


state_dict_obj=state_dict_cls(state)
ld=state_dict_obj.layered_dict #process_state_dict(state)
gd=state_dict_obj.gate_dict
for a,b in state.items():
  print(a,b.shape)
print(state_dict_obj.n_layers)
print(state_dict_obj.n_hidden)
n_hidden=state_dict_obj.n_hidden


input_data = np.array(
           [[1,1],
            [2,2],
            [3,3]])

print("input_data",input_data.shape)
# l0_dict=gd["l0"]
# print(l0_dict)
h = np.zeros(n_hidden)
c = np.zeros(n_hidden)  

# for layer_key0 in state_dict_obj.layer_list:
#   #continue
#   print(layer_key0)
#   cur_l0_dict=gd[layer_key0]
#   output_list=[]
#   h = np.zeros(n_hidden)
#   c = np.zeros(n_hidden)  
#   for eventx in input_data:
  
#     c,h = lstm_fn(eventx,cur_l0_dict,h,c)
#     print("c.shape,h.shape", c.shape,h.shape)
#     #print(c)
#     fc_wt,fc_bias=ld["fc"]["weight"],ld["fc"]["bias"]
#     #numpy_res=np.dot(fc_wt, fc_wt)+fc_bias
#     numpy_res=np.dot(fc_wt, h) + fc_bias
#     #sigmoid_numpy_res=sigmoid(numpy_res)
#     #print("numpy_res", numpy_res)
#     # numpy_res2=model_output(h, fc_wt, fc_bias)
#     # print("numpy_res2", numpy_res2) 
#     print("numpy_res", numpy_res) 
# for eventx in input_data:
#   h = np.zeros(n_hidden)
#   c = np.zeros(n_hidden)
#   for layer_key0 in state_dict_obj.layer_list: 
#     cur_l0_dict=gd[layer_key0]   
#     if layer_key0=="l0": cur_input=eventx
#     else: cur_input=h
#     c,h = lstm_fn(cur_input,cur_l0_dict,h,c)
#     fc_wt,fc_bias=ld["fc"]["weight"],ld["fc"]["bias"]
#     numpy_res=np.dot(fc_wt, h) + fc_bias
#     sigmoid_numpy_res=sigmoid(numpy_res)
#     print("numpy_res TEST", numpy_res) 

numpy_res=None
for layer_key0 in state_dict_obj.layer_list:
  print("layer_key0",layer_key0)
  h = np.zeros(n_hidden)
  c = np.zeros(n_hidden)  

  cur_l0_dict=gd[layer_key0]
  output_list=[]
  for eventx in input_data:
    if layer_key0=="l0": cur_input=eventx
    else: cur_input=h
    # print("eventx",eventx.shape)
    # print("h",h.shape)
    # print("c",c.shape)
    c,h = lstm_fn(cur_input,cur_l0_dict,h,c)
    print("hidden before fully connected:",h)
    fc_wt,fc_bias=ld["fc"]["weight"],ld["fc"]["bias"]
    numpy_res=np.dot(fc_wt, h) + fc_bias
    sigmoid_numpy_res=sigmoid(numpy_res)
    #print("numpy_res", numpy_res) 
    #print("sigmoid_numpy_res",sigmoid_numpy_res)


#rPyTorch expects an extra dimension for batch size:
torch_batch = torch.Tensor(input_data).unsqueeze(0) 

torch_output, (torch_hidden, torch_cell) = torch_lstm(torch_batch, None)
print(torch_output)


      #print()
      # f = forget_gate(eventx, h, l0_dict["weight_hf"], l0_dict["bias_hf"], l0_dict["weight_xf"], l0_dict["bias_xf"], c)
      # i =  input_gate(eventx, h, l0_dict["weight_hi"], l0_dict["bias_hi"], l0_dict["weight_xi"], l0_dict["bias_xi"], 
      #             l0_dict["weight_hl"], l0_dict["bias_hl"], l0_dict["weight_xl"], l0_dict["bias_xl"])
      # c = cell_state(f,i)
      # h = output_gate(eventx, h, l0_dict["weight_ho"], l0_dict["bias_ho"], l0_dict["weight_xo"], l0_dict["bias_xo"], c)
      #res=np.dot(self.fc_Weight, lstm_output)+self.fc_Bias


    # f = forget_gate(eventx, h, self.Weights_hf, self.Bias_hf, self.Weights_xf, self.Bias_xf, c)
    # i =  input_gate(eventx, h, self.Weights_hi, self.Bias_hi, self.Weights_xi, self.Bias_xi, 
    #             self.Weights_hl, self.Bias_hl, self.Weights_xl, self.Bias_xl)
    # c = cell_state(f,i)
    # h = output_gate(eventx, h, self.Weights_ho, self.Bias_ho, self.Weights_xo, self.Bias_xo, c)


#     if self.matching_in_out: # doesnt make sense but it was as it was in main code :(
#         output_list.append(h)
# if self.matching_in_out:
#     return output_list
# else:
#     return h


lstm.weight_ih_l0 torch.Size([12, 2])
lstm.weight_hh_l0 torch.Size([12, 3])
lstm.bias_ih_l0 torch.Size([12])
lstm.bias_hh_l0 torch.Size([12])
lstm.weight_ih_l1 torch.Size([12, 3])
lstm.weight_hh_l1 torch.Size([12, 3])
lstm.bias_ih_l1 torch.Size([12])
lstm.bias_hh_l1 torch.Size([12])
lstm.weight_ih_l2 torch.Size([12, 3])
lstm.weight_hh_l2 torch.Size([12, 3])
lstm.bias_ih_l2 torch.Size([12])
lstm.bias_hh_l2 torch.Size([12])
fc.weight torch.Size([5, 3])
fc.bias torch.Size([5])
3
3
input_data (3, 2)
layer_key0 l0
hidden before fully connected: [-0.11168634  0.07720813 -0.12893798]
hidden before fully connected: [-0.26774592  0.04562645 -0.14295836]
hidden before fully connected: [-0.44732296  0.00892997 -0.12411046]
layer_key0 l1
hidden before fully connected: [0.10540431 0.09455647 0.03062243]
hidden before fully connected: [0.16128964 0.14114295 0.0679142 ]
hidden before fully connected: [0.18657763 0.16564664 0.10196319]
layer_key0 l2
hidden before fully connected: [0.05085452 0.0176793

In [None]:
def out2label(rnn_flat_out,label_list): #a flat rnn output to split into slices, and get the label weights for each slice
  final_list=[]
  n_slices=int(len(rnn_flat_out)/len(label_list))
  for i0 in range(n_slices):
    i1=i0+1
    cur_slice=rnn_flat_out[i0*len(label_list):i1*len(label_list)]
    tmp_list=[]
    for lb0,cs0 in zip(label_list,cur_slice): tmp_list.append((lb0,cs0))
    tmp_list.sort(key=lambda x:-x[-1])
    final_list.append(tmp_list)
  return final_list

test_labels=["a","b","c"]
test_out=[0.1,0.5,0.2,0.7,0.4,0.9,0.6,0.5,0.3]
tmp_out=out2label(test_out,test_labels)
for to0 in tmp_out:
  print(to0)


[('b', 0.5), ('c', 0.2), ('a', 0.1)]
[('c', 0.9), ('a', 0.7), ('b', 0.4)]
[('a', 0.6), ('b', 0.5), ('c', 0.3)]


In [None]:
import pickle, os
e0=15
model_dir=os.path.join(cwd,"mfcc_models") 
pickle_path=os.path.join(model_dir, "model-%s.pickle"%e0)

with open(pickle_path, 'rb') as f:
    data_dict = pickle.load(f)
print(data_dict.keys())
for a,b in data_dict.items():
  print(a,b)
#data_dict["labels"]

dict_keys(['n_output', 'n_input', 'n_hidden', 'n_layers', 'out2labels', 'labels', 'state_dict'])
n_output 4200
n_input 26
n_hidden 32
n_layers 2
out2labels <function out2label at 0x7fc1eee01170>
labels ['', '+', '2', '3', '7', '?', 'D', 'S', 'T', 'a', 'aa', 'ai', 'b', 'd', 'dh', 'e', 'ee', 'ei', 'f', 'g', 'gh', 'h', 'i', 'ia', 'k', 'kh', 'l', 'm', 'n', 'o', 'oa', 'oo', 'q', 'r', 's', 'sh', 't', 'th', 'u', 'w', 'y', 'z']
state_dict {'lstm.weight_ih_l0': array([[ 0.11381322, -0.05507249, -0.01100899, ..., -0.05258892,
        -0.03288848,  0.03222077],
       [ 0.10720325,  0.09739417, -0.17165136, ..., -0.01772752,
        -0.16836134, -0.08270442],
       [ 0.10236993, -0.03652595,  0.1824562 , ...,  0.045928  ,
         0.071448  ,  0.09483866],
       ...,
       [ 0.00756519, -0.00283265,  0.00449615, ..., -0.14076333,
         0.05583347,  0.15749833],
       [ 0.1192165 , -0.12916146, -0.04465875, ...,  0.09137937,
        -0.07800445,  0.1311176 ],
       [-0.0896857 ,  0.1409714

#combined numpy alternative with pickled model - OLD


In [None]:
import os,pickle, random
import numpy as np
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav


#torch.manual_seed(1)
random.seed(1)

#generic/utility functions
def load_pickle(pickle_path0):
  with open(pickle_path0, 'rb') as f0:
    data_dict0 = pickle.load(f0)
  return data_dict0  

def get_quarters(array0): #split hidden arrays/tensors into quarters to get weights and biases for ignore, forget, learn, output
  all_qt_slices=[]
  q_size=int(array0.shape[0]/4) #the size of all hidden arrays/tensors is always a multiple of 4
  for i0 in range(0,4):
    i1=i0+1
    cur_qt_slice=array0[i0*q_size:i1*q_size] #current quarter/slice
    all_qt_slices.append(cur_qt_slice)
  return all_qt_slices

#Output processing
def out2labels(rnn_flat_out,label_list): #a flat rnn output to split into slices, and get the label weights for each slice
  final_list=[]
  n_slices=int(len(rnn_flat_out)/len(label_list))
  for i0 in range(n_slices):
    i1=i0+1
    cur_slice=rnn_flat_out[i0*len(label_list):i1*len(label_list)]
    tmp_list=[]
    for lb0,cs0 in zip(label_list,cur_slice): tmp_list.append((lb0,cs0))
    tmp_list.sort(key=lambda x:-x[-1])
    final_list.append(tmp_list)
  return final_list


#Input processing/ feature extraction
def get_mfcc(wav_fpath0):
  (rate,sig) = wav.read(wav_fpath0)
  if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
  return mfcc(sig,rate)


def get_fbank(wav_fpath0):
  (rate,sig) = wav.read(wav_fpath0)
  if len(sig.shape)>1: sig= sig.sum(axis=1) / 2 #handle mono/sterio
  return logfbank(sig,rate)

def extract_features(wav_fpath0): return get_fbank(wav_fpath0)

#NN functions

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def forget_gate(x, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, prev_cell_state):
    forget_hidden  = np.dot(Weights_hf, h) + Bias_hf
    forget_eventx  = np.dot(Weights_xf, x) + Bias_xf
    return np.multiply( sigmoid(forget_hidden + forget_eventx), prev_cell_state )

def input_gate(x, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, Weights_hl, Bias_hl, Weights_xl, Bias_xl):
    ignore_hidden  = np.dot(Weights_hi, h) + Bias_hi
    ignore_eventx  = np.dot(Weights_xi, x) + Bias_xi
    learn_hidden   = np.dot(Weights_hl, h) + Bias_hl
    learn_eventx   = np.dot(Weights_xl, x) + Bias_xl
    return np.multiply( sigmoid(ignore_eventx + ignore_hidden), np.tanh(learn_eventx + learn_hidden) )

def cell_state(forget_gate_output, input_gate_output):
    return forget_gate_output + input_gate_output

  
def output_gate(x, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, cell_state):
    out_hidden = np.dot(Weights_ho, h) + Bias_ho
    out_eventx = np.dot(Weights_xo, x) + Bias_xo
    return np.multiply( sigmoid(out_eventx + out_hidden), np.tanh(cell_state) )


# def lstm_fn(eventx,l0_dict,h,c):
#   f = forget_gate(eventx, h, l0_dict["weight_hf"], l0_dict["bias_hf"], l0_dict["weight_xf"], l0_dict["bias_xf"], c)
#   i =  input_gate(eventx, h, l0_dict["weight_hi"], l0_dict["bias_hi"], l0_dict["weight_xi"], l0_dict["bias_xi"], 
#               l0_dict["weight_hl"], l0_dict["bias_hl"], l0_dict["weight_xl"], l0_dict["bias_xl"])
#   c = cell_state(f,i)
#   h = output_gate(eventx, h, l0_dict["weight_ho"], l0_dict["bias_ho"], l0_dict["weight_xo"], l0_dict["bias_xo"], c)
#   return c,h

def lstm_fn(eventx0,l0_dict,h0,c0):
  f0 = forget_gate(eventx0, h0, l0_dict["weight_hf"], l0_dict["bias_hf"], l0_dict["weight_xf"], l0_dict["bias_xf"], c0)
  i0 =  input_gate(eventx0, h0, l0_dict["weight_hi"], l0_dict["bias_hi"], l0_dict["weight_xi"], l0_dict["bias_xi"], 
              l0_dict["weight_hl"], l0_dict["bias_hl"], l0_dict["weight_xl"], l0_dict["bias_xl"])
  c0 = cell_state(f0,i0)
  h0 = output_gate(eventx0, h0, l0_dict["weight_ho"], l0_dict["bias_ho"], l0_dict["weight_xo"], l0_dict["bias_xo"], c0)
  return c0,h0



class state_dict_cls: #process state dict, extract weights and biases in a more organized way
  def __init__(self,state_dict_input):
    self.layer_list=[]
    self.layered_dict={}
    self.gate_dict={}
    self.layer_gate_dict={}
    self.n_hidden=None
    for key,val in state_dict_input.items():
      #a0,b0=key.split(".")
      tmp_split=key.split(".")
      b0=tmp_split[-1]
      b_split=b0.split("_")
      last_b=b_split[-1]
      if last_b.startswith("l"): #to get the weights/biases for each layer
        #val=val.numpy()
        self.n_hidden=int(val.shape[0]/4)
        if not last_b in self.layer_list: self.layer_list.append(last_b) #just to collect all the layers
        tmp_dict0=self.layered_dict.get(last_b,{})
        tmp_gate_dict0=self.gate_dict.get(last_b,{})
        new_key="_".join(key.split("_")[:-1]) 
        new_key="_".join(b_split[:-1])
        new_key=new_key.replace("ih","x").replace("hh","h")
        tmp_dict0[new_key]=val
        self.layered_dict[last_b]=tmp_dict0
        val_slices=get_quarters(val)
        gate_names=["i","f","l","o"]
        for gn0,gslice0 in zip(gate_names,val_slices):
          gate_key0=new_key+gn0
          tmp_gate_dict0[gate_key0]=gslice0
        #tmp_gate_dict0[new_key]=tmp_gate_dict1
        self.gate_dict[last_b]=tmp_gate_dict0
      if len(b_split)==1: #to get the fully connected weights/biases
        tmp_dict0=self.layered_dict.get("fc",{})
        tmp_dict0[b0]=val#.numpy()
        self.layered_dict["fc"]=tmp_dict0
    self.n_layers=len(self.layer_list)

#create RNN from state dict and other model parameters, use it to predict based on the input feature list
class numpy_rnn_cls:
  def __init__(self,pickle_dict_path):
    data_dict=load_pickle(pickle_dict_path)
    self.n_input=data_dict["n_input"]
    self.n_output=data_dict["n_output"]
    self.n_hidden=data_dict["n_hidden"]
    self.n_layers=data_dict["n_layers"]
    self.state_dict=data_dict["state_dict"]
    self.labels=data_dict["labels"]
    #self.out2labels=data_dict["out2labels"]
    cur_state_dict_obj=state_dict_cls(self.state_dict)
    self.layered_dict=cur_state_dict_obj.layered_dict
    self.gate_dict=cur_state_dict_obj.gate_dict
    self.layer_list=cur_state_dict_obj.layer_list

  def predict(self,input_feature_list):
    #print(feature_list.shape)
    gd=self.gate_dict
    ld=self.layered_dict
    numpy_res=None
    h = np.zeros(n_hidden)
    c = np.zeros(n_hidden)  

    for layer_key0 in self.layer_list:
      print("layer_key0",layer_key0)
      h = np.zeros(n_hidden)
      c = np.zeros(n_hidden)  

      cur_l0_dict=gd[layer_key0]
      output_list=[]
      for eventx in input_feature_list:
        #print("eventx", eventx.shape, eventx[0])
        if layer_key0=="l0": cur_input=eventx
        else: cur_input=h
        c,h = lstm_fn(cur_input,cur_l0_dict,h,c)
        fc_wt,fc_bias=ld["fc"]["weight"],ld["fc"]["bias"]
        numpy_res=np.dot(fc_wt, h) + fc_bias
        sigmoid_numpy_res=sigmoid(numpy_res)
        print("cur_input", cur_input[:5])
        print("numpy_res", numpy_res[:10])
      #numpy_res=np.dot(fc_wt, h) + fc_bias
      #sigmoid_numpy_res=sigmoid(numpy_res)

      #print(numpy_res.shape)
    return numpy_res



e0=9
model_dir=os.path.join(cwd,"mfcc_models") 
pickle_path=os.path.join(model_dir, "model-%s.pickle"%e0)
numpy_rnn_obj=numpy_rnn_cls(pickle_path)
labels0=numpy_rnn_obj.labels
#cur_out2labels=numpy_rnn_obj.out2labels

cur_dir="basic"
wav_file_list=os.listdir(cur_dir)
test_i=15
for fname in wav_file_list[test_i:test_i+1]:
  wav_fpath=os.path.join(cur_dir,fname)
  print(wav_fpath)
  cur_feature_list=extract_features(wav_fpath)
  # for cf in cur_feature_list[:20]:
  #   print(cf[:5])
  continue
  # print(cur_feature_list.shape)
  flat_out0=numpy_rnn_obj.predict(cur_feature_list)
  #print(flat_out0[:10])
  labeled_out=out2labels(flat_out0,labels0)
  for a in labeled_out[:20]:
    print(a[:5])
  #print(labeled_out[0])
  print("--")

#print(numpy_rnn_obj.state_dict)



basic/enta-dhakee.wav


In [None]:
rnn = nn.LSTM(10, 20, 2)
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))
print(output.shape)

torch.Size([5, 3, 20])


In [None]:
def lstm_fn(eventx0,l0_dict,h0,c0):
  f0 = forget_gate(eventx0, h0, l0_dict["weight_hf"], l0_dict["bias_hf"], l0_dict["weight_xf"], l0_dict["bias_xf"], c0)
  i0 =  input_gate(eventx0, h0, l0_dict["weight_hi"], l0_dict["bias_hi"], l0_dict["weight_xi"], l0_dict["bias_xi"], 
              l0_dict["weight_hl"], l0_dict["bias_hl"], l0_dict["weight_xl"], l0_dict["bias_xl"])
  c0 = cell_state(f0,i0)
  h0 = output_gate(eventx0, h0, l0_dict["weight_ho"], l0_dict["bias_ho"], l0_dict["weight_xo"], l0_dict["bias_xo"], c0)
  return c0,h0

n_layers=1
n_hidden=4
n_input=2
seq_len=3
rnn = nn.LSTM(n_input, n_hidden, n_layers)
input0 = torch.randn(1,seq_len, n_input)
h0 = torch.randn(n_layers, seq_len, n_hidden)
c0 = torch.randn(n_layers, seq_len, n_hidden)
#lstm0 = nn.LSTM(input_size, hidden_size,num_layers)

output, (hn, cn) = rnn(input0, (h0, c0))
print("input", input0.shape)
print("output", output.shape)
print("h0", h0.shape)
print("c0", c0.shape)

# for a,b in rnn.state_dict().items():
#   print(a,b.shape)


cur_state_dict_obj=state_dict_cls(rnn.state_dict())
layered_dict=cur_state_dict_obj.layered_dict
gate_dict=cur_state_dict_obj.gate_dict
layer_list=cur_state_dict_obj.layer_list

print("RNN out:", output)

h0 = np.zeros(n_hidden)
c0 = np.zeros(n_hidden)  

l0_dict=gate_dict["l0"]
#print(l0_dict)
print(input0.shape)
input0=input0.squeeze()
print(input0.shape)

#c1,h1=lstm_fn(input0,l0_dict,h1,c1)
# for in0 in input0:
#   c1,h1=lstm_fn(in0,l0_dict,h1,c1)
#   print("numpy layer1 out:", h1)

for eventx0 in input0:
  f0 = forget_gate(eventx0, h0, l0_dict["weight_hf"], l0_dict["bias_hf"], l0_dict["weight_xf"], l0_dict["bias_xf"], c0)
  i0 =  input_gate(eventx0, h0, l0_dict["weight_hi"], l0_dict["bias_hi"], l0_dict["weight_xi"], l0_dict["bias_xi"], 
              l0_dict["weight_hl"], l0_dict["bias_hl"], l0_dict["weight_xl"], l0_dict["bias_xl"])
  c0 = cell_state(f0,i0)
  h0 = output_gate(eventx0, h0, l0_dict["weight_ho"], l0_dict["bias_ho"], l0_dict["weight_xo"], l0_dict["bias_xo"], c0)
  print(h0)


# l1_dict=gate_dict["l1"]
# input0=h1
# h1 = np.zeros(n_hidden)
# c1 = np.zeros(n_hidden)  

# for in0 in input0:
#   c1,h1=lstm_fn(in0,l1_dict,h1,c1)
#   print("numpy layer2 out:", h1)


#print(layer_list)



input torch.Size([1, 3, 2])
output torch.Size([1, 3, 4])
h0 torch.Size([1, 3, 4])
c0 torch.Size([1, 3, 4])
RNN out: tensor([[[-0.4255,  0.0703, -0.0704, -0.2604],
         [-0.1669, -0.1120,  0.1936,  0.1386],
         [ 0.2771,  0.3113, -0.0537, -0.1426]]], grad_fn=<StackBackward0>)
torch.Size([1, 3, 2])
torch.Size([3, 2])
[ 0.01599162 -0.09458869  0.022115    0.02131772]
[-0.20001404  0.05743947  0.02365302 -0.18735454]
[-0.07845213  0.03585899  0.0385348  -0.12812642]


OrderedDict([('weight_ih_l0',
              tensor([[ 1.0213e-01,  6.3952e-02, -9.7162e-02,  9.1892e-02,  2.0094e-01,
                        1.5534e-01,  2.1817e-01, -5.5249e-02, -7.6144e-02, -1.1524e-02],
                      [-1.5845e-01,  7.4578e-02,  1.3323e-01, -1.8331e-01,  3.8224e-03,
                        8.1399e-02, -3.8151e-02, -6.0633e-02, -1.1923e-01, -1.9061e-01],
                      [ 6.4217e-04, -1.0455e-01, -1.2971e-03,  1.3206e-01, -1.9188e-01,
                       -1.0039e-01, -8.7314e-02,  1.9655e-01,  3.2462e-02,  1.0090e-01],
                      [ 8.4178e-02,  3.3291e-02,  2.0106e-01,  8.2559e-02, -1.5858e-01,
                       -3.0008e-02,  2.1651e-01, -1.9542e-01, -2.0753e-01,  4.4175e-02],
                      [ 2.0201e-01,  2.0012e-02,  7.6981e-02,  1.2006e-01,  8.2522e-02,
                        2.2202e-01, -7.1537e-02, -1.2796e-01, -2.1222e-01, -1.9622e-02],
                      [-1.2919e-01,  1.3150e-01, -7.8594e-02,  1.9051e-01, -3.6335e-0

In [None]:
input_size=26
hidden_size=16
num_layers=3
lstm0 = nn.LSTM(input_size, hidden_size,num_layers)

input = torch.randn(5, 3, 9)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))


In [None]:
lstm0.state_dict()

OrderedDict([('weight_ih_l0',
              tensor([[ 0.1886,  0.1071,  0.0918,  ..., -0.1452,  0.0283, -0.0444],
                      [ 0.0538,  0.0290,  0.0762,  ...,  0.1501, -0.0477, -0.0128],
                      [-0.1771,  0.0381, -0.0900,  ...,  0.0302, -0.0964,  0.0283],
                      ...,
                      [ 0.0018,  0.2154, -0.2086,  ..., -0.2078, -0.0641, -0.1616],
                      [ 0.1231, -0.1023,  0.0296,  ..., -0.0549, -0.0996,  0.1044],
                      [-0.0550,  0.1001, -0.2244,  ..., -0.0688, -0.2028, -0.0229]])),
             ('weight_hh_l0',
              tensor([[ 0.1303, -0.2015,  0.2072,  ..., -0.1468,  0.2389,  0.2119],
                      [-0.1233, -0.1802, -0.1834,  ...,  0.0669,  0.2073,  0.0287],
                      [-0.0506,  0.2010, -0.0127,  ..., -0.0316,  0.0955, -0.2338],
                      ...,
                      [ 0.1254, -0.2221,  0.2206,  ...,  0.2223, -0.2220, -0.0778],
                      [-0.2065, -0.1893,  0

In [None]:
#From answer
#Set Parameters for a small LSTM network
input_size  = 2 # size of one 'event', or sample, in our batch of data
hidden_dim  = 3 # 3 cells in the LSTM layer
output_size = 10 # desired model output

def sigmoid(x):
    return 1/(1 + np.exp(-x))


num_layers=2
torch_lstm = RNN( input_size, 
                 hidden_dim ,
                 output_size,
                 num_layers,
                 matching_in_out=True
                 )

state = torch_lstm.state_dict() # state will capture the weights of your model

##
### NOT MY CODE
import numpy as np 
from scipy.special import expit as sigmoid

def forget_gate(x, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, prev_cell_state):
    forget_hidden  = np.dot(Weights_hf, h) + Bias_hf
    forget_eventx  = np.dot(Weights_xf, x) + Bias_xf
    return np.multiply( sigmoid(forget_hidden + forget_eventx), prev_cell_state )

def input_gate(x, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, Weights_hl, Bias_hl, Weights_xl, Bias_xl):
    ignore_hidden  = np.dot(Weights_hi, h) + Bias_hi
    ignore_eventx  = np.dot(Weights_xi, x) + Bias_xi
    learn_hidden   = np.dot(Weights_hl, h) + Bias_hl
    learn_eventx   = np.dot(Weights_xl, x) + Bias_xl
    return np.multiply( sigmoid(ignore_eventx + ignore_hidden), np.tanh(learn_eventx + learn_hidden) )


def cell_state(forget_gate_output, input_gate_output):
    return forget_gate_output + input_gate_output

  
def output_gate(x, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, cell_state):
    out_hidden = np.dot(Weights_ho, h) + Bias_ho
    out_eventx = np.dot(Weights_xo, x) + Bias_xo
    return np.multiply( sigmoid(out_eventx + out_hidden), np.tanh(cell_state) )

##

def get_slices(hidden_dim):
    slices=[]
    breaker=(hidden_dim*4)
    slices=[[i,i+3] for i in range(0, breaker, breaker//4)]
    return slices

###
class numpy_lstm:
    def __init__( self, layer_num=0, hidden_dim=1, matching_in_out=False):
        self.matching_in_out=matching_in_out
        self.layer_num=layer_num
        self.hidden_dim=hidden_dim
        
    def init_weights_from_pytorch(self, state):
        slices=get_slices(self.hidden_dim)
        #print (slices)

        #Event (x) Weights and Biases for all gates
        
        lstm_weight_ih='lstm.weight_ih_l'+str(self.layer_num)
        self.Weights_xi = state[lstm_weight_ih][slices[0][0]:slices[0][1]].numpy()  # shape  [h, x]
        self.Weights_xf = state[lstm_weight_ih][slices[1][0]:slices[1][1]].numpy()  # shape  [h, x]
        self.Weights_xl = state[lstm_weight_ih][slices[2][0]:slices[2][1]].numpy()  # shape  [h, x]
        self.Weights_xo = state[lstm_weight_ih][slices[3][0]:slices[3][1]].numpy() # shape  [h, x]

        
        lstm_bias_ih='lstm.bias_ih_l'+str(self.layer_num)
        self.Bias_xi = state[lstm_bias_ih][slices[0][0]:slices[0][1]].numpy()  #shape is [h, 1]
        self.Bias_xf = state[lstm_bias_ih][slices[1][0]:slices[1][1]].numpy()  #shape is [h, 1]
        self.Bias_xl = state[lstm_bias_ih][slices[2][0]:slices[2][1]].numpy()  #shape is [h, 1]
        self.Bias_xo = state[lstm_bias_ih][slices[3][0]:slices[3][1]].numpy() #shape is [h, 1]
        
        
        lstm_weight_hh='lstm.weight_hh_l'+str(self.layer_num)

        #Hidden state (h) Weights and Biases for all gates
        self.Weights_hi = state[lstm_weight_hh][slices[0][0]:slices[0][1]].numpy()  #shape is [h, h]
        self.Weights_hf = state[lstm_weight_hh][slices[1][0]:slices[1][1]].numpy()  #shape is [h, h]
        self.Weights_hl = state[lstm_weight_hh][slices[2][0]:slices[2][1]].numpy()  #shape is [h, h]
        self.Weights_ho = state[lstm_weight_hh][slices[3][0]:slices[3][1]].numpy() #shape is [h, h]
        
        
        lstm_bias_hh='lstm.bias_hh_l'+str(self.layer_num)

        self.Bias_hi = state[lstm_bias_hh][slices[0][0]:slices[0][1]].numpy()  #shape is [h, 1]
        self.Bias_hf = state[lstm_bias_hh][slices[1][0]:slices[1][1]].numpy()  #shape is [h, 1]
        self.Bias_hl = state[lstm_bias_hh][slices[2][0]:slices[2][1]].numpy()  #shape is [h, 1]
        self.Bias_ho = state[lstm_bias_hh][slices[3][0]:slices[3][1]].numpy() #shape is [h, 1]
    def forward_lstm_pass(self,input_data):
        h = np.zeros(self.hidden_dim)
        c = np.zeros(self.hidden_dim)
        
        output_list=[]
        for eventx in input_data:
            f = forget_gate(eventx, h, self.Weights_hf, self.Bias_hf, self.Weights_xf, self.Bias_xf, c)
            i =  input_gate(eventx, h, self.Weights_hi, self.Bias_hi, self.Weights_xi, self.Bias_xi, 
                        self.Weights_hl, self.Bias_hl, self.Weights_xl, self.Bias_xl)
            c = cell_state(f,i)
            h = output_gate(eventx, h, self.Weights_ho, self.Bias_ho, self.Weights_xo, self.Bias_xo, c)
            if self.matching_in_out: # doesnt make sense but it was as it was in main code :(
                output_list.append(h)
        if self.matching_in_out:
            return output_list
        else:
            return h

###
class fully_connected_layer:
    def __init__(self,state, dict_name='fc', ):
        print("state[dict_name+'.weight']",state[dict_name+'.weight'].shape)
        self.fc_Weight = state[dict_name+'.weight'][0].numpy()
        self.fc_Bias = state[dict_name+'.bias'][0].numpy() #shape is [,output_size]
        # self.fc_Weight = state[dict_name+'.weight'].numpy()
        # self.fc_Bias = state[dict_name+'.bias'].numpy() #shape is [,output_size]
        
    def forward(self,lstm_output, is_sigmoid=True):
        print("self.fc_Weight",self.fc_Weight.shape)
        print("self.fc_Bias",self.fc_Bias.shape)
        print("lstm_output",lstm_output.shape)
        res=np.dot(self.fc_Weight, lstm_output)+self.fc_Bias
        print(res)
        if is_sigmoid:
            return sigmoid(res)
        else:
            return res

###
class RNN_model_Numpy:
    def __init__(self, state, input_size, hidden_dim, output_size, num_layers, matching_in_out=True):
        self.lstm_layers=[]
        for i in range(0, num_layers):
            lstm_layer_obj=numpy_lstm(layer_num=i, hidden_dim=hidden_dim, matching_in_out=True)
            lstm_layer_obj.init_weights_from_pytorch(state) 
            self.lstm_layers.append(lstm_layer_obj)
        
        self.hidden2out=fully_connected_layer(state, dict_name='hidden2out')
        
    def forward(self, feature_list):
        for x in self.lstm_layers:
            lstm_output=x.forward_lstm_pass(feature_list)
            feature_list=lstm_output
            feature_list=np.array(feature_list)
        #print(feature_list.shape)
            
        return self.hidden2out.forward(feature_list, is_sigmoid=False)

###
data = np.array(
           [[1,1],
            [2,2],
            [3,3]])



check=RNN_model_Numpy(state, input_size, hidden_dim, output_size, num_layers)
check.forward(data)

# torch_batch = torch.Tensor(data).unsqueeze(0) 
# #torch_batch=data
# torch_output, (torch_hidden, torch_cell) = torch_lstm(torch_batch, None)
# print(torch_output)

#print(check.shape)

state[dict_name+'.weight'] torch.Size([10, 3])
self.fc_Weight (3,)
self.fc_Bias ()
lstm_output (3, 3)
[0.30007786 0.41136591 0.38244191]


TypeError: ignored

In [None]:
#from the tutorial
import torch
from torch import nn
input_size  = 2 # size of one 'event', or sample, in our batch of data
hidden_dim  = 3 # 3 cells in the LSTM layer
output_size = 10 # desired model output


#Initialize an PyTorch LSTM for comparison to our Numpy LSTM
class LSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers=1):
        super(LSTM, self).__init__()
        self.hidden_dim=hidden_dim
        #LSTM Layer
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)
        #Final, fully-connected layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x, hidden):
        batch_size = 1
        # get LSTM outputs
        lstm_output, (h,c) = self.lstm(x, hidden)
        # shape output to be (batch_size*seq_length, hidden_dim)
        lstm_output = lstm_output.view(-1, self.hidden_dim)  
        
        # get final output 
        model_output = self.fc(lstm_output)
        
        return model_output, (h,c)
      
torch.manual_seed(5)
torch_lstm = LSTM(input_size = input_size, 
                 hidden_dim = hidden_dim,
                 output_size = output_size,
                  n_layers=1
                 )

state = torch_lstm.state_dict()
#print(state)


#Set Parameters for a small LSTM network
input_size  = 2 # size of one 'event', or sample, in our batch of data
hidden_dim  = 3 # 3 cells in the LSTM layer
output_size = 4 # desired model output

def model_output(lstm_output, fc_Weight, fc_Bias):
  '''Takes the LSTM output and transforms it to our desired 
  output size using a final, fully connected layer'''
  return np.dot(fc_Weight, lstm_output) + fc_Bias

#############
import numpy as np 
from scipy.special import expit as sigmoid

def forget_gate(x, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, prev_cell_state):
    forget_hidden  = np.dot(Weights_hf, h) + Bias_hf
    forget_eventx  = np.dot(Weights_xf, x) + Bias_xf
    return np.multiply( sigmoid(forget_hidden + forget_eventx), prev_cell_state )

def input_gate(x, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, Weights_hl, Bias_hl, Weights_xl, Bias_xl):
    ignore_hidden  = np.dot(Weights_hi, h) + Bias_hi
    ignore_eventx  = np.dot(Weights_xi, x) + Bias_xi
    learn_hidden   = np.dot(Weights_hl, h) + Bias_hl
    learn_eventx   = np.dot(Weights_xl, x) + Bias_xl
    return np.multiply( sigmoid(ignore_eventx + ignore_hidden), np.tanh(learn_eventx + learn_hidden) )


def cell_state(forget_gate_output, input_gate_output):
    return forget_gate_output + input_gate_output

  
def output_gate(x, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, cell_state):
    out_hidden = np.dot(Weights_ho, h) + Bias_ho
    out_eventx = np.dot(Weights_xo, x) + Bias_xo
    return np.multiply( sigmoid(out_eventx + out_hidden), np.tanh(cell_state) )




#Event (x) Weights and Biases for all gates
Weights_xi = state['lstm.weight_ih_l0'][0:3].numpy()  # shape  [h, x]
Weights_xf = state['lstm.weight_ih_l0'][3:6].numpy()  # shape  [h, x]
Weights_xl = state['lstm.weight_ih_l0'][6:9].numpy()  # shape  [h, x]
Weights_xo = state['lstm.weight_ih_l0'][9:12].numpy() # shape  [h, x]

Bias_xi = state['lstm.bias_ih_l0'][0:3].numpy()  #shape is [h, 1]
Bias_xf = state['lstm.bias_ih_l0'][3:6].numpy()  #shape is [h, 1]
Bias_xl = state['lstm.bias_ih_l0'][6:9].numpy()  #shape is [h, 1]
Bias_xo = state['lstm.bias_ih_l0'][9:12].numpy() #shape is [h, 1]

#Hidden state (h) Weights and Biases for all gates
Weights_hi = state['lstm.weight_hh_l0'][0:3].numpy()  #shape is [h, h]
Weights_hf = state['lstm.weight_hh_l0'][3:6].numpy()  #shape is [h, h]
Weights_hl = state['lstm.weight_hh_l0'][6:9].numpy()  #shape is [h, h]
Weights_ho = state['lstm.weight_hh_l0'][9:12].numpy() #shape is [h, h]

Bias_hi = state['lstm.bias_hh_l0'][0:3].numpy()  #shape is [h, 1]
Bias_hf = state['lstm.bias_hh_l0'][3:6].numpy()  #shape is [h, 1]
Bias_hl = state['lstm.bias_hh_l0'][6:9].numpy()  #shape is [h, 1]
Bias_ho = state['lstm.bias_hh_l0'][9:12].numpy() #shape is [h, 1]

#--------------------------------------------------------------------
# Final, fully connected layer Weights and Bias
# fc_Weight = state['fc.weight'][0].numpy() #shape is [h, output_size]
# fc_Bias = state['fc.bias'][0].numpy() #shape is [,output_size]

fc_Weight = state['fc.weight'].numpy() #shape is [h, output_size]
fc_Bias = state['fc.bias'].numpy() #shape is [,output_size]

#

#Simple Time Series Data
data = np.array(
           [[1,1],
            [2,2],
            [3,3]])

#Initialize cell and hidden states with zeroes
h = np.zeros(hidden_dim)
c = np.zeros(hidden_dim)

#Loop through data, updating the hidden and cell states after each pass
out_list=[]
for eventx in data:
  f = forget_gate(eventx, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, c)
  i =  input_gate(eventx, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, 
                Weights_hl, Bias_hl, Weights_xl, Bias_xl)
  c = cell_state(f,i)
  h = output_gate(eventx, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, c)
  #print(h.shape)
  print(">>>", model_output(h, fc_Weight, fc_Bias))  
  #out_list.append(h)
  #print("fc_Bias", fc_Bias)
  #print(h.shape,fc_Weight.shape,fc_Bias.shape)
print(out_list)
# cur_array=np.array(out_list)
# print(cur_array.shape, cur_array )
# print(">>>", model_output(np.array(out_list), fc_Weight, fc_Bias))


#
#rPyTorch expects an extra dimension for batch size:
torch_batch = torch.Tensor(data).unsqueeze(0) 

torch_output, (torch_hidden, torch_cell) = torch_lstm(torch_batch, None)
print("torch_output:", torch_output)
print(torch_lstm)

>>> [ 0.09764027  0.39163138 -0.55473132 -0.20551252  0.5657635  -0.06806453
  0.10073607 -0.29592592  0.34075336 -0.36293995]
>>> [-0.02838194  0.38721036 -0.59302359 -0.30111695  0.60871665 -0.17436532
  0.02706821 -0.34745674  0.30024405 -0.48440875]
>>> [-0.08072774  0.39260536 -0.60136605 -0.34261238  0.61643253 -0.21081523
 -0.00487191 -0.37496768  0.28702191 -0.54419928]
[]
torch_output: tensor([[ 0.0976,  0.3916, -0.5547, -0.2055,  0.5658, -0.0681,  0.1007, -0.2959,
          0.3408, -0.3629],
        [-0.0284,  0.3872, -0.5930, -0.3011,  0.6087, -0.1744,  0.0271, -0.3475,
          0.3002, -0.4844],
        [-0.0807,  0.3926, -0.6014, -0.3426,  0.6164, -0.2108, -0.0049, -0.3750,
          0.2870, -0.5442]], grad_fn=<AddmmBackward0>)
LSTM(
  (lstm): LSTM(2, 3, batch_first=True)
  (fc): Linear(in_features=3, out_features=10, bias=True)
)


In [None]:
m = nn.Linear(20, 10)
input = torch.randn(16, 20)
output = m(input)
print(output.size())

torch.Size([16, 10])


In [None]:
m

Linear(in_features=20, out_features=10, bias=True)

In [None]:
#Set Parameters for a small LSTM network
input_size  = 2 # size of one 'event', or sample, in our batch of data
hidden_dim  = 3 # 3 cells in the LSTM layer
output_size = 1 # desired model output

num_layers=3
torch_lstm = RNN( input_size, 
                 hidden_dim ,
                 output_size,
                 num_layers,
                 matching_in_out=True
                 )

state = torch_lstm.state_dict() # state will capture the weights of your model
#torch_lstm(data)
#torch_lstm.forward(data)

NameError: ignored

#From Tutorial - adjusted with answer - OLD

In [None]:
#from the tutorial
import torch
from torch import nn
# input_size  = 2 # size of one 'event', or sample, in our batch of data
# hidden_dim  = 3 # 3 cells in the LSTM layer
# output_size = 10 # desired model output

#Set Parameters for a small LSTM network
input_size  = 2 # size of one 'event', or sample, in our batch of data
hidden_dim  = 3 # 3 cells in the LSTM layer
output_size = 4 # desired model output
num_layers=1


#Initialize an PyTorch LSTM for comparison to our Numpy LSTM
class LSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers=1):
        super(LSTM, self).__init__()
        self.hidden_dim=hidden_dim
        #LSTM Layer
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)
        #Final, fully-connected layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x, hidden):
        batch_size = 1
        # get LSTM outputs
        lstm_output, (h,c) = self.lstm(x, hidden)
        # shape output to be (batch_size*seq_length, hidden_dim)
        lstm_output = lstm_output.view(-1, self.hidden_dim)  
        
        # get final output 
        model_output = self.fc(lstm_output)
        
        return model_output, (h,c)
      
torch.manual_seed(5)
torch_lstm = LSTM(input_size = input_size, 
                 hidden_dim = hidden_dim,
                 output_size = output_size,
                  n_layers=1
                 )

state = torch_lstm.state_dict()
#print(state)



def model_output(lstm_output, fc_Weight, fc_Bias):
  '''Takes the LSTM output and transforms it to our desired 
  output size using a final, fully connected layer'''
  return np.dot(fc_Weight, lstm_output) + fc_Bias

#############
import numpy as np 
from scipy.special import expit as sigmoid

def forget_gate(x, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, prev_cell_state):
    forget_hidden  = np.dot(Weights_hf, h) + Bias_hf
    forget_eventx  = np.dot(Weights_xf, x) + Bias_xf
    return np.multiply( sigmoid(forget_hidden + forget_eventx), prev_cell_state )

def input_gate(x, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, Weights_hl, Bias_hl, Weights_xl, Bias_xl):
    ignore_hidden  = np.dot(Weights_hi, h) + Bias_hi
    ignore_eventx  = np.dot(Weights_xi, x) + Bias_xi
    learn_hidden   = np.dot(Weights_hl, h) + Bias_hl
    learn_eventx   = np.dot(Weights_xl, x) + Bias_xl
    return np.multiply( sigmoid(ignore_eventx + ignore_hidden), np.tanh(learn_eventx + learn_hidden) )


def cell_state(forget_gate_output, input_gate_output):
    return forget_gate_output + input_gate_output

  
def output_gate(x, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, cell_state):
    out_hidden = np.dot(Weights_ho, h) + Bias_ho
    out_eventx = np.dot(Weights_xo, x) + Bias_xo
    return np.multiply( sigmoid(out_eventx + out_hidden), np.tanh(cell_state) )


##
def get_slices(hidden_dim):
    slices=[]
    breaker=(hidden_dim*4)
    slices=[[i,i+3] for i in range(0, breaker, breaker//4)]
    return slices

class numpy_lstm:
    def __init__( self, layer_num=0, hidden_dim=1, matching_in_out=False):
        self.matching_in_out=matching_in_out
        self.layer_num=layer_num
        self.hidden_dim=hidden_dim
        
    def init_weights_from_pytorch(self, state):
        slices=get_slices(self.hidden_dim)
        print (slices)

        #Event (x) Weights and Biases for all gates
        
        lstm_weight_ih='lstm.weight_ih_l'+str(self.layer_num)
        self.Weights_xi = state[lstm_weight_ih][slices[0][0]:slices[0][1]].numpy()  # shape  [h, x]
        self.Weights_xf = state[lstm_weight_ih][slices[1][0]:slices[1][1]].numpy()  # shape  [h, x]
        self.Weights_xl = state[lstm_weight_ih][slices[2][0]:slices[2][1]].numpy()  # shape  [h, x]
        self.Weights_xo = state[lstm_weight_ih][slices[3][0]:slices[3][1]].numpy() # shape  [h, x]

        
        lstm_bias_ih='lstm.bias_ih_l'+str(self.layer_num)
        self.Bias_xi = state[lstm_bias_ih][slices[0][0]:slices[0][1]].numpy()  #shape is [h, 1]
        self.Bias_xf = state[lstm_bias_ih][slices[1][0]:slices[1][1]].numpy()  #shape is [h, 1]
        self.Bias_xl = state[lstm_bias_ih][slices[2][0]:slices[2][1]].numpy()  #shape is [h, 1]
        self.Bias_xo = state[lstm_bias_ih][slices[3][0]:slices[3][1]].numpy() #shape is [h, 1]
        
        
        lstm_weight_hh='lstm.weight_hh_l'+str(self.layer_num)

        #Hidden state (h) Weights and Biases for all gates
        self.Weights_hi = state[lstm_weight_hh][slices[0][0]:slices[0][1]].numpy()  #shape is [h, h]
        self.Weights_hf = state[lstm_weight_hh][slices[1][0]:slices[1][1]].numpy()  #shape is [h, h]
        self.Weights_hl = state[lstm_weight_hh][slices[2][0]:slices[2][1]].numpy()  #shape is [h, h]
        self.Weights_ho = state[lstm_weight_hh][slices[3][0]:slices[3][1]].numpy() #shape is [h, h]
        
        
        lstm_bias_hh='lstm.bias_hh_l'+str(self.layer_num)

        self.Bias_hi = state[lstm_bias_hh][slices[0][0]:slices[0][1]].numpy()  #shape is [h, 1]
        self.Bias_hf = state[lstm_bias_hh][slices[1][0]:slices[1][1]].numpy()  #shape is [h, 1]
        self.Bias_hl = state[lstm_bias_hh][slices[2][0]:slices[2][1]].numpy()  #shape is [h, 1]
        self.Bias_ho = state[lstm_bias_hh][slices[3][0]:slices[3][1]].numpy() #shape is [h, 1]
    def forward_lstm_pass(self,input_data):
        h = np.zeros(self.hidden_dim)
        c = np.zeros(self.hidden_dim)
        
        output_list=[]
        for eventx in input_data:
            f = forget_gate(eventx, h, self.Weights_hf, self.Bias_hf, self.Weights_xf, self.Bias_xf, c)
            i =  input_gate(eventx, h, self.Weights_hi, self.Bias_hi, self.Weights_xi, self.Bias_xi, 
                        self.Weights_hl, self.Bias_hl, self.Weights_xl, self.Bias_xl)
            c = cell_state(f,i)
            h = output_gate(eventx, h, self.Weights_ho, self.Bias_ho, self.Weights_xo, self.Bias_xo, c)
            if self.matching_in_out: # doesnt make sense but it was as it was in main code :(
                output_list.append(h)
        if self.matching_in_out:
            return output_list
        else:
            return h

class fully_connected_layer:
    def __init__(self,state, dict_name='fc', ):
        self.fc_Weight = state[dict_name+'.weight'][0].numpy()
        self.fc_Bias = state[dict_name+'.bias'][0].numpy() #shape is [,output_size]
        
    def forward(self,lstm_output, is_sigmoid=True):
        res=np.dot(self.fc_Weight, lstm_output)+self.fc_Bias
        print (res)
        if is_sigmoid:
            return sigmoid(res)
        else:
            return res

class RNN_model_Numpy:
    def __init__(self, state, input_size, hidden_dim, output_size, num_layers, matching_in_out=True):
        self.lstm_layers=[]
        for i in range(0, num_layers):
            lstm_layer_obj=numpy_lstm(layer_num=i, hidden_dim=hidden_dim, matching_in_out=True)
            lstm_layer_obj.init_weights_from_pytorch(state) 
            self.lstm_layers.append(lstm_layer_obj)
        
        self.hidden2out=fully_connected_layer(state, dict_name='fc')
        
    def forward(self, feature_list):
        for x in self.lstm_layers:
            lstm_output=x.forward_lstm_pass(feature_list)
            feature_list=lstm_output
            
        return self.hidden2out.forward(feature_list, is_sigmoid=False)

#Event (x) Weights and Biases for all gates
Weights_xi = state['lstm.weight_ih_l0'][0:3].numpy()  # shape  [h, x]
Weights_xf = state['lstm.weight_ih_l0'][3:6].numpy()  # shape  [h, x]
Weights_xl = state['lstm.weight_ih_l0'][6:9].numpy()  # shape  [h, x]
Weights_xo = state['lstm.weight_ih_l0'][9:12].numpy() # shape  [h, x]

Bias_xi = state['lstm.bias_ih_l0'][0:3].numpy()  #shape is [h, 1]
Bias_xf = state['lstm.bias_ih_l0'][3:6].numpy()  #shape is [h, 1]
Bias_xl = state['lstm.bias_ih_l0'][6:9].numpy()  #shape is [h, 1]
Bias_xo = state['lstm.bias_ih_l0'][9:12].numpy() #shape is [h, 1]

#Hidden state (h) Weights and Biases for all gates
Weights_hi = state['lstm.weight_hh_l0'][0:3].numpy()  #shape is [h, h]
Weights_hf = state['lstm.weight_hh_l0'][3:6].numpy()  #shape is [h, h]
Weights_hl = state['lstm.weight_hh_l0'][6:9].numpy()  #shape is [h, h]
Weights_ho = state['lstm.weight_hh_l0'][9:12].numpy() #shape is [h, h]

Bias_hi = state['lstm.bias_hh_l0'][0:3].numpy()  #shape is [h, 1]
Bias_hf = state['lstm.bias_hh_l0'][3:6].numpy()  #shape is [h, 1]
Bias_hl = state['lstm.bias_hh_l0'][6:9].numpy()  #shape is [h, 1]
Bias_ho = state['lstm.bias_hh_l0'][9:12].numpy() #shape is [h, 1]

#--------------------------------------------------------------------
# Final, fully connected layer Weights and Bias
# fc_Weight = state['fc.weight'][0].numpy() #shape is [h, output_size]
# fc_Bias = state['fc.bias'][0].numpy() #shape is [,output_size]

fc_Weight = state['fc.weight'].numpy() #shape is [h, output_size]
fc_Bias = state['fc.bias'].numpy() #shape is [,output_size]

#

#Simple Time Series Data
data = np.array(
           [[1,1],
            [2,2],
            [3,3]])

#Initialize cell and hidden states with zeroes
h = np.zeros(hidden_dim)
c = np.zeros(hidden_dim)

#Loop through data, updating the hidden and cell states after each pass
out_list=[]
for eventx in data:
  f = forget_gate(eventx, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, c)
  i =  input_gate(eventx, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, 
                Weights_hl, Bias_hl, Weights_xl, Bias_xl)
  c = cell_state(f,i)
  h = output_gate(eventx, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, c)
  #print(h.shape)
  print(">>>", model_output(h, fc_Weight, fc_Bias))  
  #out_list.append(h)
  #print("fc_Bias", fc_Bias)
  #print(h.shape,fc_Weight.shape,fc_Bias.shape)
print(out_list)
# cur_array=np.array(out_list)
# print(cur_array.shape, cur_array )
# print(">>>", model_output(np.array(out_list), fc_Weight, fc_Bias))


#
#rPyTorch expects an extra dimension for batch size:
torch_batch = torch.Tensor(data).unsqueeze(0) 

torch_output, (torch_hidden, torch_cell) = torch_lstm(torch_batch, None)
print("torch_output:", torch_output)
print(torch_lstm)

check=RNN_model_Numpy(state, input_size, hidden_dim, output_size, num_layers)
check.forward(data)

>>> [-0.38340237 -0.09575934  0.40119051  0.26347699]
>>> [-0.50942458 -0.10018035  0.36289824  0.16787255]
>>> [-0.56177038 -0.09478535  0.35455577  0.12637712]
[]
torch_output: tensor([[-0.3834, -0.0958,  0.4012,  0.2635],
        [-0.5094, -0.1002,  0.3629,  0.1679],
        [-0.5618, -0.0948,  0.3546,  0.1264]], grad_fn=<AddmmBackward0>)
LSTM(
  (lstm): LSTM(2, 3, batch_first=True)
  (fc): Linear(in_features=3, out_features=4, bias=True)
)
[[0, 3], [3, 6], [6, 9], [9, 12]]
[-0.1594631  -0.44545648 -0.36644464]


array([-0.1594631 , -0.44545648, -0.36644464])

In [None]:
def sp_extract_labels(item_label_list0,output_labels0=[]):
  final_out=[]
  if not type(item_label_list0) is list: item_label_list0=[item_label_list0] 
  for item_label0 in item_label_list0:
    cur_one_hot=[0.]*len(output_labels0)
    if item_label0 in output_labels0: 
      out_i=output_labels0.index(item_label0)
      cur_one_hot[out_i]=1.
    final_out.append(cur_one_hot)
  return np.array(final_out).astype("float32")

output_labels0=["a","b","c","d"]
cur_item_list=["b","b","a","b","c"]  
out=sp_extract_labels(cur_item_list,output_labels0)
print(out)
print(out.shape)

[[0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]]
(5, 4)
