## Setup

In [1]:
import os
from copy import deepcopy
os.environ['CUDA_VISIBLE_DEVICES'] = ''

import numpy as np
import sklearn.decomposition
import matplotlib.pyplot as plt
from hdf5storage import loadmat, savemat

import torch
import torch.nn as nn
import torch.nn.functional as F

from api.preprocessing import sound2coch
from api.librispeech import LibriDataset
from api.model import SpeechRecognitionCTC

# CUDA for PyTorch
device = torch.device("cpu")
print(device)

cpu


## Load trained model

In [2]:
LibriDataset.set_mode('word') # Should be one of 'grapheme', 'phoneme', 'word'

freq_bins = 65
size_vocab = LibriDataset.vocab_size()
print(f'Frequency dimension: {freq_bins}, alphabet size: {size_vocab}')

Frequency dimension: 65, alphabet size: 10002


In [3]:
N_LAYER, N_NODES = 5, 500
model_id = f'{LibriDataset.MODE}-{N_LAYER}-{N_NODES}'
model_arch = dict(rnn_hidden_size=N_NODES, nb_layers=N_LAYER, window_size=1, rnn_stride=1)
model_name = 'models/model-ctc-{:s}'.format(model_id)

print(model_id)

word-5-500


In [4]:
model = SpeechRecognitionCTC(rnn_type=nn.GRU, labels=LibriDataset.alphabet(), **model_arch, freq_bins=freq_bins)
model = model.to(device)

model.load_state_dict(torch.load(f'{model_name}.pt', map_location=device))
model.eval();

## Inferrence

In [5]:
import glob

alphabet = LibriDataset.alphabet()
maindir='./sounds/homophones/'
nTrial=37
activation=[]
prediction=[]

actv=[]
pred=[]
#make a list of names
root_dir = os.getcwd()
os.chdir(maindir+'soundTemp/')
sounds_names = glob.glob('*.wav')
os.chdir(root_dir)
sounds_names=[nm[0:-4] for nm in sounds_names]
print( len(sounds_names))

for nm in sounds_names: 
    sound_file = maindir+'soundTemp/'+nm+'.wav'
    cochleagram = sound2coch(sound_file)
    prediction.append(model.get_prediction(cochleagram))
    activation.append(model.get_activation(cochleagram))

    
print((activation[0][0].shape)) # dimentions: trial, layer,time,neuron

366
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
h

In [6]:
print(sounds_names)

['hmphn_1f1_trl2', 'hmphn_0m3_trl5', 'hmphn_0m2_trl5', 'hmphn_1m3_trl17', 'hmphn_2m2_trl13', 'hmphn_1m1_trl6', 'hmphn_0f3_trl1', 'hmphn_0f2_trl1', 'hmphn_2f1_trl13', 'hmphn_2m1_trl8', 'hmphn_2m1_trl9', 'hmphn_1m1_trl7', 'hmphn_3f3_trl17', 'hmphn_1m3_trl16', 'hmphn_0f2_trl13', 'hmphn_0m2_trl4', 'hmphn_0m3_trl4', 'hmphn_1f1_trl3', 'hmphn_0m1_trl13', 'hmphn_1f1_trl1', 'hmphn_0m1_trl11', 'hmphn_0f1_trl19', 'hmphn_0f2_trl11', 'hmphn_0m2_trl19', 'hmphn_3m3_trl8', 'hmphn_1m3_trl14', 'hmphn_3m2_trl8', 'hmphn_3f3_trl15', 'hmphn_2m2_trl10', 'hmphn_0f2_trl2', 'hmphn_0f3_trl2', 'hmphn_2f1_trl10', 'hmphn_0f3_trl3', 'hmphn_0f2_trl3', 'hmphn_3m3_trl20', 'hmphn_1m1_trl4', 'hmphn_3m2_trl9', 'hmphn_1m3_trl15', 'hmphn_3m3_trl9', 'hmphn_0m2_trl18', 'hmphn_0f2_trl10', 'hmphn_0m3_trl7', 'hmphn_0m2_trl7', 'hmphn_0f1_trl18', 'hmphn_0m1_trl10', 'hmphn_1f1_trl4', 'hmphn_0m1_trl14', 'hmphn_0f1_trl20', 'hmphn_0m2_trl3', 'hmphn_0m3_trl3', 'hmphn_1f3_trl19', 'hmphn_1m3_trl11', 'hmphn_0m2_trl20', 'hmphn_0f2_trl14', 

In [7]:
print(prediction[23])

HE WOULD QUIT HELP HIS FUTURE WAS <NIL> BELIEVE HE WOULD DO IT ANY THAN MOST PLAYS


## Custom inferrence

In [8]:
def give_hmpn_and_trl(word):
    a=word.split('_')
    hmphn=a[1][:-2]
    trl=a[2][3:]
    return int(hmphn),int(trl)-1

In [9]:
from utils import give_hmphn_trl_label
Data,Labels,OpLabels=give_hmphn_trl_label(root_dir)

In [10]:
total_count=0
corr_count=0
for ad, nm in enumerate(sounds_names):
    hmphn,trl=give_hmpn_and_trl(nm)
    label=Labels[hmphn][trl].upper()
    oplabel=OpLabels[hmphn][trl].upper()
    if label in alphabet and oplabel in alphabet:
        print(label, oplabel)
        if label in prediction[ad] or oplabel in prediction[ad]:
            total_count+=1
            if label in prediction[ad]:
                corr_count+=1


PEAR PAIR
PEAR PAIR
PEAR PAIR
PEAR PAIR
FLOUR FLOWER
PAIR PEAR
PAIR PEAR
PEAR PAIR
FLOUR FLOWER
PEAR PAIR
FLOUR FLOWER
FLOWER FLOUR
FLOWER FLOUR
PEAR PAIR
FLOWER FLOUR
PAIR PEAR
FLOWER FLOUR
PEAR PAIR
PAIR PEAR
PEAR PAIR
FLOWER FLOUR
FLOWER FLOUR
FLOUR FLOWER
FLOUR FLOWER
FLOWER FLOUR
PEAR PAIR
PEAR PAIR
PAIR PEAR
PAIR PEAR
FLOWER FLOUR
FLOWER FLOUR
PEAR PAIR
PAIR PEAR
PEAR PAIR
FLOUR FLOWER
PEAR PAIR
PEAR PAIR
FLOWER FLOUR
FLOUR FLOWER
PAIR PEAR
PAIR PEAR
FLOUR FLOWER
FLOUR FLOWER
FLOUR FLOWER
FLOWER FLOUR
FLOWER FLOUR
FLOUR FLOWER
PEAR PAIR
PAIR PEAR
PAIR PEAR
PEAR PAIR
FLOUR FLOWER
PAIR PEAR
PEAR PAIR
PEAR PAIR
PAIR PEAR
FLOUR FLOWER
FLOWER FLOUR
FLOWER FLOUR
PEAR PAIR
PEAR PAIR
FLOUR FLOWER
PEAR PAIR
PAIR PEAR
PEAR PAIR
PAIR PEAR
FLOUR FLOWER
FLOUR FLOWER
FLOWER FLOUR
PAIR PEAR
PAIR PEAR
FLOWER FLOUR
FLOWER FLOUR
FLOWER FLOUR
FLOUR FLOWER
PAIR PEAR
PAIR PEAR
FLOUR FLOWER
FLOUR FLOWER
PAIR PEAR
PAIR PEAR
FLOWER FLOUR
FLOWER FLOUR
FLOWER FLOUR
FLOUR FLOWER
PEAR PAIR
PEAR PAIR
FLOUR F

In [13]:
print(corr_count,total_count,'the ratio of choosing correctly:', corr_count/total_count,
      'the ratio of missing the sound', total_count/(len(sounds_names)))

67 98 the ratio of choosing correctly: 0.6836734693877551 the ratio of missing the sound 0.2677595628415301


In [18]:
print('my'.upper() in alphabet)

True
