In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import os
from models.Inception import InceptionClassifier
# Dataset locations. The defaults are the original author's local paths; set the
# SPEECH_TRAIN_DIR / SPEECH_TEST_DIR environment variables to run the notebook on
# another machine without editing this cell.
train_dir = os.environ.get(
    "SPEECH_TRAIN_DIR",
    r"C:\Users\rafci\Desktop\tensorflow-speech-recognition-challenge\train",
)
test_dir = os.environ.get(
    "SPEECH_TEST_DIR",
    r"C:\Users\rafci\Desktop\tensorflow-speech-recognition-challenge\test",
)

In [13]:
from data_preparation import make_spec,compute_fbank,wav_padding
def get_test_list(test_dir):
    """Return the names of the entries in ``<test_dir>/audio``.

    Args:
        test_dir: Directory that contains an ``audio`` sub-folder.

    Returns:
        list[str]: Bare entry names (no directory prefix), sorted so the
        order is reproducible; ``os.listdir`` on its own returns entries in
        arbitrary order.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be read.
    """
    audio_dir = os.path.join(test_dir, 'audio')
    return sorted(os.listdir(audio_dir))
def create_test_set(file_list, test_dir, method = 'spec'):
    """Build the feature array for a list of test audio files.

    Args:
        file_list: Names of the files to process (relative to
            ``<test_dir>/audio``).
        test_dir: Root folder of the test set.
        method: ``'spec'`` stores the result of ``make_spec(file, test_dir)``
            in a (122, 85) slot per file; ``'fbank'`` stores
            ``wav_padding(compute_fbank(path), 97, 80)`` in a (97, 80) slot.

    Returns:
        tuple: ``(X_array, file_names)`` where ``X_array`` has shape
        ``(len(file_list), 122, 85)`` or ``(len(file_list), 97, 80)`` and
        ``file_names`` is a list of the processed names in the same order.

    Raises:
        ValueError: If ``method`` is neither ``'spec'`` nor ``'fbank'``.
    """
    if method == 'spec':
        feature_shape = (122, 85)
    elif method == 'fbank':
        feature_shape = (97, 80)
    else:
        # Reject unknown methods before allocating anything. Previously the
        # check lived inside the loop, so an empty file_list skipped it and
        # failed on the unbound X_array instead.
        raise ValueError("Invalid case")

    X_array = np.zeros([len(file_list), *feature_shape])
    file_names = []
    for ind, file in enumerate(file_list):
        if method == 'spec':
            X_array[ind] = make_spec(file, test_dir)
        else:
            X_array[ind] = wav_padding(compute_fbank(test_dir+'/audio/'+file), 97, 80)
        file_names.append(file)
    return X_array, file_names

In [15]:
# Load the pre-computed test features.
X_test = np.load("data/X_test.npy")
# Collapse to three axes, keeping the sizes of axes 1 and 2; this is a no-op
# when the saved array is already 3-D.
X_test = X_test.reshape((-1, X_test.shape[1], X_test.shape[2]))
print(X_test.shape)
# The file names are read as text via np.loadtxt even though the file carries
# a .npy extension.
test_file_names = np.loadtxt('data/test_file_names.npy', dtype=str)
print(test_file_names.shape)
# Features and names are paired by position downstream, so fail fast here if
# the two files do not describe the same number of samples.
assert X_test.shape[0] == np.size(test_file_names), (
    f"{X_test.shape[0]} feature rows but {np.size(test_file_names)} file names"
)

(158538, 122, 85)
(158538,)


In [21]:
# Class names are the entries of <train_dir>/audio. os.listdir returns them in
# arbitrary order, so sort to keep the index -> label mapping stable across
# machines and runs.
# NOTE(review): the order must match the one used when the checkpoint was
# trained — confirm against the training notebook.
classes = sorted(os.listdir(train_dir+'/audio/'))
NB_CLASSES = len(classes)
def convert_list_dict(lst):
    """Map each position in ``lst`` to the item stored at that position.

    Returns a new ``dict`` of the form ``{0: lst[0], 1: lst[1], ...}``.
    """
    return dict(enumerate(lst))
         
# Index -> class-name lookup, keyed by each entry's position in `classes`.
classes_index = convert_list_dict(classes)
print(type(classes_index))
# Bare last expression so the notebook displays the full mapping.
classes_index

<class 'dict'>


{0: 'bed',
 1: 'bird',
 2: 'cat',
 3: 'dog',
 4: 'down',
 5: 'eight',
 6: 'five',
 7: 'four',
 8: 'go',
 9: 'happy',
 10: 'house',
 11: 'left',
 12: 'marvin',
 13: 'nine',
 14: 'no',
 15: 'off',
 16: 'on',
 17: 'one',
 18: 'right',
 19: 'seven',
 20: 'sheila',
 21: 'silence',
 22: 'six',
 23: 'stop',
 24: 'three',
 25: 'tree',
 26: 'two',
 27: 'up',
 28: 'wow',
 29: 'yes',
 30: 'zero'}

In [17]:
# Number of samples per inference batch.
BATCH_SIZE = 64

# as_tensor reuses the NumPy buffer instead of duplicating the whole array, and
# it returns the input unchanged if this cell is re-run when X_test is already
# a tensor (torch.tensor would copy and emit a warning in that case).
X_test = torch.as_tensor(X_test)

from torch.utils.data import Dataset
class CustomDataset(Dataset):
    """Dataset that pairs each test feature matrix with its source file name.

    Args:
        X: Tensor indexed as (sample, dim1, dim2). It is cast to float32, its
            last two axes are swapped, and the whole tensor is moved to
            ``device`` once, at construction time.
        file_names: Sequence aligned with the first axis of ``X``.
        device: Target device for the features. Defaults to ``'cuda'`` when a
            GPU is available and ``'cpu'`` otherwise, so the class no longer
            fails on machines without CUDA.
    """

    def __init__(self, X, file_names, device=None):
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.X = X.float().transpose(2, 1).to(device)
        self.file_names = file_names

    def __len__(self):
        """Return the number of samples (size of the first axis of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, file_name)`` for the sample at ``idx``."""
        return self.X[idx], self.file_names[idx]

# Wrap the test tensors and their file names in a dataset, then batch them in
# stored order (no shuffling).
test_dataset = CustomDataset(X=X_test, file_names=test_file_names)
test_loader = DataLoader(
    dataset=test_dataset,
    shuffle=False,
    batch_size=BATCH_SIZE,
)

In [31]:
import csv
# Per-sample input size handed to InceptionClassifier.
INPUT_SHAPE = (85, 122)
OUTPUT_CSV = 'outputs/output.csv'

# Use the GPU when present; fall back to CPU instead of failing on a
# hard-coded 'cuda'.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = InceptionClassifier(INPUT_SHAPE, NB_CLASSES).to(device)
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted
# source. map_location lets a GPU-saved checkpoint load on whichever device is
# in use.
model.load_state_dict(
    torch.load('saved_models/inception_aug.pth', map_location=device)
)
model.eval()

# open() does not create missing parent folders, so make sure it exists.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
with open(OUTPUT_CSV, 'w', newline='') as csvfile:
    fieldnames = ['fname', 'label']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    with torch.no_grad():
        for inputs, file_names in test_loader:
            # .to(device) is a no-op when the dataset already lives there.
            outputs = model(inputs.to(device))
            # argmax gives the same indices as torch.max(outputs, 1)[1] without
            # assigning to `_`, which IPython uses for the last cell output.
            # `predicted` stays at module level after the loop and holds the
            # indices of the final batch.
            predicted = outputs.argmax(dim=1)
            for num, file_name in zip(predicted.cpu().tolist(), file_names):
                predicted_word = classes_index[num]
                writer.writerow({'fname': file_name, 'label': predicted_word})



In [32]:
# `predicted` still holds the class indices from the final batch of the
# inference loop; decode them to words as a quick sanity check.
print(predicted)
words_list = [
    classes_index[class_idx]
    for class_idx in predicted.cpu().numpy()
]
print(words_list)


tensor([14, 23, 13, 18, 21,  0, 14, 23, 21, 22], device='cuda:0')
['no', 'stop', 'nine', 'right', 'silence', 'bed', 'no', 'stop', 'silence', 'six']
