# Prep

In [1]:
import os
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F

from Inception import ClassifierInception

In [2]:
# Run on the GPU whenever PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(DEVICE)

cuda


In [3]:
# Root of the extracted training set; the class listing is read from its
# `audio/` sub-folder. The location can be overridden with the
# SPEECH_TRAIN_DIR environment variable so the notebook runs on other
# machines; when the variable is unset the original local path is used.
train_dir = os.environ.get(
    "SPEECH_TRAIN_DIR",
    "C://Users//jakub//Desktop//PD//sem10//deep//tensorflow-speech-recognition-challenge//train",
)

In [4]:
# Load the saved feature/label arrays for both splits.
X_train, y_train = np.load("data/X_train.npy"), np.load("data/y_train.npy")
X_val, y_val = np.load("data/X_val.npy"), np.load("data/y_val.npy")

# Keep axes 1 and 2 as they are and fold everything else into the leading axis.
train_dim1, train_dim2 = X_train.shape[1], X_train.shape[2]
X_train = X_train.reshape((-1, train_dim1, train_dim2))
val_dim1, val_dim2 = X_val.shape[1], X_val.shape[2]
X_val = X_val.reshape((-1, val_dim1, val_dim2))

# Every entry of the audio folder is treated as one class.
# NOTE(review): os.listdir returns entries in arbitrary order — confirm that the
# integer labels stored in y_train / y_val were produced with this same ordering.
classes = os.listdir(train_dir + '/audio/')

def convert_list_dict(lst):
    """Return a dict mapping each zero-based position in ``lst`` to its element."""
    return dict(enumerate(lst))
         
# Lookup table: position in `classes` -> the entry stored at that position.
classes_index = convert_list_dict(classes)

In [5]:
# One class per entry found in the training audio directory listing.
NB_CLASSES = len(classes)
# Shape passed to ClassifierInception as its first argument.
# NOTE(review): presumably the per-sample shape after the transpose(2, 1) applied
# when the datasets are built — confirm against the saved arrays.
INPUT_SHAPE = (85, 122)

In [6]:
# Features are converted as-is; the cast to float happens later, when the
# TensorDatasets are built.
X_train, X_val = torch.tensor(X_train), torch.tensor(X_val)

# Training targets are one-hot encoded over NB_CLASSES columns.
y_train = F.one_hot(torch.tensor(y_train).long(), num_classes = NB_CLASSES)

# Validation targets are deliberately left as integer labels (not one-hot):
# the validation loops compare them directly with the predicted indices.
y_val = torch.tensor(y_val).long()

# Tests

## Model tests

In [7]:
# Optimisation settings shared by the model tests below.
batch_size = 64        # mini-batch size given to the DataLoaders
nb_epochs = 10         # number of passes made by the training loops
weight_decay = 1e-08   # forwarded to Adam as `weight_decay`
lr = 5e-4              # forwarded to Adam as `lr`

### Inception test

In [8]:
# Hyper-parameters passed positionally to ClassifierInception in the next cell.
depth = 10
kernel_size = 41
nb_filters = 32
bottleneck_size = 128  # 32 * 4
use_bottleneck = True
use_residual = True

In [13]:
# Model, criterion, optimizer
# Everything is placed on DEVICE (defined at the top of the notebook) instead of
# a hardcoded 'cuda', so the cell also runs on machines without a GPU.
model = ClassifierInception(INPUT_SHAPE, NB_CLASSES, nb_filters, use_residual, use_bottleneck, depth, kernel_size, bottleneck_size).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = lr, weight_decay = weight_decay)

# Data loaders
# Features are cast to float and have axes 1 and 2 swapped before being wrapped.
# Whole tensors are moved to DEVICE up front, so the training loop below does
# not transfer individual batches.
train_dataset = TensorDataset(X_train.float().transpose(2, 1).to(DEVICE), y_train.float().to(DEVICE))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = TensorDataset(X_val.float().transpose(2, 1).to(DEVICE), y_val.float().to(DEVICE))
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)



In [16]:
for epoch in range(nb_epochs):
    # ---- optimisation pass over the training batches ----
    model.train()
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_x), batch_y)
        batch_loss.backward()
        optimizer.step()

    # ---- validation pass: count predictions that equal the stored labels ----
    model.eval()
    n_hits, n_seen = 0, 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            # Index of the largest score along dim 1 is the predicted label.
            batch_pred = torch.max(model(batch_x), 1).indices
            n_hits += (batch_pred == batch_y).sum().item()
            n_seen += batch_y.size(0)

    accuracy = n_hits / n_seen
    print(f'Epoch [{epoch+1}/{nb_epochs}], Validation Accuracy: {accuracy:.4f}')

KeyboardInterrupt: 

### Mod Inception test

In [9]:
# TODO

### Emformer test

In [None]:
# TODO

### Conformer test

In [17]:
from ConformerClassifier import ConformerClassifier

# Model, criterion, optimizer
# Sizes are taken from the shared notebook constants instead of repeating the
# literals 85 and 31, so the classifier always matches the width used for the
# one-hot training targets.
# NOTE(review): INPUT_SHAPE[0] (85) is assumed to be the size the model expects
# as `input_dim` — confirm against ConformerClassifier.
model = ConformerClassifier(
     input_dim = INPUT_SHAPE[0],
     num_classes = NB_CLASSES
     ).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = lr, weight_decay = weight_decay)

# Data loaders
# Unlike the Inception test, the tensors stay where they are here; the training
# loop moves each batch to the model's device itself.
train_dataset = TensorDataset(X_train.float().transpose(2, 1), y_train.float())
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = TensorDataset(X_val.float().transpose(2, 1), y_val.float())
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [20]:
# Training loop
# Batches are moved with .to(DEVICE) — the same device the model was placed on —
# rather than a hardcoded .cuda(), so the loop also works without a GPU.
for epoch in range(nb_epochs):
    model.train()
    for inputs, labels in train_loader:
        # The datasets were built with transpose(2, 1); swap axes 1 and 2 back
        # before feeding the model.
        inputs = inputs.permute(0, 2, 1).to(DEVICE)
        # NOTE(review): y_train was one-hot encoded earlier in the notebook, and a
        # long target is interpreted by CrossEntropyLoss as class indices —
        # confirm this cast matches the shape the model actually outputs.
        labels = labels.to(DEVICE).long()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    model.eval()
    total_correct = 0
    total_samples = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.permute(0, 2, 1).to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = model(inputs)

            # The model output is already a tensor: reduce it directly instead of
            # wrapping it in torch.tensor(), which copies it and emits a UserWarning.
            # NOTE(review): the two successive reductions over dim 1 are kept from
            # the original; they only make sense if the model output has at least
            # three dimensions — confirm, and collapse to a single argmax over the
            # class axis if the output is (batch, classes).
            _, outputs = torch.max(outputs, 1)
            _, predicted = torch.max(outputs, 1)

            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    accuracy = total_correct / total_samples
    print(f'Epoch [{epoch+1}/{nb_epochs}], Validation Accuracy: {accuracy:.4f}')

print("Training finished.")

  _, outputs = torch.max(torch.tensor(outputs), 1)
  _, predicted = torch.max(torch.tensor(outputs), 1)


Epoch [1/10], Validation Accuracy: 0.0246
Epoch [2/10], Validation Accuracy: 0.0251
Epoch [3/10], Validation Accuracy: 0.0542
Epoch [4/10], Validation Accuracy: 0.1323
Epoch [5/10], Validation Accuracy: 0.2316
Epoch [6/10], Validation Accuracy: 0.4365
Epoch [7/10], Validation Accuracy: 0.5856
Epoch [8/10], Validation Accuracy: 0.4895
Epoch [9/10], Validation Accuracy: 0.4227
Epoch [10/10], Validation Accuracy: 0.6168
Training finished.


## Approaches tests

In [18]:
# TODO

## Augmentation tests

In [19]:
# TODO

## Hyperparameter tests

In [20]:
# TODO