In [1]:
train_dir = r"C:\Users\rafci\Desktop\tensorflow-speech-recognition-challenge\train"

In [2]:
import numpy as np
import os

In [3]:
X_train = np.load("data/X_train.npy")
y_train = np.load("data/y_train.npy")

X_val = np.load("data/X_val.npy")
y_val = np.load("data/y_val.npy")

X_train = X_train.reshape((-1, X_train.shape[1], X_train.shape[2]))
X_val = X_val.reshape((-1, X_val.shape[1], X_val.shape[2]))

classes = os.listdir(train_dir+'/audio/')
NB_CLASSES = len(classes)
classes

def convert_list_dict(lst):
    res_dct = {i: val for i, val in enumerate(lst)}
    return res_dct
         
classes_index = convert_list_dict(classes)
classes_index

{0: 'bed',
 1: 'bird',
 2: 'cat',
 3: 'dog',
 4: 'down',
 5: 'eight',
 6: 'five',
 7: 'four',
 8: 'go',
 9: 'happy',
 10: 'house',
 11: 'left',
 12: 'marvin',
 13: 'nine',
 14: 'no',
 15: 'off',
 16: 'on',
 17: 'one',
 18: 'right',
 19: 'seven',
 20: 'sheila',
 21: 'silence',
 22: 'six',
 23: 'stop',
 24: 'three',
 25: 'tree',
 26: 'two',
 27: 'up',
 28: 'wow',
 29: 'yes',
 30: 'zero'}

In [4]:
import torch.nn.functional as F

import torch

y_train = torch.tensor(y_train).long()
y_val = torch.tensor(y_val).long()
X_train = torch.tensor(X_train)
X_val = torch.tensor(X_val)



In [5]:
y_train = F.one_hot(y_train, num_classes=NB_CLASSES)
# y_val = F.one_hot(y_val, num_classes=NB_CLASSES)
print(y_val[:10])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import random_split
import numpy as np
import time

class InceptionModule(nn.Module):
    def __init__(self, in_channels, nb_filters=32, bottleneck_size=32*4, kernel_sizes=41):
        super(InceptionModule, self).__init__()
        self.bottleneck_size = bottleneck_size
        
        if bottleneck_size and in_channels > 1:
            self.bottleneck = nn.Conv1d(in_channels, bottleneck_size, kernel_size=1, padding='same')
        
        self.conv_layers = nn.ModuleList()
        for kernel_size in kernel_sizes:
            self.conv_layers.append(nn.Conv1d(in_channels if not bottleneck_size else bottleneck_size,
                                              nb_filters, kernel_size=kernel_size, padding='same'))
        
        self.max_pool = nn.MaxPool1d(kernel_size=3, stride=1, padding=1)
        self.conv_6 = nn.Conv1d(in_channels, nb_filters, kernel_size=1, padding='same')
        self.batch_norm = nn.BatchNorm1d(nb_filters * len(kernel_sizes) + nb_filters)
        
    def forward(self, x):
        input_inception = self.bottleneck(x) #if hasattr(self, 'bottleneck') else x
        conv_outputs = [conv_layer(input_inception) for conv_layer in self.conv_layers]
        x = self.max_pool(x)
        x = self.conv_6(x)
        conv_outputs.append(x)
        
        
        #[print(x.shape) for x in conv_outputs]
        
        output = torch.cat(conv_outputs, dim=1)
        output = self.batch_norm(output)
        output = F.relu(output)
        return output

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, nb_filters):
        super(ResidualBlock, self).__init__()
        self.conv = nn.Conv1d(in_channels, nb_filters, kernel_size=1)
        self.batch_norm = nn.BatchNorm1d(nb_filters)
        
    def forward(self, x):
        shortcut = self.conv(x)
        shortcut = self.batch_norm(shortcut)
        x = x + shortcut
        x = F.relu(x)
        return x

class ClassifierInception(nn.Module):
    def __init__(self, input_shape, nb_classes, nb_filters=32, use_residual=True, use_bottleneck=True,
                 depth=10, kernel_size=41, bottleneck_size=32):
        super(ClassifierInception, self).__init__()
        self.nb_filters = nb_filters
        self.use_residual = use_residual
        self.use_bottleneck = use_bottleneck
        self.depth = depth
        self.kernel_size = kernel_size
        self.bottleneck_size = bottleneck_size
        
        self.conv1 = nn.Conv1d(input_shape[0], nb_filters *4, kernel_size=kernel_size, padding=kernel_size//2)
        
        inception_modules = [InceptionModule(nb_filters * 4, nb_filters, bottleneck_size, [kernel_size // (2 ** i) for i in range(3)]) 
                             for _ in range(depth)]
        self.inception_modules = nn.Sequential(*inception_modules)
        
        if use_residual:
            self.residual_blocks = nn.ModuleList([ResidualBlock(nb_filters * 4, nb_filters * 4) for _ in range(depth // 3)])
        
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(nb_filters * 4, nb_classes)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.inception_modules(x)
        
        if self.use_residual:
            for i, block in enumerate(self.residual_blocks):
                if i % 3 == 2:
                    x = block(x)
        
        x = self.global_avg_pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# # Dummy data for demonstration
# x_train = torch.randn(284, 85, 122)  # 100 samples, 3 channels, 1000 timesteps
# y_train = torch.randint(0, 10, (284,))  # 10 classes
# x_val = torch.randn(84, 85, 122)
# y_val = torch.randint(0, 10, (84,))

# Hyperparameters
input_shape = (85, 122)
nb_classes = 10
nb_filters = 32
use_residual = True
use_bottleneck = True
depth = 10
kernel_size = 41
bottleneck_size = 32 *4
nb_epochs = 10
batch_size = 64




In [9]:
INPUT_SHAPE = (85, 122)
print(X_train.shape)
BATCH_SIZE = 64

torch.Size([58284, 122, 85])


In [14]:
# import torch
# from torchviz import make_dot


# model = ClassifierInception(INPUT_SHAPE, NB_CLASSES, nb_filters, use_residual, use_bottleneck, depth, kernel_size, bottleneck_size)
# print(X_val.shape)
# print(X_train.shape)
# # Przekazanie przykładowego wejścia przez model, aby uzyskać strukturę grafu
# output = model(X_val[[0]].float().transpose(2, 1))



# # Generowanie grafu
# dot = make_dot(output, params=dict(model.named_parameters()))

# model_name = 'Inception'

# # Zapis grafu do pliku PNG
# dot.render('./{}'.format(model_name))

torch.Size([6839, 122, 85])
torch.Size([58284, 122, 85])


'Inception.pdf'

In [10]:
# Model, criterion, optimizer
model = ClassifierInception(INPUT_SHAPE, NB_CLASSES, nb_filters, use_residual, use_bottleneck, depth, kernel_size, bottleneck_size).to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Data loaders
train_dataset = TensorDataset(X_train.float().transpose(2, 1).to('cuda'), y_train.float().to('cuda'))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = TensorDataset(X_val.float().transpose(2, 1).to('cuda'), y_val.float().to('cuda'))
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Training loop
for epoch in range(nb_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    model.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            # _, labels = torch.max(labels, 1)
            
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)
        
        accuracy = total_correct / total_samples
        print(f'Epoch [{epoch+1}/{nb_epochs}], Validation Accuracy: {accuracy:.4f}')

print("Training finished.")

  return F.conv1d(input, weight, bias, self.stride,
  return F.conv1d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch [1/10], Validation Accuracy: 0.7932
Epoch [2/10], Validation Accuracy: 0.8557
Epoch [3/10], Validation Accuracy: 0.8639
Epoch [4/10], Validation Accuracy: 0.8862
Epoch [5/10], Validation Accuracy: 0.9063
Epoch [6/10], Validation Accuracy: 0.9108
Epoch [7/10], Validation Accuracy: 0.9212
Epoch [8/10], Validation Accuracy: 0.9210
Epoch [9/10], Validation Accuracy: 0.9316
Epoch [10/10], Validation Accuracy: 0.9259
Training finished.


In [1]:
# Model, criterion, optimizer
model = ClassifierInception(INPUT_SHAPE, NB_CLASSES, nb_filters, use_residual, use_bottleneck, depth, kernel_size, bottleneck_size).to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.0005)

# Data loaders
train_dataset = TensorDataset(X_train.float().transpose(2, 1).to('cuda'), y_train.float().to('cuda'))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = TensorDataset(X_val.float().transpose(2, 1).to('cuda'), y_val.float().to('cuda'))
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Training loop
for epoch in range(nb_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    model.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            # _, labels = torch.max(labels, 1)
            
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)
        
        accuracy = total_correct / total_samples
        print(f'Epoch [{epoch+1}/{nb_epochs}], Validation Accuracy: {accuracy:.4f}')

print("Training finished.")

NameError: name 'ClassifierInception' is not defined