# Table Of Contents<a class="anchor" id="zero-bullet"></a>:
* [Imports](#first-bullet)
* [General settings and Global Functions](#second-bullet)
* [Import spectrograms](#third-bullet)
* [Model Initialization](#fourth-bullet)
* [Training](#fifth-bullet)
* [Instant Evaluation Logic](#sixth-bullet)

## Imports<a class="anchor" id="first-bullet"></a>

[Back to the Table of Contents](#zero-bullet)

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset, random_split
from torch.optim import lr_scheduler
import os
import numpy as np
from torch.utils.tensorboard import SummaryWriter
import seaborn as sns
import json

from utils import *
%matplotlib inline

  warn(f"Failed to load image Python extension: {e}")
The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.
The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.


## General settings and Global Functions<a class="anchor" id="second-bullet"></a>

[Back to the Table of Contents](#zero-bullet)

In [4]:
torch.manual_seed(42)

<torch._C.Generator at 0x22eba05ebb0>

In [5]:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [6]:
device = torch.device('cuda')

## Import spectrograms<a class="anchor" id="third-bullet"></a>

[Back to the Table of Contents](#zero-bullet)

In [8]:
root = r'C:\~\imdb_speechbrain_1000_last_names_robust'

### Load specs

In [9]:
spectrograms = torch.load(root + '/mel_spectrograms.pt')
labels_encoded = torch.load(root + '/labels_encoded.pt')

In [10]:
spectrograms.shape

torch.Size([100000, 1, 80, 100])

In [11]:
labels_encoded.shape

torch.Size([100000])

In [12]:
with open(root + '/labels_dict.json', 'r', encoding='utf-8') as f:
    labels = json.loads(f.read())

In [13]:
len_labels = len(labels)
len_labels

1000

## Model Initialization<a class="anchor" id="fourth-bullet"></a>

[Back to the Table of Contents](#zero-bullet)

Make a dataset

In [13]:
spectrograms = TensorDataset(spectrograms, labels_encoded)

In [16]:
# Define the hyperparameters.
batch_size = 32
num_epochs = 100
learning_rate = 0.0001
seed = 42  # Replace with your desired random seed.
input_shape = (1, 80, 100)
len_labels = 10000


In [17]:
# train_dataset, val_dataset = random_split(spectrograms, [train_size, val_size])

Split the data into Train and Test parts (70/30)

In [18]:
# Define indices of samples to include in validation set
val_indices1 = torch.arange(7, len(spectrograms), 10)
val_indices2 = torch.arange(8, len(spectrograms), 10)
val_indices3 = torch.arange(9, len(spectrograms), 10)

concat_indices = torch.cat((val_indices1, val_indices2, val_indices3), dim=0)
inverse_indeces = [i for i in range(len(spectrograms)) if i not in concat_indices]

# Use the selected indices to create a Subset of the TensorDataset
val_dataset = torch.utils.data.Subset(spectrograms, concat_indices)
train_dataset = torch.utils.data.Subset(spectrograms, inverse_indeces)

In [19]:
# Load the data samples into PyTorch datasets and dataloaders.
print("Train size: ", len(train_dataset))
print("Val size: ", len(val_dataset))

Train size:  140000
Val size:  60000


In [20]:
train_dataset = DataLoader(train_dataset, shuffle=seed, batch_size=batch_size)
val_dataset = DataLoader(val_dataset, shuffle=seed, batch_size=batch_size)

Note: we don't initialize Softmax layer explicitley, since it is already pre-implemented inside of CrossValidation Loss function

In [24]:
# SCNN ver 1 (relu).
class simple_CNN_relu(nn.Module):
    def __init__(self, num_labels):
        super().__init__()

        self.dropout = 0.25

        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 64, 3, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(self.dropout),
        )

        self.flatten = nn.Flatten()

        self.fc_layers = nn.Sequential(
            nn.Linear(64000, 2048),
            nn.BatchNorm1d(num_features=2048),
            nn.ReLU(),
            nn.Linear(2048, num_labels),
            # nn.Softmax(dim=0)
        )

    def forward(self, x):

        # x = F.normalize(x, mean=[0.5], std=[0.5])
        x = self.conv_layers(x)
        x = self.flatten(x)
        x = self.fc_layers(x)
        return x

    
# SCNN ver 1 (sigm).
class simple_CNN_sigm(nn.Module):
    def __init__(self, num_labels):
        super().__init__()

        self.dropout = 0.25

        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 64, 3, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Sigmoid(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.Sigmoid(),
            nn.MaxPool2d(2),
            nn.Dropout(self.dropout),
        )

        self.flatten = nn.Flatten()

        self.fc_layers = nn.Sequential(
            nn.Linear(64000, 2048),
            nn.BatchNorm1d(num_features=2048),
            nn.Sigmoid(),
            nn.Linear(2048, num_labels),
        )

    def forward(self, x):

        # x = F.normalize(x, mean=[0.5], std=[0.5])
        x = self.conv_layers(x)
        x = self.flatten(x)
        x = self.fc_layers(x)
        return x

    
# SCNN ver 2 (and 2.4) (sigm or relu).
class lstm_CNN_simple(nn.Module):
    def __init__(self, num_labels):
        super().__init__()

        self.dropout = 0.25
        self.output_height = 256

        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 64, 3, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Sigmoid(),
            nn.MaxPool2d(2),
            nn.Dropout(self.dropout),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.Sigmoid(),
            nn.MaxPool2d(2),
            nn.Dropout(self.dropout),
        )

        self.lstm = nn.LSTM(input_size=128, hidden_size=256, num_layers=2, batch_first=True, bidirectional=True, dropout=self.dropout)

        self.flatten = nn.Flatten()

        self.fc_layers = nn.Sequential(
            nn.Linear(256, 2048),
            nn.BatchNorm1d(num_features=2048),
            nn.Sigmoid(),
            nn.Linear(2048, num_labels),
            # nn.Softmax(dim=0)
        )
    def forward(self, x):
        # x = F.resize(x, (self.input_height, self.input_width))
        # x = F.normalize(x, mean=[0.5], std=[0.5])
        # print(x.shape)
        x = self.conv_layers(x)
        x_hor = nn.AdaptiveAvgPool2d((1, 25))(x)  # Replace 'sequence_length' with the desired value
        # print('x_hor: ', x_hor.shape)
        x_hor = x_hor.squeeze().permute(0, 2, 1)
        # print('x_hor: ', x_hor.shape)
        x_hor , (hn_hor, cn_hor) = self.lstm(x_hor)
        
        # Uncomment for 2-axis LSTM
        # x_ver = nn.AdaptiveAvgPool2d((20, 1))(x)  # Replace 'sequence_length' with the desired value
        # print('x_ver: ', x_ver.shape)
        # x_ver = x_ver.squeeze().permute(0, 2, 1)
        # print('x_ver: ', x_ver.shape)
        # x_ver , (hn_ver, cn_ver) = self.lstm(x_ver)
        # print('x: ', x.shape)
        # print('hn: ', hn.shape)
        # x = torch.concat([hn_hor[-1], hn_ver[-1]], dim=1)
        # print(x.shape)
        # x = x.permute(1, 0, 2)
        # print(x.shape)
        x = hn_hor[-1]
        x = self.flatten(x)
        x = self.fc_layers(x)
        return x


# SCNN ver 3 and 4 (ReLU).
class lstm_CNN_relu(nn.Module):
    def __init__(self, num_labels):
        super().__init__()

        self.dropout = 0.25
        self.output_height = 256
        
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 64, 3, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),
            nn.Dropout(self.dropout),
        )

        self.lstm = nn.LSTM(input_size=256, hidden_size=512, num_layers=2, batch_first=True, bidirectional=True, dropout=self.dropout)

        self.flatten = nn.Flatten()

        self.fc_layers = nn.Sequential(
            nn.Linear(512, 12800),
            nn.BatchNorm1d(num_features=12800),
            nn.ReLU(),
            nn.Linear(12800, num_labels),
            # nn.Softmax(dim=0)
        )

    def forward(self, x):
        # x = F.resize(x, (self.input_height, self.input_width))
        # x = F.normalize(x, mean=[0.5], std=[0.5])
        # print(x.shape)
        x = self.conv_layers(x)
        x_hor = nn.AdaptiveAvgPool2d((1, 25))(x)  # Replace 'sequence_length' with the desired value
        # print('x_hor: ', x_hor.shape)
        x_hor = x_hor.squeeze().permute(0, 2, 1)
        # print('x_hor: ', x_hor.shape)
        x_hor , (hn_hor, cn_hor) = self.lstm(x_hor)
        
        # # Uncomment for 2-axis LSTM
        # x_ver = nn.AdaptiveAvgPool2d((20, 1))(x)  # Replace 'sequence_length' with the desired value
        # print('x_ver: ', x_ver.shape)
        # x_ver = x_ver.squeeze().permute(0, 2, 1)
        # print('x_ver: ', x_ver.shape)
        # x_ver , (hn_ver, cn_ver) = self.lstm(x_ver)
        # print('x: ', x.shape)
        # print('hn: ', hn.shape)
        # x = torch.concat([hn_hor[-1], hn_ver[-1]], dim=1)
        # print(x.shape)
        x = hn_hor[-1]
        # x = x.permute(1, 0, 2)
        # print(x.shape)
        x = self.flatten(x)
        x = self.fc_layers(x)
        return x


# SCNN ver 3 and 4 (sigm).
class lstm_CNN_sigm(nn.Module):
    def __init__(self, num_labels):
        super().__init__()

        self.dropout = 0.25
        self.output_height = 256

        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 64, 3, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.Sigmoid(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.Sigmoid(),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.Sigmoid(),
            nn.Dropout(self.dropout),
        )

        self.lstm = nn.LSTM(input_size=256, hidden_size=512, num_layers=2, batch_first=True, bidirectional=True, dropout=self.dropout)

        self.flatten = nn.Flatten()

        self.fc_layers = nn.Sequential(
            nn.Linear(512, 12800),
            nn.BatchNorm1d(num_features=12800),
            nn.Sigmoid(),
            nn.Linear(12800, num_labels),
            # nn.Softmax(dim=0)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x_hor = nn.AdaptiveAvgPool2d((1, 25))(x)  # Replace 'sequence_length' with the desired value
        # print('x_hor: ', x_hor.shape)
        x_hor = x_hor.squeeze().permute(0, 2, 1)
        # print('x_hor: ', x_hor.shape)
        x_hor , (hn_hor, cn_hor) = self.lstm(x_hor)
        x = hn_hor[-1]
        
        # Uncomment for 2-axis LSTM
        # x_ver = nn.AdaptiveAvgPool2d((20, 1))(x)  # Replace 'sequence_length' with the desired value
        # print('x_ver: ', x_ver.shape)
        # x_ver = x_ver.squeeze().permute(0, 2, 1)
        # print('x_ver: ', x_ver.shape)
        # x_ver , (hn_ver, cn_ver) = self.lstm(x_ver)
        # print('x: ', x.shape)
        # print('hn: ', hn.shape)
        # x = torch.concat([hn_hor[-1], hn_ver[-1]], dim=1)
        # print(x.shape)
        x = self.flatten(x)
        x = self.fc_layers(x)
        return x

    
# LSTM only model architecture.
class lstm_nn(nn.Module):
    def __init__(self, num_labels):
        super().__init__()

        self.dropout = 0.25

        self.lstm = nn.LSTM(input_size=1, hidden_size=256, num_layers=2, batch_first=True, bidirectional=True, dropout=0.25)

        self.flatten = nn.Flatten()

        self.fc_layers = nn.Sequential(
            nn.Linear(512, int(12288/2)),
            nn.BatchNorm1d(num_features=int(12288/2)),
            nn.Sigmoid(),
            nn.Linear(int(12288/2), 12288),
            nn.BatchNorm1d(num_features=12288),
            nn.Sigmoid(),
            nn.Linear(12288, num_labels),
            # nn.Softmax(dim=0)
        )

    def forward(self, x):
        # x = F.resize(x, (self.input_height, self.input_width))
        # x = F.normalize(x, mean=[0.5], std=[0.5])
        # print(x.shape)
        # x = self.conv_layers(x)
        x_hor = nn.AdaptiveAvgPool2d((1, 100))(x)  # Replace 'sequence_length' with the desired value
        # print('x_hor: ', x_hor.shape)
        x_hor = x_hor.squeeze(1).permute(0, 2, 1)
        # print('x_hor: ', x_hor.shape)
        x_hor , (hn_hor, cn_hor) = self.lstm(x_hor)

        x_ver = nn.AdaptiveAvgPool2d((80, 1))(x)  # Replace 'sequence_length' with the desired value
        # print('x_ver: ', x_ver.shape)
        x_ver = x_ver.squeeze(3).permute(0, 2, 1)
        # print('x_ver: ', x_ver.shape)
        x_ver , (hn_ver, cn_ver) = self.lstm(x_ver)
        # print('x: ', x.shape)
        # print('hn: ', hn.shape)
        x = torch.concat([hn_hor[-1], hn_ver[-1]], dim=1)
        # print(x.shape)
        # x = x.permute(1, 0, 2)
        # print(x.shape)
        x = self.flatten(x)
        x = self.fc_layers(x)
        return x

Create optional methods for weight initialization 

In [25]:
def weights_init_he(m):
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight.data, mode='fan_in')
        nn.init.zeros_(m.bias.data)

def weights_init_xavier(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight.data)
        nn.init.zeros_(m.bias.data)


In [26]:
# Instantiate the CNN model and the loss and optimizer functions.
model = lstm_CNN_sigm(num_labels=len_labels)

# Init weights (optional)
#model.apply(weights_init_xavier)

# Init loss function (with or without label weights)
criterion = nn.CrossEntropyLoss()
#criterion = nn.CrossEntropyLoss(weight=class_weights)

# Init Optimization Algorithm
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# define the scheduler
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True,)

# Put the model on the CPU or GPU (Cuda)
model.to(device)

lstm_CNN_sigm(
  (conv_layers): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Sigmoid()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Sigmoid()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): Sigmoid()
    (11): Dropout(p=0.25, inplace=False)
  )
  (lstm): LSTM(256, 512, num_layers=2, batch_first=True, dropout=0.25, bidirectional=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc_layers): Sequential(
    (0): Linear(in_featur

## Training<a class="anchor" id="fifth-bullet"></a>

[Back to the Table of Contents](#zero-bullet)

Chose the path and save the desired architecture

In [None]:
path_to_model = r'models\complex_CNN_lstm_sigmoid'
architecture_num = 5
run = 1

In [27]:
with open(fr'{path_to_model}\architecture_{architecture_num}\architecture.txt', 'w') as f:
    print(model, file=f)

Ensure that the path to the model folder exists. This folder should include subfolder "architecture_{architecture_num}" and subfolder "logs"

In [28]:
# Initialize a SummaryWriter
writer = SummaryWriter(fr'{path_to_model}\architecture_{architecture_num}\logs\run_{run}')

best_val_acc = 0.0
best_val_loss = np.inf
best_loss_epoch = 0
best_acc_epoch = 0
patience = 0

# Train the model
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0

    # Set the model to training mode
    model.train()

    for data, target in train_dataset:
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        # data = data.permute(0, 3, 1, 2)
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        output = model(data)
        loss = criterion(output, target)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Compute running loss and accuracy
        running_loss += loss.item() * data.size(0)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

    # Compute training loss and accuracy
    train_loss = running_loss / len(train_dataset)
    train_acc = 100.0 * correct / total

    # Write to TensorBoard
    writer.add_scalar('Training Loss', train_loss, epoch)
    writer.add_scalar('Training Accuracy', train_acc, epoch)

    # Evaluate the model on the validation set
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in val_dataset:
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)
            # data = data.permute(0, 3, 1, 2)

            # Forward pass
            output = model(data)
            loss = criterion(output, target)

            # Compute running loss and accuracy
            running_loss += loss.item() * data.size(0)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    # Compute validation loss and accuracy
    val_loss = running_loss / len(val_dataset)
    val_acc = 100.0 * correct / total 
    scheduler.step(val_loss)

    # Write to TensorBoard
    writer.add_scalar('Validation Loss', val_loss, epoch)
    writer.add_scalar('Validation Accuracy', val_acc, epoch)

    if val_loss < best_val_loss:
        patience = 0
        best_val_loss = val_loss
        best_loss_epoch = epoch
        # Save the model's state dictionary
        torch.save(model.state_dict(), fr'{path_to_model}\architecture_{architecture_num}\best_loss_model.pth')
    else:
        patience += 1

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_acc_epoch = epoch
        # Save the model's state dictionary
        torch.save(model.state_dict(), fr'{path_to_model}\architecture_{architecture_num}\best_acc_model.pth')

    # Print training and validation statistics
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Los'
          f''
          f's: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
    if train_acc > 99.9:
        break

torch.save(model.state_dict(), fr'{path_to_model}\architecture_{architecture_num}\last_epoch.pth')
# Close the SummaryWriter
writer.close()

Epoch [1/100], Train Loss: 315.2178, Train Acc: 0.42%, Val Loss: 321.2847, Val Acc: 0.39%
Epoch [2/100], Train Loss: 235.9790, Train Acc: 2.64%, Val Loss: 279.2842, Val Acc: 2.07%
Epoch [3/100], Train Loss: 180.2233, Train Acc: 9.47%, Val Loss: 190.3739, Val Acc: 10.67%
Epoch [4/100], Train Loss: 138.3776, Train Acc: 21.36%, Val Loss: 273.5992, Val Acc: 2.64%
Epoch [5/100], Train Loss: 108.1334, Train Acc: 33.81%, Val Loss: 150.4012, Val Acc: 25.13%
Epoch [6/100], Train Loss: 85.7527, Train Acc: 44.43%, Val Loss: 228.5961, Val Acc: 14.86%
Epoch [7/100], Train Loss: 68.4395, Train Acc: 53.09%, Val Loss: 163.5896, Val Acc: 23.11%
Epoch [8/100], Train Loss: 55.3748, Train Acc: 60.28%, Val Loss: 138.4757, Val Acc: 35.09%
Epoch [9/100], Train Loss: 44.7948, Train Acc: 66.45%, Val Loss: 146.5965, Val Acc: 29.44%
Epoch [10/100], Train Loss: 36.9958, Train Acc: 71.38%, Val Loss: 151.1981, Val Acc: 35.87%
Epoch [11/100], Train Loss: 29.9925, Train Acc: 76.00%, Val Loss: 166.1921, Val Acc: 26.68

Show the best epochs

In [None]:
best_loss_epoch + 1

In [30]:
best_acc_epoch + 1

23

Quick test of the predictions

In [None]:
_, predicted = torch.max(output.data, 1)

In [None]:
sns.barplot(x=list(range(968)), y=list(torch.softmax(output.data.cpu(), dim=0)[0].numpy()))

In [None]:
list(torch.softmax(output.data.cpu(), dim=0)[0].numpy())

In [None]:
torch.softmax(output.data.cpu(), dim=0)[0]

In [None]:
sns.heatmap(torch.softmax(output.data.cpu(), dim=0))

In [None]:
sns.heatmap(data.cpu()[0].squeeze())

In [None]:
data.shape

To improve the regularization strength of your model, you can try adjusting the hyperparameters related to regularization techniques, such as L1 or L2 regularization, dropout, or early stopping.

L1 and L2 regularization are techniques that add a penalty term to the loss function, which encourages the model to learn simpler weights and reduce overfitting. In PyTorch, you can add L1 or L2 regularization by setting the weight_decay parameter in the optimizer to a nonzero value. For example, you can set weight_decay=0.01 to add L2 regularization to the Adam optimizer:

optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.01)

Dropout is a technique that randomly drops out some of the neurons in a layer during training, which helps prevent the model from relying too heavily on specific neurons and reduces overfitting. In your current architecture, you already have two dropout layers with probabilities of 0.25 and 0.5. You can experiment with adjusting the dropout probabilities to see if it improves the model's performance.

Early stopping is a technique where you stop training the model once the validation loss stops improving or starts to increase. This helps prevent the model from overfitting to the training data and improves its generalization ability. In PyTorch, you can use the torch.optim.lr_scheduler.ReduceLROnPlateau scheduler to reduce the learning rate when the validation loss stops improving:

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=5, verbose=True)

You can also experiment with adjusting the factor, patience, and verbose parameters to see if it improves the model's performance.

In [35]:
# Free memory
torch.cuda.empty_cache()

## Instant evaluation logic<a class="anchor" id="sixth-bullet"></a>

[Back to the Table of Contents](#zero-bullet)

In [42]:
# extract each 10th spectrogram from the RHS
test_data_set = TensorDataset(*spectrograms[::10])

In [44]:
test_data_set = DataLoader(test_data_set, shuffle=seed, batch_size=batch_size)

In [46]:
state_dict = torch.load(fr'{path_to_model}\architecture_{architecture_num}/best_loss_model.pth')

In [47]:
model.load_state_dict(state_dict)

<All keys matched successfully>

In [48]:
model.eval()

lstm_CNN(
  (conv_layers): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Dropout(p=0.25, inplace=False)
  )
  (lstm): LSTM(128, 256, num_layers=2, batch_first=True, dropout=0.25, bidirectional=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc_layers): Sequential(
    (0): Linear(in_features=256, out_features=2048, bias=True)
    (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=2048, out_features=1000, b

In [49]:
running_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_data_set:
                target = target.type(torch.LongTensor)
                data, target = data.to(device), target.to(device)
                # data = data.permute(0, 3, 1, 2)

                # Forward pass
                output = model(data)
                loss = criterion(output, target)

                # Compute running loss and accuracy
                running_loss += loss.item() * data.size(0)
                _, predicted = torch.max(output.data, 1)
                total += target.size(0)
                correct += (predicted == target).sum().item()

    # Compute validation loss and accuracy
    val_loss = running_loss / len(test_data_set)
    val_acc = 100.0 * correct / total

In [51]:
print(f'Evaluation - Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')


Evaluation - Val Loss: 0.0149, Val Acc: 100.00%
