# Training with DS-CNN

## Import the required modules

In [None]:
import numpy as np
import os
import pandas as pd
import random
import torch
import torchaudio
import torchaudio.transforms as T

from time import time
from torch import nn
from torch.utils.data import Dataset

from msc_dataset_lab4 import MSCDataset

## Define the Hyperparameters

In [2]:
# Hyperparameters shared by the MFCC front end, the seeding cell and the training sampler.
CFG = dict(
    # Audio framing and MFCC extraction
    sampling_rate=16000,
    frame_length_in_s=0.04,
    frame_step_in_s=0.02,
    n_mels=40,
    f_min=0,
    f_max=8000,
    n_mfcc=40,
    # Reproducibility and training schedule
    seed=0,
    train_steps=2000,
    train_batch_size=32,
)

## Define the target classes

In [None]:
# Target labels for the classifier; the number of entries sets the model's output size
CLASSES = [
    'start',
    'stop',
    'other',
]

## Set Deterministic Behaviour

In [3]:
# Seed PyTorch, NumPy and the stdlib RNG from the single configured seed.
# random.seed stays last so the cell's final call returns None.
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(CFG['seed'])

## Create Datasets and Dataloaders for train/test

In [None]:
# Window and hop sizes converted from seconds to samples at the configured rate
frame_length = int(CFG['frame_length_in_s'] * CFG['sampling_rate'])
frame_step = int(CFG['frame_step_in_s'] * CFG['sampling_rate'])

# MFCC front end applied to every clip by the datasets below.
# The sample rate is read from CFG (not hard-coded) so the mel filterbank
# always agrees with the frame sizes derived from the same setting.
transform = T.MFCC(
    sample_rate=CFG['sampling_rate'],
    n_mfcc=CFG['n_mfcc'],
    log_mels=True,
    melkwargs=dict(
        # Spectrogram parameters
        n_fft=frame_length,
        win_length=frame_length,
        hop_length=frame_step,
        center=False,
        # Mel Spectrogram parameters
        f_min=CFG['f_min'],
        f_max=CFG['f_max'],
        n_mels=CFG['n_mels'],
    )
)

# Instantiate train_ds and test_ds objects
# NOTE(review): MSCDataset is project-local; it presumably applies `preprocess`
# to each waveform and yields dicts with 'x' (features) and 'y' (label) — confirm.
train_ds = MSCDataset(
    root='.',
    classes=CLASSES,
    split='training',
    preprocess=transform,
    download=True,
)

test_ds = MSCDataset(
    root='.',
    classes=CLASSES,
    split='testing',
    preprocess=transform,
    download=True,
)

# Draw with replacement so the loader yields exactly `train_steps` batches,
# independent of how many clips the training split contains.
sampler = torch.utils.data.RandomSampler(
    train_ds,
    replacement=True,
    num_samples=CFG['train_steps'] * CFG['train_batch_size'],
)
train_loader = torch.utils.data.DataLoader(
    train_ds,
    batch_size=CFG['train_batch_size'],
    sampler=sampler,
    num_workers=2,
)

# Sequential pass over the test split
test_loader = torch.utils.data.DataLoader(
    test_ds, batch_size=100, num_workers=2
)

## Create the Model

In [None]:
class DSCNN(nn.Module):
    """DS-CNN classifier built from depthwise-separable convolutions.

    Architecture: a strided 3x3 convolution stem, two depthwise-separable
    blocks (3x3 depthwise conv followed by 1x1 pointwise conv, each with
    batch norm and ReLU), global average pooling and a linear classifier.
    All convolutions are bias-free and 128 channels wide.

    Args:
        num_classes: Number of output scores produced per input.
    """

    def __init__(self, num_classes=8):
        super().__init__()

        # Stem: single-channel input -> 128 feature maps, downsampled by 2
        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=128,
            kernel_size=3, stride=2, padding=0, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(128)
        self.relu1 = nn.ReLU()

        # DS block 1 — depthwise (groups == channels) then pointwise (1x1)
        self.dconv1 = nn.Conv2d(
            in_channels=128, out_channels=128,
            kernel_size=3, stride=1, padding=1, groups=128, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()
        self.sconv1 = nn.Conv2d(
            in_channels=128, out_channels=128,
            kernel_size=1, stride=1, padding=0, bias=False,
        )
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()

        # DS block 2 — same layout as block 1
        self.dconv2 = nn.Conv2d(
            in_channels=128, out_channels=128,
            kernel_size=3, stride=1, padding=1, groups=128, bias=False,
        )
        self.bn4 = nn.BatchNorm2d(128)
        self.relu4 = nn.ReLU()
        self.sconv2 = nn.Conv2d(
            in_channels=128, out_channels=128,
            kernel_size=1, stride=1, padding=0, bias=False,
        )
        self.bn5 = nn.BatchNorm2d(128)
        self.relu5 = nn.ReLU()

        # Head: collapse each feature map to one value, then classify
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(128, num_classes, bias=True)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Batch of single-channel 2-D feature maps, (batch, 1, H, W).
        """
        # Stem
        x = self.relu1(self.bn1(self.conv1(x)))

        # DS block 1
        x = self.relu2(self.bn2(self.dconv1(x)))
        x = self.relu3(self.bn3(self.sconv1(x)))

        # DS block 2
        x = self.relu4(self.bn4(self.dconv2(x)))
        x = self.relu5(self.bn5(self.sconv2(x)))

        # (batch, 128, 1, 1) -> (batch, 128) -> (batch, num_classes)
        pooled = self.gap(x)
        return self.fc(pooled.flatten(1))

# Build the network with one output score per entry in CLASSES
model = DSCNN(num_classes=len(CLASSES))

In [None]:
# Print the module's textual representation to inspect its layers
print(model)

## Define the Training Loss and Optimizer

In [None]:
# Adam over all model parameters, paired with a cross-entropy objective on the class scores
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
loss_module = nn.CrossEntropyLoss()

## Define the Training Loop

In [None]:
# Run one optimizer update per batch drawn from train_loader
model.train()

for step, batch in enumerate(train_loader):
    inputs, targets = batch['x'], batch['y']

    # Drop gradients left over from the previous step before computing new ones
    optimizer.zero_grad()

    # Forward pass and loss
    loss = loss_module(model(inputs), targets)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Report on the very first step and whenever (step + 1) is a multiple of 100
    is_log_step = step == 0 or (step + 1) % 100 == 0
    if is_log_step:
        print(f'Step={step}; Training Loss={loss.item():.3f}')

## Evaluate the Model

In [None]:
# Compute accuracy over test_loader with the model in eval mode and autograd disabled
model.eval()

correct, total = 0, 0

with torch.no_grad():
    for batch in test_loader:
        inputs, targets = batch['x'], batch['y']

        # Predicted class = index of the highest score along the class dimension
        predicted = torch.argmax(model(inputs), dim=1)

        correct += (predicted == targets).sum().item()
        total += targets.size(0)

# Accuracy as a percentage
test_accuracy = (correct / total) * 100

print(f'Test Accuracy: {test_accuracy:.2f}%')

Test Accuracy: 93.25%


## Save the Model

In [None]:
# Unix timestamp (seconds) used as the unique identifier of this run's artifacts
timestamp = int(time())

# exist_ok avoids the check-then-create race and is a no-op when the folder exists
saved_model_dir = './saved_models/'
os.makedirs(saved_model_dir, exist_ok=True)

print(f'Model Timestamp: {timestamp}')

# os.path.join avoids the doubled separator that plain concatenation would produce
frontend_path = os.path.join(saved_model_dir, f'{timestamp}_frontend.onnx')
model_path = os.path.join(saved_model_dir, f'{timestamp}_model.onnx')

# Export in inference mode so BatchNorm uses its running statistics; this is a
# no-op when the evaluation cell has already run, but protects out-of-order runs.
model.eval()

# Feature-extraction front end (waveform -> MFCC)
# NOTE(review): the dummy input is presumably one second of mono audio at
# 16 kHz — confirm it matches the clip length MSCDataset provides.
torch.onnx.export(
    transform,  # model to export
    torch.randn(1, 1, 16000),  # inputs of the model,
    frontend_path,  # filename of the ONNX model
    input_names=['input'], # input name in the ONNX model
    dynamo=True,
    optimize=True,
    report=False,
    external_data=False,
)

# Classifier, traced with one preprocessed training example plus a batch dimension
torch.onnx.export(
    model,  # model to export
    train_ds[0]['x'].unsqueeze(0),  # inputs of the model,
    model_path,  # filename of the ONNX model
    input_names=['input'], # input name in the ONNX model
    dynamo=True,
    optimize=True,
    report=False,
    external_data=False,
)

## Save Hyperparameters & Results

In [11]:
# One flat record per run: identifier first, then every hyperparameter, then the metric
output_dict = {'timestamp': timestamp}
output_dict.update(CFG)
output_dict['test_accuracy'] = test_accuracy

df = pd.DataFrame([output_dict])

# Append to the results file; the header row is written only when the file does not exist yet
output_path = './dscnn_results.csv'
write_header = not os.path.exists(output_path)
df.to_csv(output_path, mode='a', header=write_header, index=False)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3880e510-b64c-4bb5-b488-c2122d5d9e2d' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>