**The model is trained in three ways:**

- using the images produced with `MFCC` in `07_preprocessing_mfcc.ipynb`
- using spectrograms computed with `MFCC` on the fly
- using spectrograms computed with `MFCC` on the fly, with augmentations

In [1]:
import os
import yaml
import joblib
#
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
#
import torchaudio
import torchaudio.transforms as T
import torchaudio.functional as F
#
import torchvision
from torchvision.io import read_image
from torchvision.transforms import ConvertImageDtype
#
import matplotlib.pyplot as plt
#
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [2]:
# Report the installed versions of the PyTorch stack used by this notebook.
for package in (torch, torchaudio, torchvision):
    print(package.__version__)

2.3.0+cu121
2.3.0+cu121
0.18.0+cu121


In [3]:
# Directory with the raw audio; the on-the-fly datasets read `<episode>.mp3` from it.
raw_path = 'data/raw/'
# Directory with the pre-rendered images; the image dataset reads `<episode>.png` from it.
data_path = 'data/mfcc/'

In [4]:
LEARNING_RATE = 0.001

In [5]:
# Load the episode -> label mapping stored under the top-level key 2022.
# The context manager closes the file handle, which a bare `open(...)` call would leak.
with open('labels.yaml') as labels_file:
    labels = yaml.safe_load(labels_file)[2022]

In [6]:
# Split the mapping into two parallel tuples: X holds the keys (episodes), y the labels.
X, y = zip(*labels.items())
# shuffle=False keeps the entries in their stored order, so the leading 75% form the
# training set and the trailing 25% the test set; random_state has no effect when
# shuffling is disabled.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0, shuffle=False)

In [7]:
def training(model, optimizer, criterion, dataloader):
    """Run one training epoch and report the mean loss and ROC AUC.

    Relies on the module-level ``device`` for tensor placement.

    Args:
        model: Network producing one logit per sample.
        optimizer: Optimizer that updates the parameters of ``model``.
        criterion: Loss applied to the flattened logits and the float targets.
        dataloader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        Tuple ``(mean_loss, auc)``: the mean of the per-batch losses as a 0-d
        tensor, and the ROC AUC computed over every sample seen in the epoch.
    """
    losses, y_true, y_score = [], [], []
    #
    model.train()
    for X, Y in dataloader:
        X, Y = X.to(device), Y.to(device, dtype=torch.float32)
        #
        # Flatten to one logit per sample so any batch size is accepted; the
        # previous ``view(1)`` / ``.item()`` calls only worked for batches of one.
        logits = model(X).reshape(-1)
        targets = Y.reshape(-1)
        loss = criterion(logits, targets)
        #
        losses.append(loss.item())
        y_true.extend(targets.tolist())
        # Detach so the metric bookkeeping never touches the autograd graph.
        y_score.extend(torch.sigmoid(logits).detach().tolist())
        #
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return torch.tensor(losses).mean(), roc_auc_score(y_true, y_score)


def testing(model, optimizer, criterion, dataloader):
    """Evaluate the model on ``dataloader`` and report the mean loss and ROC AUC.

    Relies on the module-level ``device`` for tensor placement.

    Args:
        model: Network producing one logit per sample.
        optimizer: Unused; accepted so the signature mirrors ``training``.
        criterion: Loss applied to the flattened logits and the float targets.
        dataloader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        Tuple ``(mean_loss, auc)``: the mean of the per-batch losses as a 0-d
        tensor, and the ROC AUC computed over every evaluated sample.
    """
    losses, y_true, y_score = [], [], []
    #
    model.eval()
    with torch.inference_mode():
        for X, Y in dataloader:
            X, Y = X.to(device), Y.to(device, dtype=torch.float32)
            #
            # Flatten to one logit per sample so any batch size is accepted; the
            # previous ``view(1)`` / ``.item()`` calls only worked for batches of one.
            logits = model(X).reshape(-1)
            targets = Y.reshape(-1)
            loss = criterion(logits, targets)
            #
            losses.append(loss.item())
            y_true.extend(targets.tolist())
            y_score.extend(torch.sigmoid(logits).tolist())
    #
    return torch.tensor(losses).mean(), roc_auc_score(y_true, y_score)

# Modelling using images

In [8]:
device = 'cpu'

In [9]:
from utils.miscellaneous import update_stats
from utils.plot import get_figure, update_figure

In [10]:
class Podcast(Dataset):
    """Dataset pairing the stored PNG image of an episode with its integer label."""

    def __init__(self, X, y):
        # Converts the decoded image tensor to float32 on access.
        self.cid = ConvertImageDtype(torch.float32)
        self.x, self.y = X, y

    def __len__(self):
        """Return the number of episodes."""
        return len(self.x)

    def __getitem__(self, idx):
        """Read the image of episode ``idx`` and return ``(float_image, int_label)``."""
        target = self.y[idx]
        name = self.x[idx]
        pixels = read_image(f'{data_path}/{name}.png')
        return self.cid(pixels), int(target)

In [11]:
train = Podcast(X_train, y_train)
train_dataloader = DataLoader(train, shuffle=True)

In [12]:
test = Podcast(X_test, y_test)
test_dataloader = DataLoader(test)

In [13]:
class CNNet(nn.Module):
    """Small CNN that maps a 4-channel image to a single logit.

    Three convolution/pooling stages feed a two-layer classifier head.  The
    final adaptive pooling fixes the feature map at 5 x 10, so the head
    receives 4 * 5 * 10 = 200 features for any input large enough to pass
    through the convolutions.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=4, out_channels=4, kernel_size=(2, 4)),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=(2, 4))
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=4, out_channels=4, kernel_size=(3, 6)),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=(3, 6))
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=4, out_channels=4, kernel_size=(5, 10)),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(output_size=(5, 10))
        )
        #
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.33)
        self.linear1 = nn.Linear(200, 32)
        self.linear2 = nn.Linear(32, 1)

    def forward(self, x):
        """Return the raw (pre-sigmoid) logit for every sample in ``x``.

        Each call also prints ``=`` in training mode or ``-`` in evaluation
        mode (without a newline); the notebook uses it as a progress marker.
        """
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        #
        x = self.flatten(x)
        x = self.relu(self.linear1(x))
        x = self.dropout(x)
        #
        print('=' if self.training else '-', end='')
        return self.linear2(x)


model = CNNet().to(device, dtype=torch.float32)

In [14]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [15]:
filename = '08_mfcc_images'

In [None]:
# !rm artifacts/08_mfcc_images.joblib
# !rm artifacts/08_mfcc_images.pt

In [16]:
# Resume from a previous run when its artifacts exist.
if os.path.exists(f'artifacts/{filename}.pt'):
    # map_location lets a checkpoint written on another device load onto the current one.
    model.load_state_dict(
        torch.load(f'artifacts/{filename}.pt', map_location=device)
    )
    print('!Model weights are loaded.')

if os.path.exists(f'artifacts/{filename}.joblib'):
    stats = joblib.load(f'artifacts/{filename}.joblib')
    print('!Training stats is loaded.')
else:
    stats = {
        'epoch': [],
        'training_loss': [],
        'testing_loss': [],
        'training_auc': [],
        'testing_auc': [],
    }

# Continue with the epoch AFTER the last recorded one; restarting at the recorded
# epoch itself would run it again and log it twice.
# NOTE(review): assumes stats['epoch'] holds the epoch indices passed to update_stats.
se = stats['epoch'][-1] + 1 if stats['epoch'] else 0

In [17]:
figure = get_figure(stats)
figure

FigureWidget({
    'data': [{'line': {'color': 'black', 'dash': 'dash', 'width': 0.5},
              'marker': {'color': 'white', 'line': {'color': 'black', 'width': 1}, 'size': 6},
              'mode': 'lines+markers',
              'name': 'Training',
              'type': 'scatter',
              'uid': '5e3b2cf9-d500-4d83-9e78-be372e64baa0',
              'x': [],
              'xaxis': 'x',
              'y': [],
              'yaxis': 'y'},
             {'line': {'color': 'red', 'dash': 'dash', 'width': 0.5},
              'marker': {'color': 'white', 'line': {'color': 'red', 'width': 1}, 'size': 6},
              'mode': 'lines+markers',
              'name': 'Testing',
              'type': 'scatter',
              'uid': '20b84428-f359-4b48-ac37-f245f8254328',
              'x': [],
              'xaxis': 'x',
              'y': [],
              'yaxis': 'y'},
             {'line': {'color': 'black', 'dash': 'dash', 'width': 0.5},
              'marker': {'color': 'white', '

In [18]:
# Train up to epoch 199, recording and persisting the metrics after every epoch.
for epoch in range(se, 200):
    train_loss, train_auc = training(model, optimizer, criterion, train_dataloader)
    test_loss, test_auc = testing(model, optimizer, criterion, test_dataloader)

    update_stats(stats, epoch, train_loss, test_loss, train_auc, test_auc)
    update_figure(figure, stats)
    joblib.dump(stats, f'artifacts/{filename}.joblib')

    # The checkpoint decision is made every fifth epoch, once ten test losses exist.
    if epoch % 5 != 0 or len(stats['testing_loss']) < 10:
        continue
    # Save the weights only when the mean test loss of the latest five epochs
    # is lower than the mean of the five epochs before them.
    x0 = torch.tensor(stats['testing_loss'][-10:-5]).mean()
    latest = torch.tensor(stats['testing_loss'][-5:]).mean()
    if x0 > latest:
        torch.save(model.state_dict(), f'artifacts/{filename}.pt')
        print('!Model weights were saved.')

Epoch: 0 | Training loss: 0.6676 | Testing loss: 0.5911 | Training AUC: 0.5438 | Testing AUC: 0.6917
Epoch: 1 | Training loss: 0.6433 | Testing loss: 0.5823 | Training AUC: 0.3953 | Testing AUC: 0.6992
Epoch: 2 | Training loss: 0.6111 | Testing loss: 0.6061 | Training AUC: 0.5057 | Testing AUC: 0.6917
Epoch: 3 | Training loss: 0.5888 | Testing loss: 0.6063 | Training AUC: 0.5893 | Testing AUC: 0.7293
Epoch: 4 | Training loss: 0.6083 | Testing loss: 0.5949 | Training AUC: 0.4570 | Testing AUC: 0.7293
Epoch: 5 | Training loss: 0.6081 | Testing loss: 0.5864 | Training AUC: 0.4724 | Testing AUC: 0.7444
Epoch: 6 | Training loss: 0.5903 | Testing loss: 0.5909 | Training AUC: 0.5893 | Testing AUC: 0.7143
Epoch: 7 | Training loss: 0.6472 | Testing loss: 0.5904 | Training AUC: 0.5674 | Testing AUC: 0.7218
Epoch: 8 | Training loss: 0.6301 | Testing loss: 0.5894 | Training AUC: 0.3677 | Testing AUC: 0.7293
Epoch: 9 | Training loss: 0.6074 | Testing loss: 0.5893 | Training AUC: 0.4992 | Testing AU

# Modelling `as is` (spectrograms computed on the fly)

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [9]:
from utils.spectrograms import get_spectrogram
from utils.miscellaneous import update_stats
from utils.plot import get_figure, update_figure

In [10]:
class Podcast(Dataset):
    """Dataset that loads an episode's audio and converts it to a spectrogram on access."""

    def __init__(self, X, y):
        self.x, self.y = X, y

    def __len__(self):
        """Return the number of episodes."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(spectrogram, label)`` for episode ``idx``.

        The audio is averaged over dim 0 before being handed to
        ``get_spectrogram`` in ``'mfcc'`` mode; the label is returned unchanged.
        """
        target = self.y[idx]
        name = self.x[idx]
        audio, rate = torchaudio.load(f'{raw_path}/{name}.mp3')
        mono = audio.mean(dim=0)
        return get_spectrogram(mono, rate, 'mfcc'), target

In [11]:
train = Podcast(X_train, y_train)
train_dataloader = DataLoader(train, shuffle=True)

In [12]:
test = Podcast(X_test, y_test)
test_dataloader = DataLoader(test)

In [13]:
class CNNet(nn.Module):
    """Small single-channel CNN that maps a spectrogram to a single logit.

    Three convolution/pooling stages feed a two-layer classifier head.  The
    final adaptive pooling fixes the feature map at 5 x 10, so the head
    receives 1 * 5 * 10 = 50 features for any input large enough to pass
    through the convolutions.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2, 4)),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=(2, 4))
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(3, 6)),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=(3, 6))
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 10)),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(output_size=(5, 10))
        )
        #
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.33)
        self.linear1 = nn.Linear(50, 8)
        self.linear2 = nn.Linear(8, 1)

    def forward(self, x):
        """Return the raw (pre-sigmoid) logit for every sample in ``x``.

        Each call also prints ``=`` in training mode or ``-`` in evaluation
        mode (without a newline); the notebook uses it as a progress marker.
        """
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        #
        x = self.flatten(x)
        x = self.relu(self.linear1(x))
        x = self.dropout(x)
        print('=' if self.training else '-', end='')
        #
        return self.linear2(x)


model = CNNet().to(device, dtype=torch.float32)

In [14]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [15]:
filename = '08_mfcc'

In [14]:
# !rm artifacts/08_mfcc.joblib
# !rm artifacts/08_mfcc.pt

In [16]:
# Resume from a previous run when its artifacts exist.
if os.path.exists(f'artifacts/{filename}.pt'):
    # map_location lets a checkpoint written on another device load onto the current one.
    model.load_state_dict(
        torch.load(f'artifacts/{filename}.pt', map_location=device)
    )
    print('!Model weights are loaded.')

if os.path.exists(f'artifacts/{filename}.joblib'):
    stats = joblib.load(f'artifacts/{filename}.joblib')
    print('!Training stats is loaded.')
else:
    stats = {
        'epoch': [],
        'training_loss': [],
        'testing_loss': [],
        'training_auc': [],
        'testing_auc': [],
    }

# Continue with the epoch AFTER the last recorded one; restarting at the recorded
# epoch itself would run it again and log it twice.
# NOTE(review): assumes stats['epoch'] holds the epoch indices passed to update_stats.
se = stats['epoch'][-1] + 1 if stats['epoch'] else 0

!Model weights are loaded.
!Training stats is loaded.


In [17]:
figure = get_figure(stats)
figure

FigureWidget({
    'data': [{'line': {'color': 'black', 'dash': 'dash', 'width': 0.5},
              'marker': {'color': 'white', 'line': {'color': 'black', 'width': 1}, 'size': 6},
              'mode': 'lines+markers',
              'name': 'Training',
              'type': 'scatter',
              'uid': '3f68674d-9163-4d53-8842-c1965f96482f',
              'x': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14, 15, 16,
                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45],
              'xaxis': 'x',
              'y': [0.6181247234344482, 0.611427903175354, 0.5844593644142151,
                    0.5849707126617432, 0.526597261428833, 0.5729634761810303,
                    0.5079140067100525, 0.5262066721916199, 0.516080379486084,
                    0.5420356392860413, 0.5084055066108704, 0.49899885058403015,
                    0.49587610363960266, 0.5069407820701599, 0.482

In [None]:
# Train up to epoch 199, recording and persisting the metrics after every epoch.
for epoch in range(se, 200):
    train_loss, train_auc = training(model, optimizer, criterion, train_dataloader)
    test_loss, test_auc = testing(model, optimizer, criterion, test_dataloader)

    update_stats(stats, epoch, train_loss, test_loss, train_auc, test_auc)
    update_figure(figure, stats, filename)
    joblib.dump(stats, f'artifacts/{filename}.joblib')

    # The checkpoint decision is made every fifth epoch, once ten test losses exist.
    if epoch % 5 != 0 or len(stats['testing_loss']) < 10:
        continue
    # Save the weights only when the mean test loss of the latest five epochs
    # is lower than the mean of the five epochs before them.
    x0 = torch.tensor(stats['testing_loss'][-10:-5]).mean()
    latest = torch.tensor(stats['testing_loss'][-5:]).mean()
    if x0 > latest:
        torch.save(model.state_dict(), f'artifacts/{filename}.pt')
        print('!Model weights were saved.')

=


Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)





## Modelling `as is` + augmentations

In [34]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [35]:
from utils.augmentations import augment, masking
from utils.spectrograms import get_spectrogram
from utils.miscellaneous import update_stats
from utils.plot import get_figure, update_figure

In [36]:
class Podcast(Dataset):
    """Dataset producing spectrograms from raw audio, augmented only in training mode."""

    def __init__(self, X, y, clipping=True, training=True):
        self.x, self.y = X, y
        # `clipping` is forwarded to `augment`; `training` switches augmentation on or off.
        self.clipping, self.training = clipping, training

    def __len__(self):
        """Return the number of episodes."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(spectrogram, label)`` for episode ``idx``.

        In training mode the waveform goes through ``augment`` before the
        transform and the resulting spectrogram through ``masking``; otherwise
        both steps are skipped.
        """
        target = self.y[idx]
        name = self.x[idx]
        audio, rate = torchaudio.load(f'{raw_path}/{name}.mp3')
        audio = audio.mean(dim=0)

        if self.training:
            audio = augment(audio, rate, self.clipping)
        features = get_spectrogram(audio, rate, 'mfcc')
        return (masking(features) if self.training else features), target

In [37]:
train = Podcast(X_train, y_train)
train_dataloader = DataLoader(train, shuffle=True)

In [38]:
test = Podcast(X_test, y_test, training=False)
test_dataloader = DataLoader(test)

In [49]:
class CNNet(nn.Module):
    """Single-channel CNN with square kernels that emits one logit.

    The last stage pools adaptively to 16 x 32, giving the classifier head
    1 * 16 * 32 = 512 input features.
    NOTE(review): the shapes printed by ``forward`` in the notebook output are
    3-D, which suggests the DataLoader's batch axis of size 1 is being used as
    the channel axis — confirm against ``get_spectrogram``.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 2x2 convolution followed by 2x2 average pooling.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 1, kernel_size=2),
            nn.ReLU(),
            nn.AvgPool2d(2),
        )
        # Stage 2: 3x3 convolution followed by 3x3 average pooling.
        self.layer2 = nn.Sequential(
            nn.Conv2d(1, 1, kernel_size=3),
            nn.ReLU(),
            nn.AvgPool2d(3),
        )
        # Stage 3: 5x5 convolution, then adaptive pooling to a fixed 16 x 32 map.
        self.layer3 = nn.Sequential(
            nn.Conv2d(1, 1, kernel_size=5),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((16, 32)),
        )
        # Classifier head.
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.33)
        self.linear1 = nn.Linear(in_features=512, out_features=64)
        self.linear2 = nn.Linear(in_features=64, out_features=1)

    def forward(self, x):
        """Return the raw logit(s) for ``x``.

        Each call prints the shapes of the input and of the first two stage
        outputs together with the current ``training`` flag.
        """
        after_first = self.layer1(x)
        after_second = self.layer2(after_first)
        features = self.layer3(after_second)
        print([x.shape, after_first.shape, after_second.shape], self.training)

        hidden = self.relu(self.linear1(self.flatten(features)))
        return self.linear2(self.dropout(hidden))


model = CNNet().to(device, dtype=torch.float32)

In [50]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [51]:
train_loss, train_auc = training(model, optimizer, criterion, train_dataloader)

[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37])] True
[torch.Size([1, 128, 209]), torch.Size([1, 63, 104]), torch.Size([1, 20, 34])] True
[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37])] True
[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37])] True
[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37])] True
[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37])] True
[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37])] True
[torch.Size([1, 128, 209]), torch.Size([1, 63, 104]), torch.Size([1, 20, 34])] True
[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37])] True
[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37])] True
[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37])] True
[torch.Size([1, 128, 227]), torch.Size([1, 63, 113]), torch.Size([1, 20, 37]

In [19]:
filename = '08_mfcc_augmented'

In [None]:
# !rm artifacts/08_mfcc_augmented.joblib
# !rm artifacts/08_mfcc_augmented.pt

In [20]:
# Resume from a previous run when its artifacts exist.
if os.path.exists(f'artifacts/{filename}.pt'):
    # map_location lets a checkpoint written on another device load onto the current one.
    model.load_state_dict(
        torch.load(f'artifacts/{filename}.pt', map_location=device)
    )
    print('!Model weights are loaded.')

if os.path.exists(f'artifacts/{filename}.joblib'):
    stats = joblib.load(f'artifacts/{filename}.joblib')
    print('!Training stats is loaded.')
else:
    stats = {
        'epoch': [],
        'training_loss': [],
        'testing_loss': [],
        'training_auc': [],
        'testing_auc': [],
    }

# Continue with the epoch AFTER the last recorded one; restarting at the recorded
# epoch itself would run it again and log it twice.
# NOTE(review): assumes stats['epoch'] holds the epoch indices passed to update_stats.
se = stats['epoch'][-1] + 1 if stats['epoch'] else 0

In [21]:
figure = get_figure(stats)
figure

FigureWidget({
    'data': [{'line': {'color': 'black', 'dash': 'dash', 'width': 0.5},
              'marker': {'color': 'white', 'line': {'color': 'black', 'width': 1}, 'size': 6},
              'mode': 'lines+markers',
              'name': 'Training',
              'type': 'scatter',
              'uid': 'edc7e996-62e5-4959-af8c-b5de8b11f1bd',
              'x': [],
              'xaxis': 'x',
              'y': [],
              'yaxis': 'y'},
             {'line': {'color': 'red', 'dash': 'dash', 'width': 0.5},
              'marker': {'color': 'white', 'line': {'color': 'red', 'width': 1}, 'size': 6},
              'mode': 'lines+markers',
              'name': 'Testing',
              'type': 'scatter',
              'uid': 'd1c68424-6ef3-4598-bb1a-22cce02b78dd',
              'x': [],
              'xaxis': 'x',
              'y': [],
              'yaxis': 'y'},
             {'line': {'color': 'black', 'dash': 'dash', 'width': 0.5},
              'marker': {'color': 'white', '

In [24]:
# Train up to epoch 199, recording and persisting the metrics after every epoch.
for epoch in range(se, 200):
    train_loss, train_auc = training(model, optimizer, criterion, train_dataloader)
    test_loss, test_auc = testing(model, optimizer, criterion, test_dataloader)

    update_stats(stats, epoch, train_loss, test_loss, train_auc, test_auc)
    update_figure(figure, stats)
    joblib.dump(stats, f'artifacts/{filename}.joblib')

    # The checkpoint decision is made every fifth epoch, once ten test losses exist.
    if epoch % 5 != 0 or len(stats['testing_loss']) < 10:
        continue
    # Save the weights only when the mean test loss of the latest five epochs
    # is lower than the mean of the five epochs before them.
    x0 = torch.tensor(stats['testing_loss'][-10:-5]).mean()
    latest = torch.tensor(stats['testing_loss'][-5:]).mean()
    if x0 > latest:
        torch.save(model.state_dict(), f'artifacts/{filename}.pt')
        print('!Model weights were saved.')

=


Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)



Epoch: 0 | Training loss: 0.6839 | Testing loss: 0.6688 | Training AUC: 0.5463 | Testing AUC: 0.5000
Epoch: 1 | Training loss: 0.6566 | Testing loss: 0.6394 | Training AUC: 0.4830 | Testing AUC: 0.5000
Epoch: 2 | Training loss: 0.6341 | Testing loss: 0.6189 | Training AUC: 0.5779 | Testing AUC: 0.5000
Epoch: 3 | Training loss: 0.6154 | Testing loss: 0.5984 | Training AUC: 0.3945 | Testing AUC: 0.5000
Epoch: 4 | Training loss: 0.6063 | Testing loss: 0.5902 | Training AUC: 0.4635 | Testing AUC: 0.5000
Epoch: 5 | Training loss: 0.5996 | Testing loss: 0.5872 | Training AUC: 0.4399 | Testing AUC: 0.5000
Epoch: 6 | Training loss: 0.5977 | Testing loss: 0.5852 | Training AUC: 0.3693 | Testing AUC: 0.5000

KeyboardInterrupt: 