## Set up paths and imports

In [1]:
import os

import torch
import torch.nn as nn
from torchvision import transforms

if not os.path.exists("./notebooks"):
    %cd ..

from src.training import train, validate
from src.dataset import prepare_dataset_loaders
from src.data_processing import load_mean_std
from src.config import DATASET_DIR, PATIENCE_THRESHOLD, VALID_ACCESS_LABELS

wandb_enabled = False

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/home/mytkom/Documents/iml


## 1. Load standardization data and define Config

In [2]:
class Config:
    """Hyperparameter container for a single training run.

    Attributes:
        learning_rate: Value of the ``lr`` constructor argument.
        epochs: Value of the ``epochs`` constructor argument.
        batch_size: Value of the ``batch_size`` constructor argument.
    """

    def __init__(self, lr=0.001, epochs=40, batch_size=32):
        # Assigned left to right, so the attribute order seen by vars() is
        # learning_rate, epochs, batch_size.
        self.learning_rate, self.epochs, self.batch_size = lr, epochs, batch_size

### Optionally initialize W&B project

In [3]:
import wandb

wandb_enabled = True

## 2. Define training and validation loop

In [4]:
# Train on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def do_train(name, train_loader, val_loader, config, model, criterion, optimizer):
    """Train ``model`` with F1-based early stopping, then save its weights.

    After every epoch the model is validated. Training stops early once the
    validation F1 has been below the best value seen so far for
    ``PATIENCE_THRESHOLD`` consecutive epochs. The weights are then written
    to ``./models/{name}.pth``.

    Parameters:
        name (str): Run name; used as the W&B run name and as the checkpoint
            file name.
        train_loader: Training loader, forwarded to ``train``; must support
            ``len()``.
        val_loader: Validation loader, forwarded to ``validate``.
        config: Object exposing ``epochs``; ``vars(config)`` is sent to W&B
            as the run configuration when W&B is enabled.
        model (nn.Module): Model to train. It is moved to ``device`` and gets
            a ``device`` attribute set on it.
        criterion: Loss, forwarded to ``train``.
        optimizer: Optimizer, forwarded to ``train``.

    Returns:
        None

    Note:
        The saved weights are those of the last epoch that ran, not of the
        epoch with the best validation F1.
    """
    if wandb_enabled:
        wandb.init(name=name, project="iml", config=vars(config))

    # Stored on the model as a plain attribute -- presumably read by
    # train/validate; confirm in src.training.
    model.device = device
    model.to(device)

    # The logger does not change between epochs, so build it once.
    if wandb_enabled:
        logger = wandb.log
    else:
        def logger(data, step):
            print(f"  Step {step}: {data}")

    patience = 0
    best_f1 = -1

    for epoch in range(config.epochs):
        print(f"Epoch {epoch+1}/{config.epochs}")

        # NOTE(review): the last argument looks like a logging interval giving
        # roughly five logs per epoch; it is <= 0 for loaders with fewer than
        # 10 batches -- confirm how src.training.train handles that.
        train(model, train_loader, criterion, optimizer, epoch, logger, len(train_loader) // 5 - 1)
        metrics = validate(model, val_loader)
        print(metrics)

        if wandb_enabled:
            wandb.log({
                "validation/recall": metrics.recall,
                "validation/accuracy": metrics.accuracy,
                "validation/precision": metrics.precision,
                "validation/f1": metrics.f1,
                "epoch": epoch + 1,
            })

        # A tie with the best F1 counts as "not worse" and resets patience.
        if metrics.f1 < best_f1:
            patience += 1
        else:
            patience = 0
            best_f1 = metrics.f1

        if patience >= PATIENCE_THRESHOLD:
            # Without this break the loop kept training (and re-saving) after
            # the patience budget was exhausted.
            print(f"Early stopping after epoch {epoch+1}: no F1 improvement for {patience} epochs")
            break

    # Single save point, reached both on early stop and on normal completion.
    model_path = f"./models/{name}.pth"
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    torch.save(model.state_dict(), model_path)

    if wandb_enabled:
        wandb.save(model_path)
        wandb.finish()
    


In [5]:
# Base model option: EfficientNet-B0 loaded with torchvision's DEFAULT weights.
from torchvision.models import EfficientNet_B0_Weights, efficientnet_b0

name_base = "EfficientNet_B0"
weights = EfficientNet_B0_Weights.DEFAULT
# Preprocessing pipeline that ships with the selected weights.
pre_trans = weights.transforms()
pretrained_model = efficientnet_b0(weights=weights)

In [5]:
# Base model option: VGG16 loaded with torchvision's DEFAULT weights.
from torchvision.models import VGG16_Weights, vgg16

name_base = "VGG16"
weights = VGG16_Weights.DEFAULT
# Preprocessing pipeline that ships with the selected weights.
pre_trans = weights.transforms()
pretrained_model = vgg16(weights=weights)

In [10]:

# Base model option: VGGish from the third-party `torchvggish` package.
# NOTE(review): VGGISH_WEIGHTS is imported but not used in this cell.
from torchvggish import vggish, VGGISH_WEIGHTS

# Built with postprocess=False; this cell does not define `pre_trans`.
pretrained_model = vggish(postprocess=False)
name_base="VGGish"

# Our own classifier: a two-layer head stacked on top of the base model.
N_CLASSES = 2

# assumes the VGGish base emits 128 features per example -- TODO confirm
my_model = nn.Sequential(
    pretrained_model,
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, N_CLASSES)
)

In [26]:
# Freeze base model (transfer learning)
pretrained_model.requires_grad_(False)
# Sanity check: every parameter of the base model must now be frozen. The
# previous bare expression was discarded because it was not the cell's last line.
assert not any(p.requires_grad for p in pretrained_model.parameters())
name = name_base + "_transfer_learning"

In [6]:
# Do not freeze model (fine-tuning).
# This cell only sets the run name; it does not change requires_grad on any
# parameter, so it does not undo a freeze applied by running the freezing cell.
name = name_base + "_fine_tuning"

In [7]:
# Our own classifier
N_CLASSES = 2

# Input width of the stock head = in_features of its first Linear layer.
# Indexing classifier[0] only works when the head starts with a Linear layer
# (VGG16); torchvision's EfficientNet head starts with Dropout, which has no
# in_features, so search for the first Linear instead.
num_features = next(
    layer.in_features
    for layer in pretrained_model.classifier
    if isinstance(layer, nn.Linear)
)
# Swap the stock head for a small head with N_CLASSES outputs.
pretrained_model.classifier = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, N_CLASSES)
)
my_model = pretrained_model

In [8]:
from torch.utils.data import Dataset
from PIL import Image

class SpectrogramVGG16Dataset(Dataset):
    """Dataset of PNG images in one directory, labelled from their file names.

    The part of the file name before the first underscore is treated as the
    speaker id; the label is 1 when that id is in ``VALID_ACCESS_LABELS`` and
    0 otherwise. Every image is passed through the module-level ``pre_trans``
    (set by the base-model selection cell) before the optional ``transform``.
    """

    def __init__(self, directory, transform=None):
        """
        Collects the paths of all ``.png`` files directly inside ``directory``.

        Parameters:
            directory (str): Directory to scan (not recursive).
            transform (callable, optional): Extra transform applied after
                ``pre_trans``.
        """
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping reproducible between runs and machines.
        self.files = sorted(
            os.path.join(directory, f)
            for f in os.listdir(directory)
            if f.endswith(".png")
        )
        self.transform = transform

    def __len__(self):
        """Returns the number of PNG files found in the directory."""
        return len(self.files)

    def __getitem__(self, idx):
        """
        Retrieves an image and its label.

        Parameters:
            idx (int): Index of the image in the dataset.

        Returns:
            tuple: A tuple containing the transformed image and its label.
        """
        img_path = self.files[idx]
        # basename instead of split("/") so the speaker id is also parsed
        # correctly when paths use a different separator (e.g. on Windows).
        speaker_id = os.path.basename(img_path).split("_")[0]
        label = int(speaker_id in VALID_ACCESS_LABELS)

        # Close the file handle deterministically; convert() returns a new,
        # fully loaded image that stays valid after the file is closed.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        image = pre_trans(image)

        if self.transform:
            image = self.transform(image)

        return image, label
    

In [9]:
# Hyperparameters for this run.
config = Config(lr=0.0001, epochs=40, batch_size=32)
model = my_model

# Loss and optimizer over all parameters of the assembled model.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

# An empty Compose holds no transforms; it is passed as the dataset transform.
transform = transforms.Compose([])
train_loader, val_loader, test_loader = prepare_dataset_loaders(
    transform, config.batch_size, SpectrogramVGG16Dataset
)

do_train(name, train_loader, val_loader, config, model, criterion, optimizer)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmytkom[0m ([33mmytkom-warsaw-university-of-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/40
Metrics:
    F1: 0.87,
    Accuracy: 0.84,
    Recall: 0.79,
    Precision: 0.98,
    False acceptance: 0.04,
    False rejection: 0.21
Epoch 2/40
Metrics:
    F1: 0.87,
    Accuracy: 0.84,
    Recall: 0.78,
    Precision: 0.99,
    False acceptance: 0.02,
    False rejection: 0.22
Epoch 3/40
Metrics:
    F1: 0.95,
    Accuracy: 0.93,
    Recall: 0.96,
    Precision: 0.94,
    False acceptance: 0.13,
    False rejection: 0.04
Epoch 4/40
Metrics:
    F1: 0.94,
    Accuracy: 0.92,
    Recall: 0.93,
    Precision: 0.96,
    False acceptance: 0.09,
    False rejection: 0.07
Epoch 5/40
Metrics:
    F1: 0.94,
    Accuracy: 0.91,
    Recall: 0.90,
    Precision: 0.97,
    False acceptance: 0.06,
    False rejection: 0.10
Epoch 6/40
Metrics:
    F1: 0.91,
    Accuracy: 0.87,
    Recall: 0.96,
    Precision: 0.87,
    False acceptance: 0.32,
    False rejection: 0.04
Epoch 7/40
Metrics:
    F1: 0.95,
    Accuracy: 0.93,
    Recall: 0.93,
    Precision: 0.97,
    False acceptance: 0.0