# Lighter Architectures for Audio Scene Classification

In this notebook, we first train **ResNet34** to classify the audio scenes, and then train a lighter model developed for the DCase21 challenge called **sp4asc**, which implements separable convolutions and merges batch normalization with the convolutional layers at test time.

**Note:** This notebook does not compare results, training times, or performance; that comparison is done in the final report.

In [None]:
import os
import time

import librosa
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import random_split, DataLoader

#utils
from utils import *

#code from original repository
from sp4asc.models.cnns import LogMelSpectrogram, Cnn6_60k, Cnn6
from sp4asc.models import get_net
from sp4asc.training import TrainingManager
from sp4asc.testing import TestManager



## General Config of Notebook

# Hyper-parameters used by the training cells below; the same dict is also
# handed to TrainingManager unchanged.
config = dict(
    batchsize=32,
    num_workers=4,
    reload=False,
    net="Cnn6_60k",
    dropout=0.2,
    specAugment=[128, 2, 16, 2],
    lr=1e-3,
    eta_min=1e-5,
    max_epoch=100,
    weight_decay=1e-5,
    mixup_alpha=0.2,
    out_dir="./trained_models/log",
)

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using ",device," device")


## Loading dataset

In [None]:
# Build the full dataset, then carve it into three disjoint subsets:
#   * data_val   : 10 % of everything, wrapped by `loader_val`
#   * data_train : 70 % of the remaining 90 %, wrapped by `loader_train`
#   * data_eval  : 30 % of the remaining 90 %, wrapped by `loader_test`
# Note the naming: `loader_test` serves `data_eval`, not `data_val`.
dataset = DCaseDataset("data")
length = len(dataset)

# A seeded generator makes the partition identical across kernel restarts.
# Without it, a checkpoint reloaded in a later session could be evaluated on
# samples it was trained on. An optional config["seed"] overrides the default.
split_generator = torch.Generator().manual_seed(config.get("seed", 0))

val_split = int(0.9*length)
data_train_test, data_val = random_split(
    dataset,
    [val_split, length - val_split],
    generator=split_generator,
)
length_train_test = len(data_train_test)
train_n = int(0.7*length_train_test)
eval_n = length_train_test - train_n

# split the remaining 90 % into the training and evaluation subsets
data_train, data_eval = random_split(
    data_train_test,
    [train_n, eval_n],
    generator=split_generator,
)


def _make_loader(subset, shuffle, drop_last):
    """Wrap `subset` in a DataLoader using the batch size / workers from `config`.

    Only `shuffle` and `drop_last` differ between the three loaders, so the
    shared settings live here instead of being repeated three times.
    """
    return DataLoader(
        subset,
        batch_size=config["batchsize"],
        shuffle=shuffle,
        pin_memory=True,
        num_workers=config["num_workers"],
        drop_last=drop_last,
    )


# ---
# Only the training loader shuffles and drops the last incomplete batch.
loader_train = _make_loader(data_train, shuffle=True, drop_last=True)
loader_test = _make_loader(data_eval, shuffle=False, drop_last=False)
loader_val = _make_loader(data_val, shuffle=False, drop_last=False)


print("train len:", len(data_train))
print("eval len:", len(data_eval))
print("val len:", len(data_val))

## Load Big Model

In [None]:
# Start from torchvision's pretrained ResNet34.
model = models.resnet34(pretrained=True)

# Swap the classification head for one with 10 outputs, and the stem convolution
# for one that accepts a single input channel. Both replacement layers are
# freshly initialised, so the pretrained weights of these two layers are not reused.
model.fc = nn.Linear(in_features=512, out_features=10)
model.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)
model = model.to(device)

In [None]:
# Optional warm start from a previously saved ResNet34 checkpoint (disabled);
# the idea was to load the pretrained resnet and retrain only two layers:
# pathModel = "trained_models/resnet/resnet34.pth"
# model.load_state_dict(torch.load(pathModel))

# Switch to evaluation mode, then report the parameter count and the layer layout.
model.eval()
n_params = count_parameters(model)
print("number of parameters in the model: ", n_params)
print(model)


In [None]:
# Spectrogram front-end handed to the TrainingManager together with the model.
# A librosa-based alternative was considered and is kept here for reference:
#spec=librosa.feature.melspectrogram(wav, sr=sr, n_fft=n_fft,hop_length=hop_length,n_mels=n_mels,fmin=fmin,fmax=fmax)
#spec_db=librosa.power_to_db(spec,top_db=top_db)
spectrogram = LogMelSpectrogram()

# --- Optimiser: AdamW over every model parameter (a single parameter group).
optim = torch.optim.AdamW(
    model.parameters(),
    lr=config["lr"],
    weight_decay=config["weight_decay"],
)

# --- Schedule: cosine annealing from config["lr"] down to config["eta_min"]
# over config["max_epoch"] scheduler steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optim,
    T_max=config["max_epoch"],
    eta_min=config["eta_min"],
)

# --- Training
mng = TrainingManager(
    model, spectrogram,
    loader_train, loader_test,
    optim, scheduler,
    config, config["out_dir"],
)
mng.train()


In [None]:

# Run the trained ResNet over `loader_test` and plot one confusion matrix for
# the whole set.
real = []    # ground-truth labels, one tensor per batch
resume = []  # predicted class indices, one tensor per batch

model.eval()  # batch-norm layers must use their running statistics here
with torch.no_grad():  # inference only: skip building the autograd graph
    for x, y, path in loader_test:
        # Tensor.to() returns a new tensor; the result has to be re-bound or
        # the input stays on the CPU while the model sits on `device`.
        x = x.to(device)
        # NOTE(review): assumes `spectrogram` already lives on `device`
        # (e.g. moved there by TrainingManager) - confirm.
        x = spectrogram(x)
        y_pred = torch.argmax(model(x), dim=1)
        # Keep results on the CPU so they can be concatenated and plotted.
        # NOTE(review): assumes the loader yields `y` as a tensor - confirm.
        resume.append(y_pred.cpu())
        real.append(y.cpu())

# Flatten the per-batch tensors into one label vector each and plot once,
# after the loop, instead of re-plotting a partial matrix on every batch.
# NOTE(review): plot_confusion_matrix is assumed to take flat label arrays in
# the order (true, predicted, class list) - confirm against utils.
y_true = torch.cat(real).numpy()
y_hat = torch.cat(resume).numpy()
plot_confusion_matrix(y_true, y_hat, [0,1,2,3,4,5,6,7,8,9])

## CNN6

We load the CNN6 model

In [None]:
# Name -> class registry; only Cnn6 is needed for this section.
# Note: this rebinds `get_net`, shadowing the name imported from sp4asc.models.
nets = [Cnn6]
get_net = {n.__name__: n for n in nets}

# Settings for the Cnn6 run.
config_cnn6 = dict(
    batchsize=32,
    num_workers=4,
    reload=False,
    net="Cnn6",
    dropout=0.2,
    specAugment=[128, 2, 16, 2],
    lr=1e-3,
    eta_min=1e-5,
    max_epoch=100,
    weight_decay=1e-5,
    mixup_alpha=0.2,
    out_dir="./trained_models/log",
)

In [None]:
# --- Log dir: create the output directory that is later given to TrainingManager
path2log = config_cnn6["out_dir"]
os.makedirs(path2log, exist_ok=True)

# --- Get network: look the class up by name, then build it with the
# dropout rate and SpecAugment settings from the Cnn6 config
spectrogram = LogMelSpectrogram()
net_cls = get_net[config_cnn6["net"]]
net = net_cls(config_cnn6["dropout"], config_cnn6["specAugment"])

In [None]:
# NOTE(review): this cell repeats the preceding cell verbatim; running it only
# rebuilds `spectrogram` and `net` with fresh weights.

# --- Log dir
path2log = config_cnn6["out_dir"]
os.makedirs(path2log, exist_ok=True)

# --- Get network
spectrogram = LogMelSpectrogram()
net = get_net[config_cnn6["net"]](config_cnn6["dropout"], config_cnn6["specAugment"])


In [None]:
# Summarise the Cnn6 network before training.
print("\n\nNet at training time")
print(net)
print("Nb. of parameters at training time: ", net.get_nb_parameters() / 1e3, "k")

# Everything below reads from `config_cnn6`, the config this network was built
# from. Using the notebook-wide `config` here would hand TrainingManager a dict
# whose "net" entry says "Cnn6_60k" while a Cnn6 is being trained.

# --- Optimiser: AdamW over all network parameters
optim = torch.optim.AdamW(
    [
        {"params": net.parameters()},
    ],
    lr=config_cnn6["lr"],
    weight_decay=config_cnn6["weight_decay"],
)
# --- Schedule: cosine annealing down to eta_min over max_epoch scheduler steps
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optim,
    config_cnn6["max_epoch"],
    eta_min=config_cnn6["eta_min"],
)

# --- Training
mng = TrainingManager(
    net,
    spectrogram,
    loader_train,
    loader_test,
    optim,
    scheduler,
    config_cnn6,
    path2log,
)

In [None]:
# Launch training with the manager built in the previous cell (Cnn6).
mng.train()

## Distillation

We now load and train the sp4asc model (`Cnn6_60k`).

In [None]:
# Rebuild the name -> class registry so that it exposes Cnn6_60k only.
nets = [Cnn6_60k]
get_net = dict((n.__name__, n) for n in nets)

In [None]:
# --- Log dir: create the output directory that is later given to TrainingManager
path2log = config["out_dir"]
os.makedirs(path2log, exist_ok=True)

# --- Get network: resolve the class named in config["net"] and build it with
# the configured dropout rate and SpecAugment settings
spectrogram = LogMelSpectrogram()
net_cls = get_net[config["net"]]
net = net_cls(config["dropout"], config["specAugment"])



### Training by distillation

In [None]:
# Summarise the network before training.
print("\n\nNet at training time")
print(net)
print("Nb. of parameters at training time: ", net.get_nb_parameters() / 1e3, "k")

# --- Optimiser: AdamW over every network parameter (a single parameter group)
optim = torch.optim.AdamW(
    net.parameters(),
    lr=config["lr"],
    weight_decay=config["weight_decay"],
)

# --- Schedule: cosine annealing down to config["eta_min"] over
# config["max_epoch"] scheduler steps
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optim,
    T_max=config["max_epoch"],
    eta_min=config["eta_min"],
)

# --- Training
mng = TrainingManager(
    net, spectrogram,
    loader_train, loader_test,
    optim, scheduler,
    config, path2log,
)


In [None]:
# Launch training with the manager built in the previous cell (Cnn6_60k).
mng.train()

# Evaluation 

Saving current state of our Training Manager

In [None]:
# Persist the current training state through the manager's own helper.
# A hand-built `dict_to_save` (epoch / net / optim / scheduler / config) used to
# be assembled here but was never passed to anything, so it was dead code and
# has been dropped.
# NOTE(review): presumably save_state() gathers those same fields itself - confirm.
mng.save_state()

Loading Test Manager without Batch Normalization Merging

In [None]:
# Build the test manager for the network trained above.
# `loader_val` wraps the 10 % subset split off first; `loader_test` wraps `data_eval`.
# NOTE(review): path2model is hard-coded and repeats config["out_dir"] with a
# trailing slash - keep the two in sync if the output directory changes.
mngTest = TestManager(
    net,
    spectrogram,
    loader_val,
    loader_test,
    path2model="./trained_models/log/",
)

In [None]:
# Time one test pass with merge_bn=False.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
mngTest.test(merge_bn=False)
print(f'Took {time.perf_counter() - start_time} sec to run')

Loading Test Manager with Batch Normalization Merging

In [None]:
# Build a fresh test manager for the run with merge_bn=True in the next cell.
# The arguments are the same as for the earlier TestManager.
# NOTE(review): path2model is hard-coded and repeats config["out_dir"] with a
# trailing slash - keep the two in sync if the output directory changes.
mngTest = TestManager(
    net,
    spectrogram,
    loader_val,
    loader_test,
    path2model="./trained_models/log/",
)

In [None]:
# --- Testing
# Time one test pass with merge_bn=True.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
mngTest.test(merge_bn=True)
print(f'Took {time.perf_counter() - start_time} sec to run')