# 2. Packages

In [1]:
import torch, torchvision
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim
import time
import torchaudio.transforms as T
from torch.utils.data import DataLoader
import torch.utils.data.dataset as dataset
import pandas as pd
import os
import torchaudio
import cv2
from sklearn.metrics import auc, roc_curve
from models import resnetish18
import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy
import torch.nn.functional as F


# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# 3. Dataset

In [2]:
class AICovidVNDataset(dataset.Dataset):
    """Map-style dataset that yields ``(waveform, target)`` pairs.

    Each row of the metadata CSV supplies a relative audio path (column
    ``file_path``) and an integer label (column ``assessment_result``).
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the metadata CSV; it must contain the
                columns ``file_path`` and ``assessment_result``.
            root_dir (string): Directory that the ``file_path`` entries are
                joined onto when an audio file is loaded.
            transform (callable, optional): Applied to the loaded waveform
                before it is returned.
        """
        self.transform = transform
        self.root_dir = root_dir

        self.aicovidvn_data = pd.read_csv(csv_file)
        # Cache both columns as arrays so item lookup is a plain index.
        self.file_path = self.aicovidvn_data['file_path'].values
        self.assessment_result = self.aicovidvn_data['assessment_result'].values

    def __len__(self):
        """Return the number of rows in the metadata CSV."""
        return self.aicovidvn_data.shape[0]

    def __getitem__(self, idx):
        """Load one audio file and return ``(waveform, target)``.

        Both tensors are placed on the module-level ``device``.
        NOTE(review): creating tensors on a CUDA ``device`` here may not work
        when this dataset is read through DataLoader worker processes --
        confirm against the ``num_workers`` setting used by the loaders.
        """
        index = idx.tolist() if torch.is_tensor(idx) else idx

        wav_path = os.path.join(self.root_dir, self.file_path[index])
        # torchaudio.load returns (waveform, sample_rate); the rate is not used.
        waveform, _ = torchaudio.load(wav_path)
        waveform = waveform.to(device)
        if self.transform:
            waveform = self.transform(waveform)

        label = torch.tensor(self.assessment_result[index], dtype=torch.long, device=device)
        return (waveform, label)

# 4. Training

### 4.1. Applying MFCC transforms to the Data

In [3]:
# Options forwarded to the mel-spectrogram stage that T.MFCC builds internally.
mel_spectrogram_options = {
    'n_fft': 2048,
    'n_mels': 256,
    'hop_length': 512,
    'mel_scale': 'htk',
}

# MFCC front end for 8 kHz audio: 256 coefficients computed from 256 mel bins.
mfcc_transform = T.MFCC(sample_rate=8000, n_mfcc=256, melkwargs=mel_spectrogram_options)

### 4.2. Load data

In [4]:
# Per-sample pipeline: MFCC features, resized to 256 and centre-cropped to 224.
# Each stage is moved to `device` because the dataset hands it waveforms that
# already live on `device`.
audio_pipeline = transforms.Compose([
    mfcc_transform.to(device),
    transforms.Resize(256).to(device),
    transforms.CenterCrop(224).to(device)
])
train_dataset = AICovidVNDataset(csv_file='./data/aicv115m_public_train/metadata_train_challenge.csv',
                                 root_dir='./data/aicv115m_public_train/train_audio_files_8k',
                                 transform=audio_pipeline)

# 80/20 split; the fixed generator seed keeps the split identical across runs.
n_train = int(len(train_dataset) * 0.8)
lengths = [n_train, len(train_dataset) - n_train]
train_data, test_data = torch.utils.data.random_split(dataset=train_dataset, lengths=lengths,
                                                      generator=torch.Generator().manual_seed(42))

batch_size = 16
# num_workers=0: AICovidVNDataset.__getitem__ creates tensors on `device` and runs
# the on-device transform. When `device` is CUDA this cannot run inside forked
# DataLoader worker processes, so samples are loaded in the main process.
train_data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=0)
test_data_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=0)

train_data_size = len(train_data)
test_data_size = len(test_data)

### 4.3. Training

In [5]:
class MyLearner(pl.LightningModule):
    """LightningModule that trains a wrapped classifier with NLL loss.

    ``forward`` applies ``log_softmax`` over dim 1 of the wrapped model's
    output, so every step works on log-probabilities. The dataloader hooks
    return the notebook-level ``train_data_loader`` / ``test_data_loader``.
    """

    def __init__(self, model, learning_rate=3e-4):
        """
        Args:
            model: Module to train; its output is fed to ``log_softmax(dim=1)``.
            learning_rate (float): Learning rate passed to the Adam optimizer.
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.model = model

    def forward(self, x):
        """Return log-probabilities (log-softmax over dim 1) for ``x``."""
        x = self.model(x)
        x = F.log_softmax(x, dim=1)
        return x

    def _forward_and_loss(self, batch):
        """Run the model on an ``(x, y)`` batch; return ``(log_probs, y, nll_loss)``."""
        x, y = batch
        log_probs = self(x)
        return log_probs, y, F.nll_loss(log_probs, y)

    def training_step(self, batch, batch_idx):
        """Return the NLL loss for one training batch."""
        _, _, loss = self._forward_and_loss(batch)
        return loss

    def validation_step(self, batch, batch_idx, split='val'):
        """Compute loss and accuracy for one batch and log them.

        Metrics are logged as ``f'{split}_loss'`` and ``f'{split}_acc'``;
        ``split`` defaults to ``'val'`` and is set to ``'test'`` by ``test_step``.
        """
        log_probs, y, loss = self._forward_and_loss(batch)
        preds = torch.argmax(log_probs, dim=1)
        acc = accuracy(preds, y)

        self.log(f'{split}_loss', loss, prog_bar=True)
        self.log(f'{split}_acc', acc, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Reuse the validation logic, logging under the ``test_`` prefix."""
        return self.validation_step(batch, batch_idx, split='test')

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters of this module."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

    def train_dataloader(self):
        """Return the notebook-level training loader."""
        return train_data_loader

    def val_dataloader(self):
        """Return the notebook-level hold-out loader used for validation."""
        return test_data_loader

    def test_dataloader(self):
        """Return the hold-out loader for testing.

        This previously returned ``test_loader``, a name no cell defines, which
        would raise ``NameError``. The hold-out loader is ``test_data_loader``.
        NOTE(review): if a dedicated test-set loader is added later, return it here.
        """
        return test_data_loader

In [None]:
# Wrap the ResNet-style backbone (argument 2: presumably the number of output
# classes -- confirm against models.resnetish18) in the Lightning module.
learner = MyLearner(resnetish18(2))
# val_acc is higher-is-better, so pick checkpoints with mode='max' explicitly
# instead of relying on ModelCheckpoint's version-dependent default mode.
checkpoint = pl.callbacks.ModelCheckpoint(dirpath='models/', monitor='val_acc', mode='max', verbose=True)
# Request a GPU only when one is available, mirroring the CPU fallback of `device`.
trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else 0, max_epochs=5, callbacks=[checkpoint])
trainer.fit(learner)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | model | ResNetish | 11.2 M
------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.779    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch 0, global step 58: val_acc reached 0.50417 (best 0.50417), saving model to "/media/sondt/ubuntu/Competitive/AICovidVN/models/epoch=0-step=58.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 1, global step 117: val_acc was not in top 1
