In [1]:
import librosa
import argparse
import json
import os
import time
import shutil
import pandas as pd
import numpy as np
import pickle as pkl 
from PIL import Image
import lmdb
import random

import torch
import torch.nn as nn
from torch.utils.data import *
import torchaudio
import torchvision
import torchvision.models as models
from torchsummary import summary

from tqdm import tqdm
from tensorboardX import SummaryWriter

import wandb

In [2]:
# Authenticate this kernel with Weights & Biases before any run is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mcharleestefanski[0m ([33mchars[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# `%cd` with no argument switches the working directory to the user's home
# (the recorded output shows /home/ubuntu); the relative data and checkpoint
# paths configured further down are resolved from there.
%cd

/home/ubuntu


In [4]:
class MelSpectrogram(object):
    """Turn a raw waveform into a stack of log-mel spectrograms.

    One spectrogram is computed per (window, hop) pair in ``window_length`` /
    ``hop_length``; each is log-scaled and resized to ``(128, self.length)``
    before being stacked into a single float tensor.

    Args:
        bins: Number of mel bins passed to torchaudio (``n_mels``).
        mode: ``"train"`` enables random pitch-shift / time-stretch
            augmentation; any other value leaves the audio untouched.
        dataset: Dataset name. ``"GTZAN"`` selects an output width of 1500
            frames, anything else 250, mirroring ``AudioDataset``.
    """

    def __init__(self, bins, mode, dataset):
        # Window / hop sizes; __call__ scales them by sr / 1000, i.e. they are
        # treated as milliseconds.
        self.window_length = [25, 50, 100]
        self.hop_length = [10, 25, 50]
        self.fft = 4410
        self.melbins = bins
        self.mode = mode
        self.sr = 44100
        # Keep the output width in step with AudioDataset, which reshapes the
        # pre-computed spectrograms to 1500 frames for GTZAN and 250 otherwise.
        # A fixed 250 would make augmented and pre-computed items of a GTZAN
        # batch impossible to collate.
        self.length = 1500 if dataset == "GTZAN" else 250

    def __call__(self, value):
        """Return the stacked spectrograms for one audio clip.

        Args:
            value: Audio samples accepted by librosa / ``torch.Tensor``
                (assumed to be a 1-D mono waveform sampled at ``self.sr`` --
                TODO confirm against the preprocessing script).

        Returns:
            A ``torch.Tensor`` reshaped to ``(-1, 128, self.length)``; with a
            1-D input that is one channel per window size.
        """
        sample = value
        # (semitone range for pitch shift, rate range for time stretch)
        limits = ((-2, 2), (0.9, 1.2))

        if self.mode == "train":
            pitch_shift = np.random.randint(limits[0][0], limits[0][1] + 1)
            time_stretch = np.random.random() * (limits[1][1] - limits[1][0]) + limits[1][0]
            shifted = librosa.effects.pitch_shift(y=sample, sr=self.sr, n_steps=pitch_shift)
            new_audio = librosa.effects.time_stretch(y=shifted, rate=time_stretch)
        else:
            new_audio = sample

        # Loop-invariant work: the waveform tensor and the resize op are the
        # same for every window size.
        clip = torch.Tensor(new_audio)
        resize = torchvision.transforms.Resize((128, self.length))
        eps = 1e-6  # avoids log(0) on silent bins

        specs = []
        for win_ms, hop_ms in zip(self.window_length, self.hop_length):
            window_length = int(round(win_ms * self.sr / 1000))
            hop_length = int(round(hop_ms * self.sr / 1000))
            mel = torchaudio.transforms.MelSpectrogram(
                sample_rate=self.sr,
                n_fft=self.fft,
                win_length=window_length,
                hop_length=hop_length,
                n_mels=self.melbins,
            )
            spec = np.log(mel(clip).numpy() + eps)
            # Resizing goes through PIL so every spectrogram ends up with the
            # same (128, length) shape regardless of hop size or time stretch.
            spec = np.asarray(resize(Image.fromarray(spec)))
            specs.append(spec)

        specs = np.array(specs).reshape(-1, 128, self.length)
        return torch.Tensor(specs)

class AudioDataset(Dataset):
    """Dataset over a pickled list of audio records.

    Each record is a mapping that provides ``"values"`` (a pre-computed
    spectrogram array), ``"target"`` (the class index) and, for augmentation,
    ``"audio"`` (the raw waveform handed to ``transforms``).

    When ``transforms.mode == "train"`` the dataset reports twice its stored
    size: indices ``[0, N)`` return the pre-computed spectrograms and indices
    ``[N, 2N)`` return freshly augmented versions of the same records.

    Args:
        pkl_dir: Path of the pickle file to load.
        dataset_name: ``"GTZAN"`` selects 1500-frame spectrograms, anything
            else 250.
        transforms: Optional callable with a ``mode`` attribute, applied to
            ``entry["audio"]`` for the augmented half of the index range.
    """

    def __init__(self, pkl_dir, dataset_name, transforms=None):
        self.transforms = transforms
        self.length = 1500 if dataset_name == "GTZAN" else 250
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # only load pickles produced by a trusted preprocessing step.
        with open(pkl_dir, "rb") as f:
            self.data = pkl.load(f)

    def _augmenting(self):
        """True when a train-mode transform is attached."""
        return getattr(self.transforms, "mode", None) == "train"

    def __len__(self):
        # transforms may be None (or lack a mode), in which case only the
        # stored records are exposed.
        if self._augmenting():
            return 2 * len(self.data)
        return len(self.data)

    def __getitem__(self, idx):
        stored = len(self.data)
        if idx >= len(self):
            # Without this guard an out-of-range index in non-train mode would
            # wrap into the augmentation branch (or hit an unbound `values`
            # when no transform is attached) instead of ending iteration.
            raise IndexError(
                "index {} out of range for dataset of size {}".format(idx, len(self))
            )
        if idx >= stored:
            # Second half of the train-mode range: augment the raw audio.
            entry = self.data[idx - stored]
            values = self.transforms(entry["audio"])
        else:
            entry = self.data[idx]
            values = torch.Tensor(entry["values"].reshape(-1, 128, self.length))
        target = torch.LongTensor([entry["target"]])
        return (values, target)

def fetch_dataloader(pkl_dir, dataset_name, batch_size, num_workers, mode):
    """Build a shuffled DataLoader over the pickled records at ``pkl_dir``.

    A 128-bin ``MelSpectrogram`` transform in the given ``mode`` is attached to
    the dataset; ``batch_size`` and ``num_workers`` are forwarded to the
    ``DataLoader`` unchanged.
    """
    audio_dataset = AudioDataset(
        pkl_dir,
        dataset_name,
        transforms=MelSpectrogram(128, mode, dataset_name),
    )
    return DataLoader(
        audio_dataset,
        shuffle=True,
        batch_size=batch_size,
        num_workers=num_workers,
    )

In [5]:
class ResNet(nn.Module):
    """ResNet-50 backbone with its final layer replaced for audio classification.

    Args:
        dataset: ``"ESC"`` selects a 50-way classifier, anything else 10-way.
        pretrained: Forwarded to ``torchvision.models.resnet50``.
    """

    def __init__(self, dataset, pretrained=True):
        super().__init__()
        num_classes = 50 if dataset == "ESC" else 10
        self.model = models.resnet50(pretrained=pretrained)
        # Read the classifier input width from the backbone instead of
        # hard-coding 2048, so the head always matches the layer it replaces.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        """Return the class logits produced by the wrapped backbone for ``x``."""
        return self.model(x)

In [6]:
# parameters
dataset_name = "ESC"
# NOTE(review): the training cell builds file names by plain concatenation
# (data_dir + "training128mel<fold>.pkl"), so this value acts as a filename
# prefix rather than a directory -- confirm that matches the files on disk.
data_dir = "DataSci251_FinalProject/DataSet/ESC-50-master/store_spectograms"
dataaug = True
pretrained = True
# Must be a plain bool: the previous `True,` (trailing comma) created the tuple
# (True,), which stays truthy even when edited to `False,`.
scheduler = True
model = "resnet"
batch_size = 32
num_workers = 8
epochs = 70
lr = 1e-4
weight_decay = 1e-3
num_folds = 5
checkpoint_dir = "DataSci251_FinalProject/DataSet/ESC-50-master/checkpoint_dir_resnet"

In [15]:
# Open the Weights & Biases run that the training loop logs into.
# `project` is the W&B project that receives the run; `config` records the
# hyperparameters and run metadata shown next to it.
# NOTE(review): the project is called "audio_densenet" although this notebook
# trains a ResNet -- confirm that is intended.
run = wandb.init(
    project="audio_densenet",
    config=dict(
        model='resnet',
        batch_size=batch_size,
        learning_rate=lr,
        epochs=epochs,
    ),
)

0,1
accuracy,▁
loss,▁

0,1
accuracy,77.0
loss,2.1174


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669086650000508, max=1.0…

In [8]:
class RunningAverage():
    """Accumulate values and report their arithmetic mean.

    ``update`` adds one value; calling the instance returns the mean of all
    values seen so far.
    """

    def __init__(self):
        self.total = 0
        self.steps = 0

    def update(self, loss):
        """Add one observation (e.g. a batch loss) to the running sum."""
        self.total += loss
        self.steps += 1

    def __call__(self):
        """Return the mean of the recorded values, or NaN if there are none."""
        if self.steps == 0:
            # No observations yet (e.g. an empty data loader): report an
            # undefined mean instead of raising ZeroDivisionError.
            return float("nan")
        return (self.total / float(self.steps))

In [9]:
def save_checkpoint(state, is_best, split, checkpoint):
    """Serialize ``state`` into the ``checkpoint`` directory.

    Args:
        state: Object handed to ``torch.save`` (here a dict with the epoch,
            model and optimizer state).
        is_best: When true, the saved file is also copied to
            ``model_best_<split>.pth.tar``.
        split: Fold identifier embedded in the file names.
        checkpoint: Target directory; created (including missing parents) if
            it does not exist.
    """
    filename = os.path.join(checkpoint, 'last{}.pth.tar'.format(split))
    if not os.path.exists(checkpoint):
        print("Checkpoint Directory does not exist")
        # makedirs rather than mkdir: the configured checkpoint path is nested
        # and mkdir fails when an intermediate directory is missing.
        os.makedirs(checkpoint, exist_ok=True)
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, os.path.join(checkpoint, "model_best_{}.pth.tar".format(split)))

In [10]:
def train(model, device, data_loader, optimizer, loss_fn):
    """Run one optimisation pass over ``data_loader``.

    Each batch is moved to ``device``, pushed through ``model`` and scored
    with ``loss_fn``; the optimizer is stepped after every batch. A tqdm bar
    shows the running mean loss.

    Returns:
        The mean of the per-batch losses for this pass.
    """
    model.train()
    running_loss = RunningAverage()

    with tqdm(total=len(data_loader)) as progress:
        for batch in data_loader:
            features = batch[0].to(device)
            # Targets arrive as (batch, 1); the loss expects a flat (batch,) vector.
            labels = batch[1].squeeze(1).to(device)

            logits = model(features)
            batch_loss = loss_fn(logits, labels)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            running_loss.update(batch_loss.item())

            progress.set_postfix(loss='{:05.3f}'.format(running_loss()))
            progress.update()

    return running_loss()

In [11]:
def evaluate(model, device, test_loader):
    """Compute top-1 accuracy of ``model`` over ``test_loader``.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        device: Device the batches are moved to.
        test_loader: Iterable of ``(inputs, target)`` batches where ``target``
            has a singleton second dimension.

    Returns:
        Accuracy as a percentage in ``[0, 100]``; ``0.0`` when the loader
        yields no samples.
    """
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for batch_idx, data in enumerate(test_loader):
            inputs = data[0].to(device)
            target = data[1].squeeze(1).to(device)

            outputs = model(inputs)

            # Inside no_grad the outputs carry no graph, so the legacy `.data`
            # detour is unnecessary.
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    if total == 0:
        # An empty loader would otherwise raise ZeroDivisionError.
        return 0.0
    return (100 * correct / total)

In [12]:
def train_and_evaluate(model, device, train_loader, val_loader, optimizer, loss_fn, writer, epochs, checkpoint_dir, split, scheduler=None):
    """Train for ``epochs`` epochs, validating and checkpointing after each one.

    After every epoch the validation accuracy and mean training loss are
    printed, logged to wandb and to the tensorboard ``writer``, and a
    checkpoint is saved. Whenever validation accuracy improves, a TorchScript
    export of the model is written to ``<checkpoint_dir>/myModel_export.pt``.

    Args:
        model: Module to train.
        device: Device used for training and evaluation.
        train_loader: Batches used by ``train``.
        val_loader: Batches used by ``evaluate``.
        optimizer: Optimizer stepped by ``train``.
        loss_fn: Loss passed to ``train``.
        writer: Tensorboard writer; closed before returning.
        epochs: Number of epochs to run.
        checkpoint_dir: Directory receiving checkpoints and the export.
        split: Fold identifier used in file names and tensorboard tags.
        scheduler: Optional LR scheduler stepped once per epoch.

    Returns:
        The best validation accuracy observed (``0.0`` if none exceeded zero).

    Note:
        The tensorboard tags read the module-level ``dataset_name``.
    """
    best_acc = 0.0
    checkpoint_dir = "{}".format(checkpoint_dir)
    # The TorchScript export below is written before save_checkpoint has had a
    # chance to create the directory, so make sure it exists up front.
    os.makedirs(checkpoint_dir, exist_ok=True)
    export_path = os.path.join(checkpoint_dir, 'myModel_export.pt')

    for epoch in range(epochs):
        avg_loss = train(model, device, train_loader, optimizer, loss_fn)

        acc = evaluate(model, device, val_loader)
        print("Epoch {}/{} Loss:{} Valid Acc:{}".format(epoch, epochs, avg_loss, acc))

        # NOTE(review): every fold logs under the same keys into the same
        # wandb run, so the folds are not distinguishable there.
        wandb.log({"accuracy": acc, "loss": avg_loss})

        is_best = (acc > best_acc)
        if scheduler:
            scheduler.step()
        if is_best:
            best_acc = acc
            # Module.to moves the parameters in place, so the model has to be
            # sent back afterwards -- to the caller's device, not a hard-coded
            # 'cuda', which breaks CPU-only or non-default-GPU runs.
            model_scripted = torch.jit.script(model.to('cpu'))
            model_scripted.save(export_path)

            model = model.to(device)

        save_checkpoint({"epoch": epoch + 1,
                         "model": model.state_dict(),
                         "optimizer": optimizer.state_dict()}, is_best, split, checkpoint_dir)
        writer.add_scalar("data{}/trainingLoss{}".format(dataset_name, split), avg_loss, epoch)
        # NOTE(review): this tag says "valLoss" but the value logged is the
        # validation accuracy; the tag is kept so existing logs stay comparable.
        writer.add_scalar("data{}/valLoss{}".format(dataset_name, split), acc, epoch)
    writer.close()
    return best_acc

In [16]:
## need config path
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# One independent training run per cross-validation fold.
for i in range(1, num_folds+1):
    if not dataaug:
        # Only the augmented pipeline is implemented. Printing and carrying on
        # would reuse loaders left over from an earlier fold (or fail with a
        # NameError on the first one), so stop here instead.
        raise RuntimeError("something wrong: dataaug is False but only the augmented data pipeline is implemented")

    train_loader = fetch_dataloader("{}training128mel{}.pkl".format(data_dir, i), dataset_name, batch_size, num_workers, 'train')
    val_loader = fetch_dataloader("{}validation128mel{}.pkl".format(data_dir, i), dataset_name, batch_size, num_workers, 'validation')

    writer = SummaryWriter(comment=dataset_name)
    # Fresh model and optimizer per fold so no weights leak between folds.
    model = ResNet(dataset_name, pretrained).to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Keep the `scheduler` config flag intact: bind the per-fold scheduler
    # object to its own name instead of overwriting the flag.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.1) if scheduler else None

    train_and_evaluate(model, device, train_loader, val_loader, optimizer, loss_fn, writer, epochs, checkpoint_dir, i, lr_scheduler)

100%|█████████████████████████████| 100/100 [03:34<00:00,  2.15s/it, loss=2.118]


Epoch 0/70 Loss:2.118273896574974 Valid Acc:74.5


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.15s/it, loss=0.459]


Epoch 1/70 Loss:0.4585291515290737 Valid Acc:79.75


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.169]


Epoch 2/70 Loss:0.16943812269717454 Valid Acc:81.75


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.137]


Epoch 3/70 Loss:0.13702006470412015 Valid Acc:81.0


100%|█████████████████████████████| 100/100 [03:34<00:00,  2.15s/it, loss=0.118]


Epoch 4/70 Loss:0.11795390056446195 Valid Acc:82.75


100%|█████████████████████████████| 100/100 [03:34<00:00,  2.14s/it, loss=0.073]


Epoch 5/70 Loss:0.07278454903513193 Valid Acc:83.0


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.15s/it, loss=0.074]


Epoch 6/70 Loss:0.07446868944913149 Valid Acc:80.5


100%|█████████████████████████████| 100/100 [03:34<00:00,  2.14s/it, loss=0.076]


Epoch 7/70 Loss:0.07593282578513026 Valid Acc:82.5


100%|█████████████████████████████| 100/100 [03:34<00:00,  2.15s/it, loss=0.074]


Epoch 8/70 Loss:0.07351952964439988 Valid Acc:78.5


100%|█████████████████████████████| 100/100 [03:34<00:00,  2.15s/it, loss=0.081]


Epoch 9/70 Loss:0.0810258402954787 Valid Acc:83.0


100%|█████████████████████████████| 100/100 [03:34<00:00,  2.15s/it, loss=0.074]


Epoch 10/70 Loss:0.07397115013562143 Valid Acc:77.75


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.086]


Epoch 11/70 Loss:0.0859751855675131 Valid Acc:81.0


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.053]


Epoch 12/70 Loss:0.05316291957860812 Valid Acc:85.5


100%|█████████████████████████████| 100/100 [03:37<00:00,  2.17s/it, loss=0.029]


Epoch 13/70 Loss:0.029040141915902495 Valid Acc:87.5


100%|█████████████████████████████| 100/100 [03:38<00:00,  2.19s/it, loss=0.042]


Epoch 14/70 Loss:0.04181705158436671 Valid Acc:85.25


100%|█████████████████████████████| 100/100 [03:37<00:00,  2.17s/it, loss=0.043]


Epoch 15/70 Loss:0.042619888270273805 Valid Acc:85.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.16s/it, loss=0.054]


Epoch 16/70 Loss:0.05388797287363559 Valid Acc:84.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.17s/it, loss=0.100]


Epoch 17/70 Loss:0.0998273429647088 Valid Acc:80.5


100%|█████████████████████████████| 100/100 [03:37<00:00,  2.17s/it, loss=0.079]


Epoch 18/70 Loss:0.07937784261070192 Valid Acc:84.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.17s/it, loss=0.053]


Epoch 19/70 Loss:0.05307394221425057 Valid Acc:84.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.17s/it, loss=0.034]


Epoch 20/70 Loss:0.034430835610255596 Valid Acc:85.5


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.17s/it, loss=0.033]


Epoch 21/70 Loss:0.032944110538810495 Valid Acc:80.5


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.16s/it, loss=0.068]


Epoch 22/70 Loss:0.06781523839570582 Valid Acc:84.75


100%|█████████████████████████████| 100/100 [03:37<00:00,  2.17s/it, loss=0.038]


Epoch 23/70 Loss:0.038184603741392494 Valid Acc:86.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.16s/it, loss=0.037]


Epoch 24/70 Loss:0.03682733250316232 Valid Acc:86.5


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.17s/it, loss=0.038]


Epoch 25/70 Loss:0.03832303703064099 Valid Acc:85.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.17s/it, loss=0.036]


Epoch 26/70 Loss:0.035696450406685475 Valid Acc:86.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.16s/it, loss=0.071]


Epoch 27/70 Loss:0.07132542388513684 Valid Acc:78.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.17s/it, loss=0.065]


Epoch 28/70 Loss:0.0653350274078548 Valid Acc:83.25


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.082]


Epoch 29/70 Loss:0.08177971350494773 Valid Acc:81.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.17s/it, loss=0.039]


Epoch 30/70 Loss:0.03916788811795414 Valid Acc:87.5


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.019]


Epoch 31/70 Loss:0.018729726208839566 Valid Acc:86.25


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.15s/it, loss=0.011]


Epoch 32/70 Loss:0.011130445818416775 Valid Acc:87.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.16s/it, loss=0.008]


Epoch 33/70 Loss:0.008421390186995267 Valid Acc:87.25


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.16s/it, loss=0.009]


Epoch 34/70 Loss:0.008539193533360958 Valid Acc:87.25


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.16s/it, loss=0.007]


Epoch 35/70 Loss:0.007047209672164172 Valid Acc:88.0


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.006]


Epoch 36/70 Loss:0.005786624426255002 Valid Acc:87.0


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.006]


Epoch 37/70 Loss:0.006162048467667773 Valid Acc:86.5


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.16s/it, loss=0.007]


Epoch 38/70 Loss:0.0072184599225874994 Valid Acc:87.5


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.005]


Epoch 39/70 Loss:0.004966322724940255 Valid Acc:89.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.16s/it, loss=0.005]


Epoch 40/70 Loss:0.004668865983840078 Valid Acc:87.75


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.16s/it, loss=0.006]


Epoch 41/70 Loss:0.0057287115918006745 Valid Acc:88.25


100%|█████████████████████████████| 100/100 [03:34<00:00,  2.15s/it, loss=0.005]


Epoch 42/70 Loss:0.00493657871382311 Valid Acc:89.0


100%|█████████████████████████████| 100/100 [03:35<00:00,  2.15s/it, loss=0.005]


Epoch 43/70 Loss:0.0050342719780746845 Valid Acc:89.0


100%|█████████████████████████████| 100/100 [03:36<00:00,  2.17s/it, loss=0.004]


Epoch 44/70 Loss:0.004260176977841184 Valid Acc:89.0


  0%|                                                   | 0/100 [00:02<?, ?it/s]


KeyboardInterrupt: 