In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.models as models

import os
import torch
import librosa

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from datasets import AudioDataset

In [23]:
# root directory of the UrbanSound8K dataset
data_root = "/home/tiz007/228/228_data/UrbanSound8K/"
# label CSV, located in the metadata folder under the dataset root
label_path = data_root + "metadata/UrbanSound8K.csv"

In [24]:
def lr_schedule(epoch, init_lr):
    """Return the step-decayed learning rate for a given epoch.

    The rate is `init_lr` before epoch 20, `init_lr/10` for epochs 20-39,
    `init_lr/100` for epochs 40-79 and `init_lr/1000` from epoch 80 onwards.
    """
    # each guard only needs an upper bound: lower epochs have already returned
    if epoch < 20:
        return init_lr
    if epoch < 40:
        return init_lr/10
    if epoch < 80:
        return init_lr/100
    return init_lr/1000

In [28]:
def train(MAX_EPOCH = 100, eval_interval = 5):
    """Train a ResNet-18 classifier on the "spec" audio feature and plot learning curves.

    Args:
        MAX_EPOCH: number of training epochs.
        eval_interval: run `test()` on the last epoch of every block of this
            many epochs (epochs eval_interval-1, 2*eval_interval-1, ...).

    Raises:
        ValueError: if `eval_interval` is smaller than 1.

    Side effects:
        Prints per-epoch statistics and draws two matplotlib figures
        (accuracy and loss, train vs. val). Returns nothing.

    `test()` is expected to return an `(accuracy, loss)` pair; both entries are
    converted with `float()` so plain numbers and one-element tensors both work.
    """
    if eval_interval < 1:
        raise ValueError("eval_interval must be a positive integer")

    # use the GPU when present, otherwise fall back to CPU instead of crashing
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # initialize dataset (feature can be "mfcc", "spec", "mel_raw")
    # NOTE(review): the leading 1 is presumably a fold/split index -- confirm against AudioDataset
    audio_dataset = AudioDataset(1, DataRoot=data_root, LabelPath=label_path, feature="spec", mode="train")

    # define cnn model (ResNet-18 with a 10-way output layer) and move it to the device
    cnn_model = models.resnet18(num_classes=10).to(device)

    # initialize dataloader
    data_loader = torch.utils.data.DataLoader(audio_dataset, batch_size=32, shuffle=True, num_workers=1)

    # loss function
    loss_fn = nn.CrossEntropyLoss().to(device)

    # base learning rate; the per-epoch value comes from lr_schedule
    learning_rate = 1e-3

    # initialize optimizer
    optimizer = torch.optim.Adam(cnn_model.parameters(), lr=learning_rate)

    # per-epoch training history
    train_acc = []
    train_loss = []

    # evaluation history, recorded together with the epoch it was measured at
    eval_epochs = []
    test_acc = []
    test_loss = []

    for epoch in range(MAX_EPOCH):
        # apply the scheduled learning rate for this epoch
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_schedule(epoch, learning_rate)

        # make sure the model is in training mode regardless of what evaluation did
        cnn_model.train()

        # initialize epoch stat (plain Python numbers so they can be plotted later)
        correct_num = 0
        total_num = 0
        loss_sum = 0.0

        for train_data, labels in data_loader:
            # data to device
            train_data = train_data.to(device)
            labels = labels.to(device)

            prob = cnn_model(train_data)
            loss = loss_fn(prob, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # .item() moves the scalars off the device; weight the batch-mean
            # loss by the batch size so the epoch average is per-sample
            batch_size = labels.shape[0]
            loss_sum += loss.item()*batch_size
            correct_num += (prob.argmax(1)==labels).sum().item()
            total_num += batch_size

        # max(..., 1) avoids a ZeroDivisionError when the loader yields nothing
        epoch_acc = correct_num/max(total_num, 1)
        epoch_loss = loss_sum/max(total_num, 1)
        train_acc.append(epoch_acc)
        train_loss.append(epoch_loss)

        print("epoch: {} acc: {:.4} avg loss: {:.4f}".format(epoch, epoch_acc, epoch_loss))

        if epoch%eval_interval == eval_interval-1:
            val_acc, val_loss = test(cnn_model)
            eval_epochs.append(epoch)
            test_acc.append(float(val_acc))
            test_loss.append(float(val_loss))

    plt.figure()
    plt.plot(np.arange(MAX_EPOCH), train_acc)
    plt.plot(eval_epochs, test_acc)
    plt.title("accuracy")
    plt.legend(["train","val"])

    plt.figure()
    plt.plot(np.arange(MAX_EPOCH), train_loss)
    plt.plot(eval_epochs, test_loss)
    plt.title("loss")
    plt.legend(["train","val"])

In [29]:
def test(model):
    """Evaluate `model` on the "spec" test split and return its accuracy and loss.

    Args:
        model: a `torch.nn.Module` classifier; inputs are moved to the device
            of its first parameter (CPU if it has no parameters).

    Returns:
        `(accuracy, avg_loss)` as Python floats, where `avg_loss` is the
        per-sample cross-entropy over the whole test set. Both are 0.0 when
        the loader yields no samples.

    The model is switched to eval mode for the duration of the call and put
    back into whichever mode it was in before.
    """
    # NOTE(review): the leading 3 is presumably a fold/split index -- confirm against AudioDataset
    test_dataset = AudioDataset(3, DataRoot=data_root, LabelPath=label_path, feature="spec", mode="test")
    # the metrics are sums over all samples, so shuffling is unnecessary
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=1)

    # the loss is built here: the one created inside train() is not visible in this scope
    loss_fn = nn.CrossEntropyLoss()

    # follow the model's device rather than assuming CUDA
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()

    correct_num = 0
    total_num = 0
    loss_sum = 0.0

    # no gradients are needed for evaluation; skipping them saves memory and time
    with torch.no_grad():
        for test_data, labels in test_loader:
            test_data = test_data.to(device)
            labels = labels.to(device)

            prob = model(test_data)
            loss = loss_fn(prob, labels)

            batch_size = labels.shape[0]
            correct_num += (prob.argmax(1)==labels).sum().item()
            # weight the batch-mean loss by the batch size for a per-sample average
            loss_sum += loss.item()*batch_size
            total_num += batch_size

    # restore the mode the caller had the model in
    model.train(was_training)

    # max(..., 1) avoids a ZeroDivisionError on an empty test set
    acc = correct_num/max(total_num, 1)
    avg_loss = loss_sum/max(total_num, 1)

    print("##Testing## epoch acc: {:.4}".format(acc))
    return acc, avg_loss

In [30]:
# short 10-epoch training run
train(MAX_EPOCH=10)

verify spec feature success
0
1
2
3
4
5
6
7


KeyboardInterrupt: 

In [8]:
# NOTE(review): scratch arithmetic (90% of 159); its purpose is not evident from this notebook -- confirm or remove
159*0.9

143.1

In [9]:
# epoch indices used for the evaluation curves in train() when MAX_EPOCH=100 and eval_interval=5
np.arange(4,100,5)

array([ 4,  9, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69, 74, 79, 84,
       89, 94, 99])