In [16]:
import librosa
import librosa.display
import torch
import sys
import glob
import time
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch import optim
from pytorch_model_summary import summary
import shutil

sys.path.insert(1, '../utils')
sys.path.insert(1, '../models')

from data import StutterData, load_data
from audioCNN import AudioCNN


In [17]:
model = AudioCNN()

In [18]:
print(summary(AudioCNN(), torch.zeros((1, 1, 20, 50)), show_input=True))

---------------------------------------------------------------------------
          Layer (type)         Input Shape         Param #     Tr. Param #
   AdaptiveAvgPool2d-1      [1, 1, 20, 50]               0               0
              Conv2d-2      [1, 1, 32, 32]              80              80
   AdaptiveAvgPool2d-3      [1, 8, 30, 30]               0               0
             Dropout-4      [1, 8, 16, 16]               0               0
              Conv2d-5      [1, 8, 16, 16]           1,168           1,168
   AdaptiveAvgPool2d-6     [1, 16, 14, 14]               0               0
              Linear-7           [1, 1024]         524,800         524,800
              Linear-8            [1, 512]         131,328         131,328
              Linear-9            [1, 256]             257             257
Total params: 657,633
Trainable params: 657,633
Non-trainable params: 0
---------------------------------------------------------------------------


In [19]:
model.forward(torch.zeros((1, 1, 20, 50)))  # Test forward pass

tensor([[0.0122]], grad_fn=<AddmmBackward>)

In [20]:
lr = 0.001
batch_size = 1
epochs = 25
validation_split=0.2
shuffle_dataset=True
random_seed=42
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")     #Check whether a GPU is present.

# optimizer = optim.SGD(model.parameters(), lr = 0.00001, momentum=0.9, weight_decay=5e-4)
# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 200], gamma=0.1)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = lr)

In [21]:
dataset = StutterData('../data/*')
train_loader, validation_loader = load_data(dataset, batch_size, validation_split=0.2, shuffle_dataset=True, random_seed=42)

In [22]:
model.to(device)

AudioCNN(
  (maxpool): AdaptiveAvgPool2d(output_size=32)
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1))
  (maxpool1): AdaptiveAvgPool2d(output_size=16)
  (dropout): Dropout(p=0.2, inplace=False)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (maxpool2): AdaptiveAvgPool2d(output_size=8)
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=1, bias=True)
)

In [23]:
def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """
    state: checkpoint we want to save
    is_best: is this the best checkpoint; min validation loss
    checkpoint_path: path to save checkpoint
    best_model_path: path to save best model
    """
    f_path = checkpoint_path
    # save checkpoint data to the path given, checkpoint_path
    torch.save(state, f_path)
    # if it is a best model, min validation loss
    if is_best:
        best_fpath = best_model_path
        # copy that checkpoint file to best path given, best_model_path
        shutil.copyfile(f_path, best_fpath)

In [24]:
valid_loss_min = float('inf') #init val_loss
checkpoint_path = '../models/audioCNN_ckpt.pth'
best_model_path = '../models/audioCNN_best_ckpt.pth'
for epoch in range(epochs):
    losses=[]
#     scheduler.step()
    
    start = time.time()
    
    for b_idx, x in enumerate(train_loader):
#         print(b_idx)
        inputs, targets = x['mfcc'].to(device), x['label'].to(device)
        
        optimizer.zero_grad()
        
        op = model(inputs).view(-1)
#         print(op[0], targets[0])
#         print(type(op.view(-1)[0]), type(targets[0]))
#         print(b_idx, op)
#         print(targets)
        loss = criterion(op, targets)
        loss.backward()
        
        optimizer.step()
        losses.append(loss.item())
        end = time.time()
        if b_idx % 100 == 0:
            print('Batch Index : %d Loss : %.10f Time : %.3f seconds ' % (b_idx, np.mean(losses), end - start))    
    model.eval()
    total = 0
    correct = 0
    acc = 0
    
    with torch.no_grad():
        for b_idx, x in enumerate(validation_loader):
            inputs, targets = x['mfcc'].to(device), x['label'].to(device)

            outputs = torch.sigmoid(model(inputs))
#             print(outputs, targets)
            
            
            predicted = torch.round(outputs.data)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()
            valid_loss = criterion(predicted.view(-1), targets.data)
            acc = 100.*correct/total
        print('Epoch : %d Val_Acc : %.3f Val_loss: %.3f' % (epoch, acc, valid_loss))
        print('--------------------------------------------------------------')
    checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': valid_loss,
            'valid_acc': acc,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }

    save_ckp(checkpoint, False, checkpoint_path, best_model_path)
    
    if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))
            # save checkpoint as best model
            save_ckp(checkpoint, True, checkpoint_path, best_model_path)
            valid_loss_min = valid_loss
    model.train()   

Batch Index : 0 Loss : 0.0614712387 Time : 0.016 seconds 
Batch Index : 100 Loss : 1.4136889238 Time : 1.735 seconds 
Batch Index : 200 Loss : 1.0112973245 Time : 3.703 seconds 
Batch Index : 300 Loss : 0.8644421383 Time : 5.735 seconds 
Batch Index : 400 Loss : 0.7959458611 Time : 7.434 seconds 
Batch Index : 500 Loss : 0.7615691856 Time : 8.987 seconds 
Batch Index : 600 Loss : 0.7337328635 Time : 10.661 seconds 
Epoch : 0 Val_Acc : 66.667 Val_loss: 0.313
--------------------------------------------------------------
Validation loss decreased (inf --> 0.313262).  Saving model ...
Batch Index : 0 Loss : 0.7301947102 Time : 0.029 seconds 
Batch Index : 100 Loss : 0.5973110587 Time : 1.779 seconds 
Batch Index : 200 Loss : 0.5892476275 Time : 3.401 seconds 
Batch Index : 300 Loss : 0.5678401484 Time : 4.995 seconds 
Batch Index : 400 Loss : 0.5740428980 Time : 6.588 seconds 
Batch Index : 500 Loss : 0.5897502337 Time : 8.224 seconds 
Batch Index : 600 Loss : 0.5963254225 Time : 9.792 se