# Import Files

In [1]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import time
from datetime import datetime
from torch.optim.lr_scheduler import StepLR
import argparse

# Train, Validation, Test NumPy files loading

In [2]:
class LibriSpeech():
    """Lazy accessor for the train / dev / test splits stored as ``.npy`` files.

    Nothing is read in the constructor. Each split is loaded from
    ``data_path`` the first time its property is accessed and the result is
    cached on the instance for later accesses.
    """

    def __init__(self, data_path):
        self.data_path = data_path
        # Per-split caches; ``None`` means "not loaded yet".
        self.dev_set = self.train_set = self.test_set = None

    @property
    def dev(self):
        """Return the ``(features, labels)`` pair of the dev split."""
        cached = self.dev_set
        if cached is None:
            cached = self.dev_set = load_data(self.data_path, 'dev')
        return cached

    @property
    def train(self):
        """Return the ``(features, labels)`` pair of the train split."""
        cached = self.train_set
        if cached is None:
            cached = self.train_set = load_data(self.data_path, 'train')
        return cached

    @property
    def test(self):
        """Return ``(features, None)`` for the test split; no labels are loaded."""
        if self.test_set is not None:
            return self.test_set
        features_file = os.path.join(self.data_path, 'test.npy')
        features = np.load(features_file, encoding='bytes', allow_pickle=True)
        self.test_set = (features, None)
        return self.test_set

    
def load_data(path, name):
    """Load one labelled split from ``path``.

    Reads ``<name>.npy`` followed by ``<name>_labels.npy`` and returns the two
    arrays as a ``(features, labels)`` tuple.
    """
    arrays = []
    for suffix in ('', '_labels'):
        file_path = os.path.join(path, '{}{}.npy'.format(name, suffix))
        arrays.append(np.load(file_path, encoding='bytes', allow_pickle=True))
    return tuple(arrays)

# DataSet Class

In [3]:
class MyDataset(torch.utils.data.Dataset):
    """Frame-level dataset: every frame is returned with ``k`` frames of context per side.

    Args:
        librispeech: ``(features, labels)`` pair. ``features`` is a sequence of
            2-D arrays (frames x feature dimension); ``labels`` is a parallel
            sequence of per-frame labels, or ``None`` for unlabelled data.
        k: number of context frames taken on each side of the centre frame.
        lowest: weight given to the outermost context frames. Weights rise
            linearly from ``lowest`` at the window edges to ``1.0`` at the
            centre frame.

    Each item is ``(xi, yi)``: ``xi`` is the flattened, weighted window of
    ``2 * k + 1`` frames as a float tensor, and ``yi`` is the centre frame's
    label, or ``-1`` when no labels were supplied.
    """

    def __init__(self, librispeech, k=15, lowest=0.1):
        self.k = k
        self.x_list = librispeech[0]
        self.y_list = librispeech[1] if len(librispeech) == 2 else None

        # Flat index -> (utterance index, frame index inside that utterance).
        self.idx_map = [
            (utt_idx, frame_idx)
            for utt_idx, frames in enumerate(self.x_list)
            for frame_idx in range(frames.shape[0])
        ]

        # Triangular window with exactly 2k + 1 weights. linspace fixes the
        # element count (a float-step arange can gain or lose one through
        # rounding) and makes both edges honour ``lowest``.
        falling = np.linspace(1.0, lowest, k + 1)             # centre -> right edge
        window = np.concatenate((falling[:0:-1], falling))    # left edge -> centre -> right edge

        # One weight per feature value, so the mask lines up with the
        # row-major flattened window built in __getitem__.
        self.win_mask = np.repeat(window, librispeech[0][0].shape[1])

    def __getitem__(self, idx):
        i, j = self.idx_map[idx]
        # mode='clip' repeats the first / last frame where the window would
        # run past the start or end of the utterance.
        frame_ids = np.arange(j - self.k, j + self.k + 1)
        context = self.x_list[i].take(frame_ids, mode='clip', axis=0).flatten()
        # Out-of-place multiply: works for any numeric feature dtype.
        xi = torch.from_numpy(context * self.win_mask).float()
        yi = self.y_list[i][j] if self.y_list is not None else -1
        return xi, yi

    def __len__(self):
        return len(self.idx_map)

# Xavier Initialization

In [4]:
def init_xavier(m):
    """Initialise a linear layer in place; meant to be passed to ``Module.apply``.

    ``nn.Linear`` modules (including subclasses) get Xavier-uniform weights
    and, when the layer has a bias, a constant bias of 0.01. Every other
    module type is left untouched.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    # Layers built with bias=False have ``bias is None``.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0.01)

# Model

In [5]:
class MyModel(nn.Module):
    """MLP frame classifier wrapped in an ``nn.Module``.

    The input is a flattened window of ``2 * k + 1`` frames with ``feat_dim``
    values each. Seven hidden stages (Linear -> BatchNorm1d -> ReLU ->
    Dropout(0.2)) are followed by a final Linear layer with ``out_size``
    outputs.

    Args:
        k: context frames on each side of the centre frame.
        feat_dim: number of features per frame.
        out_size: number of output classes.

    The defaults (15, 13, 346) reproduce the previously hard-coded network.
    """

    def __init__(self, k=15, feat_dim=13, out_size=346):
        super().__init__()
        in_size = ((k * 2) + 1) * feat_dim
        size_list = [in_size, in_size, 1024, 2048, 2048, 1024, 512, out_size, out_size]

        layers = []
        # Every consecutive width pair except the last becomes one hidden stage.
        for fan_in, fan_out in zip(size_list[:-2], size_list[1:-1]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.BatchNorm1d(fan_out))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.2))

        # Output layer: no normalisation or activation after it.
        layers.append(nn.Linear(size_list[-2], size_list[-1]))
        self.net = nn.Sequential(*layers)
        print(self.net)

    def forward(self, x):
        """Return the raw class scores for a batch of flattened windows."""
        return self.net(x)


In [6]:
def get_model(k):
    """Build the MLP classifier as a bare ``nn.Sequential``.

    ``k`` is the number of context frames per side; the input width is
    ``(2 * k + 1) * 13``. Seven Linear -> BatchNorm1d -> ReLU -> Dropout(0.2)
    stages are followed by a final 346 -> 346 Linear layer. The assembled
    network is printed and returned.
    """
    frame_dim = 13
    num_classes = 346
    input_dim = (2 * k + 1) * frame_dim
    widths = (input_dim, input_dim, 1024, 2048, 2048, 1024, 512, num_classes)

    modules = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        modules.extend((
            nn.Linear(fan_in, fan_out),
            nn.BatchNorm1d(fan_out),
            nn.ReLU(),
            nn.Dropout(0.2),
        ))
    modules.append(nn.Linear(num_classes, num_classes))

    mynet = nn.Sequential(*modules)
    print(mynet)
    return mynet

# Train and Test Function

In [7]:
def train(epoch, model, optimizer, train_loader, scheduler, args):
    """Run one training epoch with cross-entropy loss.

    Args:
        epoch: epoch number, used only in the progress message.
        model: network to optimise; switched to training mode.
        optimizer: optimizer stepped once per batch.
        train_loader: iterable of ``(data, target)`` batches that also exposes
            ``__len__`` and a ``dataset`` attribute (used for progress output).
        scheduler: accepted for interface compatibility; it is NOT stepped by
            this function.
        args: settings object providing ``cuda`` (move batches to the GPU) and
            ``log_interval`` (print progress every that many batches).
    """
    model.train()

    t0 = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # Tensors carry autograd state themselves; no Variable wrapper needed.
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} Batch: {} [{}/{} ({:.0f}%, time:{:.2f}s)]\tLoss: {:.6f}'.format(
                epoch, batch_idx, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), time.time() - t0,
                loss.item()))
            # Restart the timer so each message reports the time since the previous one.
            t0 = time.time()

def test(model, test_loader, args):
    """Evaluate ``model`` on ``test_loader`` and report loss and accuracy.

    Args:
        model: network to evaluate; switched to evaluation mode.
        test_loader: iterable of ``(data, target)`` batches exposing a
            ``dataset`` attribute whose length is used as the denominator.
        args: settings object providing ``cuda`` (move batches to the GPU).

    Returns:
        The accuracy as a string formatted like ``"75.0000%"``.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    # no_grad replaces the removed ``volatile=True`` flag: no autograd graph
    # is built during evaluation.
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # Sum (not mean) per batch; divided by the dataset size below.
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)  # index of the highest class score
            correct += (pred == target).sum().item()

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    accuracy = 100. * correct / num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples, accuracy))
    return "{:.4f}%".format(accuracy)


# The name matches pytest's default ``test*`` collection pattern; mark the
# helper as not-a-test so it is never collected by accident.
test.__test__ = False

# Arguments

In [11]:
class Argument():
    """Plain container for the run's hyper-parameters and paths."""

    def __init__(self):
        self.batch_size = 256            # samples per mini-batch
        self.epochs = 29                 # number of training epochs
        self.lr = 0.001                  # initial learning rate
        # Use the GPU only when one is actually available, so the same
        # settings also work on a CPU-only machine.
        self.cuda = torch.cuda.is_available()
        self.data_dir = "./hw1p2/"       # directory holding the *.npy data files
        self.K = 15                      # context frames on each side of a frame
        self.seed = 1001                 # RNG seed
        self.momentum = 0.9              # NOTE(review): not read by any code visible in this file
        self.log_interval = 1000         # batches between training progress messages
        self.weights_dir = "./weights/"  # directory for saved checkpoints


args = Argument()

# Dataloading

In [12]:
# Seed PyTorch's CPU and CUDA generators together. Seeding only the CUDA
# generator leaves CPU-side randomness (DataLoader shuffling, weight
# initialisation done before the model is moved to the GPU) unseeded.
torch.manual_seed(args.seed)

In [8]:
librispeech_loader = LibriSpeech(args.data_dir)

# Loader options used for training when running on the GPU.
kwargs = {'num_workers': 1, 'pin_memory': True, 'drop_last': True} if args.cuda else {}

train_loader = torch.utils.data.DataLoader(
    MyDataset(librispeech_loader.train, k=args.K),
    batch_size=args.batch_size, shuffle=True, **kwargs)

# Validation must see every dev frame exactly once: test() divides by the
# full dataset size, so dropping the last partial batch would under-report
# both loss and accuracy. Shuffling is pointless for evaluation.
eval_kwargs = dict(kwargs, drop_last=False)

test_loader = torch.utils.data.DataLoader(
    MyDataset(librispeech_loader.dev, k=args.K),
    batch_size=args.batch_size, shuffle=False, **eval_kwargs)


# Model initialization

In [13]:
# Build the network with get_model() rather than MyModel(). Per the original
# author's note, the saved *.pth weight files were not produced with MyModel(),
# and loading them into a MyModel() instance raises an error.
model = get_model(args.K).apply(init_xavier)
if args.cuda:
    model = model.cuda()

Sequential(
  (0): Linear(in_features=403, out_features=403, bias=True)
  (1): BatchNorm1d(403, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Dropout(p=0.2, inplace=False)
  (4): Linear(in_features=403, out_features=1024, bias=True)
  (5): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Dropout(p=0.2, inplace=False)
  (8): Linear(in_features=1024, out_features=2048, bias=True)
  (9): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): Dropout(p=0.2, inplace=False)
  (12): Linear(in_features=2048, out_features=2048, bias=True)
  (13): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (14): ReLU()
  (15): Dropout(p=0.2, inplace=False)
  (16): Linear(in_features=2048, out_features=1024, bias=True)
  (17): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (18): ReLU()
  (19): Dr

# Optimizer and Scheduler

In [10]:
# Adam over all model parameters; StepLR multiplies the learning rate by 0.5
# after every 5 calls to scheduler.step().
optimizer = optim.Adam(params=model.parameters(), lr=args.lr)
scheduler = StepLR(optimizer=optimizer, step_size=5, gamma=0.5)

# Training

In [None]:
# Create the checkpoint directory once, up front; exist_ok makes this safe
# whether or not the directory is already there.
os.makedirs(args.weights_dir, exist_ok=True)

for epoch in range(1, args.epochs + 1):
    print(datetime.now())
    print('LR: ', scheduler.get_last_lr())
    train(epoch, model, optimizer, train_loader, scheduler, args)
    acc_str = test(model, test_loader, args)
    # One checkpoint per epoch: hw1p2_001.pth, hw1p2_002.pth, ...
    torch.save(model.state_dict(),
               os.path.join(args.weights_dir, "hw1p2_{:03d}.pth".format(epoch)))

# Prepare Kaggle Submission

## Load best weight
#### **Note:** the model must be created first (run the Model Initialization cell) before executing the steps below.

In [14]:
# When not running on the GPU, remap the stored tensors to the CPU so a
# checkpoint saved from a GPU run can still be loaded.
model.load_state_dict(torch.load(args.weights_dir+'/hw1p2_029.pth',
                                 map_location=None if args.cuda else 'cpu'))

<All keys matched successfully>

## Evaluate Function

In [16]:
def eval_model(model, test_loader):
    """Predict a class index for every sample yielded by ``test_loader``.

    Args:
        model: trained network; switched to evaluation mode.
        test_loader: iterable of ``(data, target)`` batches; targets are ignored.

    Returns:
        Integer array of shape ``(num_samples, 1)`` holding the predicted
        class index of each sample, in loader order.
    """
    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    batches = []
    with torch.no_grad():
        for data, _ in test_loader:
            outputs = model(data.to(device))
            # Keep the prediction of every row, not just the first one, so
            # any batch size gives one prediction per sample.
            batches.append(outputs.argmax(dim=1, keepdim=True).cpu().numpy())

    if not batches:
        return np.empty((0, 1), dtype=np.int64)
    return np.concatenate(batches, axis=0)

## Test Data Loading

In [17]:
# Fresh data handle for the unlabelled test split.
librispeech_loader = LibriSpeech(args.data_dir)
kwargs = {}
if args.cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}

# One sample per batch in the loader's default (sequential) order, so the
# predictions line up with the order of the test frames.
eval_loader = torch.utils.data.DataLoader(
    dataset=MyDataset(librispeech_loader.test, k=args.K),
    batch_size=1,
    **kwargs
)

## Prediction

In [None]:
# Run the model over the evaluation loader; eval_model() returns the predicted
# class indices as a NumPy array.
pred = eval_model(model, eval_loader)

## Save Prediction for Kaggle Submission

In [None]:
# Kaggle submission file: a header line, then one "<row id>,<label>" line per
# prediction, with row ids counting up from 0.
with open('submission.csv', 'w') as w:
    w.write('id,label\n')
    w.writelines(
        ','.join((str(row_id), str(row[0]))) + '\n'
        for row_id, row in enumerate(pred)
    )