
# README

## Model Hyperparameters
* Layers -> [1240, 2048, 2048, 1024, 1024, 512, 138]

* ReLU activations

* BatchNorm1d in between layers, after the activations

* Dropout rate -> [0, 0, 0.1, 0, 0.2, 0, 0]

* Context size k = 15 frames on both sides

* Adam optimizer, with the default learning rate 1e-3

* Two runs of tuning learning rate: 1st --> step_size = 8, 2nd --> step_size = 4

* epoch = 12

* batch_size = 128

## Experimentation

The script consists of environmental setup, data loading, model building, and result output. 

Data Loading: 

* Create training and validation dataset (9: 1)

* Create data loaders that generate a batch of frames for training

Model Building:

Two rounds of model tuning are carried out with different step_size for scheduler. In the first round, step_size = 8 is used and Xavier is utilized for paremeter initialization. In the 2nd round, it uses the 1st model as the pre-trained model and the step_size of scheduler is set as 4. 

Result Output:

Read the test file and write the model predictions to file.


## Guidelines for Running
The script can only function under CUDA environment. Please change the directory paths (DATA_PATH, MODEL_PATH, SUBMISSION_PATH), for data, model, and submission files respectively for your environment. 


### Environmental Setup


In [1]:
DATA_PATH = "data/"
MODEL_PATH = "model/"
SUBMISSION_PATH = "submission/"
num_workers = 4
batch_size = 128
K = 15

In [2]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
import torch.nn as nn
import os
from torch.autograd import Variable
import torch.nn.functional as F
import time

In [3]:
device = torch.device('cuda:0')
# set SEED
os.environ["SEED"] = "999"
torch.manual_seed(999)
np.random.seed(0)

In [4]:
torch.cuda.is_available()

True

### Data loading

In [5]:
def load_data(x_path,y_path=""):
    x = np.load(x_path, allow_pickle=True)
    print ("Number of utterances " + str(x.shape[0]))
    print ("Number of dimentions " + str(x[0].shape[1]))
    print ("Avg length of utterances " + str(np.mean([i.shape[0] for i in x])))
    if y_path:
        y = np.load(y_path, allow_pickle=True)
        return x, y
    return x

In [6]:
dev_x, dev_y = load_data(DATA_PATH+"dev.npy",y_path=DATA_PATH+"dev_labels.npy")

Number of utterances 1100
Number of dimentions 40
Avg length of utterances 614.3963636363636


In [7]:
train_x, train_y = load_data(DATA_PATH+"train.npy",y_path=DATA_PATH+"train_labels.npy")

Number of utterances 24500
Number of dimentions 40
Avg length of utterances 628.1107346938776


In [8]:
data_x = np.append(train_x, dev_x)
data_y = np.append(train_y, dev_y)

In [9]:
class MyDataset(Dataset):
    def __init__(self, k, x, y=None):
        super().__init__()
        self.k = k
        self._x = x.copy()
        if y is not None:
            self._y = y.copy()
        else:
            self._y = None
        self.n_dim = len(self._x[0][0])
        self.output_dim = self.n_dim * (2*self.k+1)
        self.utterance_count = len(self._x)
        self.utterance_start_pos = self.get_utterance_start_pos()
        self.frame_count = self.utterance_start_pos[-1] + len(self._x[-1])
        self.outcomes = np.arange(138)
        self.zero_padding()

    def __len__(self):
        return self.frame_count
    
    def __getitem__(self, index):
        utterance_id, frame_id = self.refactor_index(index)
        x_item = self._x[utterance_id][frame_id:(frame_id + self.k * 2 + 1)]
        if self._y is not None:
            return torch.from_numpy(x_item), self._y[utterance_id][frame_id]
        else:
            return x_item
    
    def get_utterance_start_pos(self):
        utterance_start_pos = []
        size = 0
        for i in range(len(self._x)):
            utterance_start_pos.append(size)
            size += len(self._x[i])
        return utterance_start_pos
    
    def zero_padding(self):
        for i in range(len(self._x)):
            self._x[i] = np.concatenate([np.zeros((self.k, self.n_dim)), self._x[i], np.zeros((self.k, self.n_dim))])
            if self._y is not None:
                self._y[i] = torch.tensor(self._y[i]).long()
        
    def refactor_index(self, i):
        left = 0
        right = len(self.utterance_start_pos)
        while (left < right - 1):
            mid = (right + left)//2
            mid_val = self.utterance_start_pos[mid]
            if  mid_val == i:
                return mid, 0
            elif i < mid_val:
                right = mid
            else:
                left = mid
        return left, i - self.utterance_start_pos[left]

In [10]:
train_dataset = MyDataset(K, data_x, data_y)
train_set_size = int(len(train_dataset)*0.9)
train_set, val_set = torch.utils.data.random_split(train_dataset, [train_set_size, len(train_dataset) - train_set_size])

print (len(train_set))
print (len(val_set))

14458094
1606455


In [11]:
print (train_dataset[0][0].shape)
print (train_dataset[0][1].shape)
#print (train_dataset[0][0])
#print (train_dataset[0][1])

torch.Size([31, 40])
torch.Size([])


In [12]:
train_dataloader = DataLoader(
    train_set,         # The dataset
    batch_size=batch_size,    # Batch size
    shuffle=True,      # Shuffles the dataset at every epoch
    pin_memory=True,   
    num_workers=num_workers)

val_dataloader = DataLoader(
    val_set,         # The dataset
    batch_size=batch_size,    # Batch size
    shuffle=False,      # Shuffles the dataset at every epoch
    pin_memory=True,   
    num_workers=num_workers)

### Model Config

In [13]:
class MLPModel(nn.Module):
    def __init__(self, num_features, out_features):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(num_features, 2048),
            nn.ReLU(),
            nn.BatchNorm1d(2048),

            nn.Linear(2048, 2048),
            nn.ReLU(),
            nn.BatchNorm1d(2048),
            nn.Dropout(0.1),

            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Dropout(0.2),

            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),

            nn.Linear(512, out_features),
        )

    def forward(self, x):
        return self.model(x)

In [14]:
def init_xavier(m):
    if type(m) == nn.Linear:
        fan_in = m.weight.size()[1]
        fan_out = m.weight.size()[0]
        std = np.sqrt(2.0 / (fan_in + fan_out))
        m.weight.data.normal_(0,std)

### Build 1st Model

In [15]:
model = MLPModel(train_dataset.output_dim, len(train_dataset.outcomes))
model.apply(init_xavier)
model = model.float()
model.cuda()
print (model)

AdamOptimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(AdamOptimizer, step_size=8, gamma=0.1)
loss_fn = nn.CrossEntropyLoss()

MLPModel(
  (model): Sequential(
    (0): Linear(in_features=1240, out_features=2048, bias=True)
    (1): ReLU()
    (2): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Linear(in_features=2048, out_features=2048, bias=True)
    (4): ReLU()
    (5): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=2048, out_features=1024, bias=True)
    (8): ReLU()
    (9): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): Linear(in_features=1024, out_features=1024, bias=True)
    (11): ReLU()
    (12): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): Dropout(p=0.2, inplace=False)
    (14): Linear(in_features=1024, out_features=512, bias=True)
    (15): ReLU()
    (16): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (17): Linear(in_features=

In [None]:
mean_train_losses = []
mean_valid_losses = []
valid_acc_list = []
epochs = 12
best_model = None
for epoch in range(epochs):
    model.train()
    train_losses = []
    scheduler.step()
    for x_batch, y_batch in tqdm(train_dataloader):
        AdamOptimizer.zero_grad()
        x_batch = x_batch.view(-1, (2*K+1)*40).cuda()
        y_batch = y_batch.long().cuda()
        outputs = model(x_batch.float())
        loss = loss_fn(outputs, y_batch)
        loss.backward()
        AdamOptimizer.step()
        train_losses.append(loss.item())
    
    model.eval()
    valid_losses = []
    correct = 0
    total = 0
    with torch.no_grad():
        for x_batch, y_batch in tqdm(val_dataloader):
            x_batch = x_batch.view(-1, (2*K+1)*40).cuda()
            y_batch = y_batch.long().cuda()
            outputs = model(x_batch.float())
            loss = loss_fn(outputs, y_batch)
            valid_losses.append(loss.item())
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)
    
    mean_train_losses.append(np.mean(train_losses))
    mean_valid_losses.append(np.mean(valid_losses))
    accuracy = 100*correct/total
    if (best_model is None) or (accuracy > max(valid_acc_list)):
        best_model = model
    valid_acc_list.append(accuracy)
    print('epoch {}: train loss : {:.4f}, valid loss : {:.4f}, valid acc : {:.2f}%'\
         .format(epoch+1, np.mean(train_losses), np.mean(valid_losses), accuracy))
model_name = ('best_%d.pt'%int(time.time()))
torch.save(best_model.state_dict(), MODEL_PATH+model_name)

### Build 2nd Model based on 1st

In [22]:
model = MLPModel(train_dataset.output_dim, len(train_dataset.outcomes))
model.load_state_dict(torch.load(MODEL_PATH+model_name))
model.cuda()
AdamOptimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(AdamOptimizer, step_size=4, gamma=0.1)

In [23]:
mean_train_losses = []
mean_valid_losses = []
valid_acc_list = []
epochs = 12
best_model = None
for epoch in range(epochs):
    model.train()
    train_losses = []
    scheduler.step()
    for x_batch, y_batch in tqdm(train_dataloader):
        AdamOptimizer.zero_grad()
        x_batch = x_batch.view(-1, (2*K+1)*40).cuda()
        y_batch = y_batch.long().cuda()
        outputs = model(x_batch.float())
        loss = loss_fn(outputs, y_batch)
        loss.backward()
        AdamOptimizer.step()
        train_losses.append(loss.item())
    
    model.eval()
    valid_losses = []
    correct = 0
    total = 0
    with torch.no_grad():
        for x_batch, y_batch in tqdm(val_dataloader):
            x_batch = x_batch.view(-1, (2*K+1)*40).cuda()
            y_batch = y_batch.long().cuda()
            outputs = model(x_batch.float())
            loss = loss_fn(outputs, y_batch)
            valid_losses.append(loss.item())
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)
    
    mean_train_losses.append(np.mean(train_losses))
    mean_valid_losses.append(np.mean(valid_losses))
    accuracy = 100*correct/total
    if (best_model is None) or (accuracy > max(valid_acc_list)):
        best_model = model
    valid_acc_list.append(accuracy)
    print('epoch {}: train loss : {:.4f}, valid loss : {:.4f}, valid acc : {:.2f}%'\
         .format(epoch+1, np.mean(train_losses), np.mean(valid_losses), accuracy))
torch.save(best_model.state_dict(), MODEL_PATH+'best_%d.pt'%int(time.time()))



HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 1: train loss : 1.5741, valid loss : 1.4948, valid acc : 59.75%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 2: train loss : 1.5429, valid loss : 1.4777, valid acc : 60.15%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 3: train loss : 1.5147, valid loss : 1.4464, valid acc : 61.06%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 4: train loss : 1.3974, valid loss : 1.3393, valid acc : 63.82%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 5: train loss : 1.3781, valid loss : 1.3297, valid acc : 64.03%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 6: train loss : 1.3686, valid loss : 1.3219, valid acc : 64.25%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 7: train loss : 1.3614, valid loss : 1.3176, valid acc : 64.30%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 8: train loss : 1.3461, valid loss : 1.3095, valid acc : 64.58%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 9: train loss : 1.3441, valid loss : 1.3084, valid acc : 64.59%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 10: train loss : 1.3434, valid loss : 1.3076, valid acc : 64.61%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 11: train loss : 1.3425, valid loss : 1.3068, valid acc : 64.61%


HBox(children=(IntProgress(value=0, max=112954), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12551), HTML(value='')))


epoch 12: train loss : 1.3406, valid loss : 1.3065, valid acc : 64.62%


### Result output

In [24]:
final_model = best_model

In [25]:
test_x = load_data(DATA_PATH+"test.npy")
test_dataset = MyDataset(K, test_x)
test_dataloader = DataLoader(
    test_dataset, # The dataset
    batch_size=batch_size,      # Batch size
    shuffle=False,      # Shuffles the dataset at every epoch
    pin_memory=True,   
    num_workers=num_workers)

Number of utterances 361
Number of dimentions 40
Avg length of utterances 619.3684210526316


In [26]:
final_model.eval()
test_preds = torch.LongTensor().cuda()

with torch.no_grad():
    for x_batch in tqdm(test_dataloader):
        x_batch = x_batch.view(-1, (2*K+1)*40).cuda()
        outputs = final_model(x_batch.float())
        _, pred = torch.max(outputs.data, 1)
        test_preds = torch.cat((test_preds, pred), dim=0)
        
out_df = pd.DataFrame()
out_df['id'] = np.arange(0, len(test_dataset))
out_df['label'] = test_preds.cpu().numpy()
out_df.head()

HBox(children=(IntProgress(value=0, max=1747), HTML(value='')))




Unnamed: 0,id,label
0,0,108
1,1,108
2,2,108
3,3,108
4,4,108


In [27]:
file_name = SUBMISSION_PATH+"submission_%d.csv"%int(time.time())
out_df.to_csv(file_name,index=False)