In [1]:
import torch
import numpy as np
import json
import matplotlib.pyplot as plt
import torch.utils.data
import torch.functional as F
from torch.autograd import Variable

In [2]:
# Parse each file as one complete JSON document. json.load consumes the whole
# stream, so the data loads whether the JSON is minified onto a single line or
# pretty-printed across many (parsing only the first line would fail on the
# latter).
with open("train.json", encoding="utf-8") as f:
    train_data = json.load(f)

with open("test.json", encoding="utf-8") as f:
    test_data = json.load(f)

In [3]:
# Split the training records into two parallel lists, one entry per record:
# the "audio_embedding" payloads and the matching "is_turkey" labels.
audio_data = [
    record["audio_embedding"]
    for record in train_data
]
label_data = [
    record["is_turkey"]
    for record in train_data
]

In [4]:
class Mydataset(torch.utils.data.Dataset):
    """Labelled dataset of padded audio-embedding sequences.

    Args:
        audio_data: one sequence of embedding frames per clip (nested lists of
            numbers); clips may have different numbers of frames.
        label_data: one numeric label per clip, in the same order.

    All clips are padded (with pad_sequence's default value, 0) to the length
    of the longest clip and stored batch-first, so item ``i`` is the pair
    ``(padded_sequence_i, label_i)``.
    """

    def __init__(self, audio_data, label_data):
        sequences = list(map(torch.FloatTensor, audio_data))
        # pad_sequence stacks time-major (time, clip, ...); swap the first two
        # axes so that indexing the tensor selects a clip.
        padded = torch.nn.utils.rnn.pad_sequence(sequences)
        self._audio_data = padded.transpose(0, 1)
        self._label_data = torch.FloatTensor(label_data)

    def __len__(self):
        # Number of clips == size of the leading (clip) axis.
        return self._audio_data.size(0)

    def __getitem__(self, index):
        features = self._audio_data[index]
        label = self._label_data[index]
        return features, label
    
# Pair every test record's "vid_id" with its "audio_embedding" converted to a
# tensor; the result is a list of [vid_id, tensor] entries.
test_set = [
    [clip["vid_id"], torch.Tensor(clip["audio_embedding"])]
    for clip in test_data
]

class Mytestset(torch.utils.data.Dataset):
    """Unlabelled dataset: padded embedding sequences paired with clip ids.

    Args:
        audio_data_with_vid: sequence of ``[vid_id, embedding_tensor]`` pairs.

    The tensors are padded to the longest one and stored batch-first, so item
    ``i`` is the pair ``(padded_sequence_i, vid_id_i)``.
    """

    def __init__(self, audio_data_with_vid):
        sequences = [pair[1] for pair in audio_data_with_vid]
        # pad_sequence stacks time-major; swap the first two axes so that the
        # leading axis indexes clips.
        padded = torch.nn.utils.rnn.pad_sequence(sequences)
        self._audio_data = padded.transpose(0, 1)
        self._vid = [pair[0] for pair in audio_data_with_vid]

    def __len__(self):
        return self._audio_data.size(0)

    def __getitem__(self, index):
        features = self._audio_data[index]
        clip_id = self._vid[index]
        return features, clip_id

In [5]:
# Wrap the labelled data and carve out a random 80/20 train/validation split.
dataset = Mydataset(audio_data, label_data)
data_size = len(dataset)
train_size = int(0.8 * data_size)
valid_size = data_size - train_size
train_dataset, valid_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, valid_size],
)
test_dataset = Mytestset(test_set)

# The three labelled loaders are full-batch: batch_size equals the subset size,
# so every pass over a loader yields exactly one (shuffled) batch.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=len(train_dataset),
    shuffle=True,
)
valid_dataloader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=len(valid_dataset),
    shuffle=True,
)
all_dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=len(dataset),
    shuffle=True,
)
# Default DataLoader settings: one clip per batch, in dataset order.
test_dataloader = torch.utils.data.DataLoader(test_dataset)

In [6]:
class globalPool(torch.nn.Module):
    """Global pooling over the middle (sequence) axis of a 3-D tensor.

    Args:
        isAvg: if truthy, pool by averaging; otherwise pool by taking the
            element-wise maximum.

    ``forward`` takes a tensor of shape ``(B, S, F)`` and returns a tensor of
    shape ``(B, 1, F)``.

    Raises:
        AssertionError: if the input is not 3-dimensional.
    """

    def __init__(self, isAvg):
        super(globalPool, self).__init__()
        self._isAvg = isAvg

    def forward(self, x : torch.Tensor):
        assert x.dim() == 3, "only useful for dim = 3"
        # Insert the singleton axis explicitly. The equivalent
        # view(B, -1, S, F) cannot infer -1 when the tensor has zero elements
        # (e.g. an empty batch) and raises; unsqueeze has no such ambiguity
        # and avoids unpacking the sizes into names that shadow module aliases.
        x = x.unsqueeze(1)
        if self._isAvg:
            return torch.mean(x, 2)
        max_pool, _ = torch.max(x, 2)
        return max_pool

class Model(torch.nn.Module):
    """Per-frame classifier: batch-norm + dropout, bidirectional GRU, sigmoid head.

    Args:
        hidden_size: hidden units per GRU direction; the linear head consumes
            ``2 * hidden_size`` features because the GRU is bidirectional.

    ``forward`` expects a batch-first input whose second axis has size 10
    (BatchNorm1d is built with ``num_features=10``) and whose last axis has
    size 128 (the GRU ``input_size``). It returns one sigmoid score per frame,
    i.e. a ``(batch, 10, 1)`` tensor.

    Note: the recurrent layer is stored under the attribute name ``_LSTM``
    although it is a GRU.
    """

    def __init__(self, hidden_size=256):
        super(Model, self).__init__()
        normalise = torch.nn.BatchNorm1d(num_features=10)
        self._bn = torch.nn.Sequential(normalise, torch.nn.Dropout(0.5))
        self._LSTM = torch.nn.GRU(
            input_size=128,
            hidden_size=hidden_size,
            batch_first=True,
            bidirectional=True,
        )
        head_layers = [
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size * 2, 1),
            torch.nn.Sigmoid(),
        ]
        self._classify = torch.nn.Sequential(*head_layers)

    def forward(self, x):
        normalised = self._bn(x)
        # The GRU also returns its final hidden state, which is not needed here.
        frame_features, _ = self._LSTM(normalised)
        return self._classify(frame_features)
    
class Model_new(torch.nn.Module):
    """Clip-level classifier: two stacked bidirectional LSTMs, avg+max pooling, MLP head.

    Args:
        hidden_size: accepted for signature compatibility; the layer widths
            below are fixed and this value is not used.

    ``forward`` expects a batch-first input whose second axis has size 10
    (BatchNorm1d ``num_features``) and whose last axis has size 128 (first
    LSTM ``input_size``). It returns a ``(batch, 1)`` tensor of sigmoid scores.

    Note: the recurrent layers are stored as ``_gru1`` / ``_gru2`` although
    they are LSTMs.
    """

    def __init__(self, hidden_size=256):
        super(Model_new, self).__init__()
        self._bn = torch.nn.BatchNorm1d(num_features=10)
        # Bidirectional layers emit 2 * hidden_size features per frame:
        # 128 -> 256 after the first LSTM, 256 -> 128 after the second.
        self._gru1 = torch.nn.LSTM(
            input_size=128, hidden_size=128, batch_first=True, bidirectional=True
        )
        self._gru2 = torch.nn.LSTM(
            input_size=256, hidden_size=64, batch_first=True, bidirectional=True
        )
        self._avgpool = globalPool(isAvg=True)
        self._maxpool = globalPool(isAvg=False)
        # The head sees the average- and max-pooled features side by side
        # (128 + 128 = 256 inputs).
        head_layers = [
            torch.nn.Linear(256, 64),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(64, 1),
            torch.nn.Sigmoid(),
        ]
        self._fc = torch.nn.Sequential(*head_layers)

    def forward(self, x : torch.Tensor):
        features = self._bn(x)
        for recurrent in (self._gru1, self._gru2):
            # Each LSTM also returns its final states, which are not needed.
            features, _ = recurrent(features)
        pooled = torch.cat(
            [self._avgpool(features), self._maxpool(features)], dim=2
        )
        # Both pools keep a singleton middle axis; drop it before the head.
        return self._fc(pooled.squeeze(1))
        

In [7]:
# Run on the first GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
mymodel = Model_new()
mymodel.to(device)
# The models in this notebook end in a Sigmoid layer, so their outputs are
# already probabilities. BCELoss consumes probabilities directly;
# BCEWithLogitsLoss would apply a second sigmoid on top of them and optimise
# the wrong quantity.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(mymodel.parameters())

In [8]:
epoch = 1000


def _as_flat_scores(y_pred):
    """Reduce a model output to one score per sample.

    A 3-D output is reduced to its last position along axis 1 (channel 0); a
    2-D output is flattened to its leading axis. Any other rank is returned
    unchanged.
    """
    if y_pred.dim() == 3:
        return y_pred[:, -1, 0]
    if y_pred.dim() == 2:
        return y_pred.view(y_pred.size()[0])
    return y_pred


for i in range(epoch):
    # --- optimisation pass over the training split ---
    mymodel.train()
    for data, target in train_dataloader:
        data = data.to(device).float()
        target = target.to(device).float()
        optimizer.zero_grad()
        y_pred = _as_flat_scores(mymodel(data))
        loss = criterion(y_pred, target)
        loss.backward()
        optimizer.step()

    # --- accuracy on the validation split ---
    mymodel.eval()
    accu = 0
    # No gradients are needed for evaluation; disabling them avoids building
    # an autograd graph for the whole validation batch every epoch.
    with torch.no_grad():
        for data, target in valid_dataloader:
            data = data.to(device).float()
            target = target.to(device).float()
            y_pred = _as_flat_scores(mymodel(data))
            # Scores are sigmoid outputs, so 0.5 is the decision threshold.
            accu += ((y_pred > 0.5).float() == target).sum().item()
    # Progress reporting is disabled to keep the cell output short;
    # uncomment to trace training.
    # print("Valid mode: accuracy {}/{}".format(accu, len(valid_dataset)))
    # print("epoch: {}, loss: {}".format(i, loss.item()))

In [9]:
epoch = 1000
# Start from a fresh model for the final fit on all labelled data.
mymodel = Model_new()
mymodel.to(device)
# The optimizer must be rebuilt for the new model: an optimizer created earlier
# holds references to the parameters of the previous model instance, so
# stepping it would never update this one and the model would stay at its
# random initialisation.
optimizer = torch.optim.Adam(mymodel.parameters())
for i in range(epoch):
    mymodel.train()
    for data, target in all_dataloader:
        data = data.to(device).float()
        target = target.to(device).float()
        optimizer.zero_grad()
        y_pred = mymodel(data)
        # Reduce the output to one score per sample: last position of a 3-D
        # output, or the flattened leading axis of a 2-D output.
        if y_pred.dim() == 3:
            y_pred = y_pred[:, -1, 0]
        elif y_pred.dim() == 2:
            y_pred = y_pred.view(y_pred.size()[0])
        loss = criterion(y_pred, target)
        loss.backward()
        optimizer.step()
        # Progress reporting is disabled to keep the cell output short.
        # print("epoch: {}, loss: {}".format(i, loss.item()))

In [10]:
import csv

mymodel.eval()
# newline="" is what the csv module requires of the file object; without it
# platforms that translate "\n" (e.g. Windows) emit a blank line after each row.
with open("submission.csv", 'w', newline="") as f:
    mywriter = csv.writer(f)
    mywriter.writerow(["vid_id", "is_turkey"])
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for data, vid in test_dataloader:
            data = data.to(device).float()
            y_pred = mymodel(data)
            # test_dataloader yields one clip per batch, so the flattened
            # prediction holds exactly one value (item() raises otherwise).
            y_pred = y_pred.view(y_pred.size()[0]).item()
            # vid is the collated batch of ids; take its single element.
            mywriter.writerow([vid[0], y_pred])