In [1]:
from transformers import BertTokenizer, BertModel

import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F

from dataset import Dataset

import sys
import numpy as np
import pickle

# Raise NumPy's summarisation threshold to the largest possible value so that
# printed arrays are always shown in full instead of being abbreviated with "...".
np.set_printoptions(threshold=sys.maxsize)

In [2]:
class KimCNN(nn.Module):
    """Convolutional classifier over a sequence of embedding vectors.

    One ``nn.Conv2d`` per entry of ``kernel_sizes`` slides over the input, each
    result is ReLU-activated and max-pooled over the sequence axis, the pooled
    features are concatenated, passed through dropout and a linear layer, and
    finally squashed with a sigmoid.

    Args:
        embed_num: Number of rows of the ``self.embed`` table.
        embed_dim: Width ``D`` of every input vector (also the kernel width).
        class_num: Number of output units ``C``.
        kernel_num: Number of output channels ``Co`` of every convolution.
        kernel_sizes: Iterable of kernel heights ``K`` (one convolution each).
        dropout: Dropout probability applied before the linear layer.
        static: Flag stored as ``self.static``. It is not read anywhere in this
            class; it exists so the attribute is still available to callers.
    """

    def __init__(self, embed_num, embed_dim, class_num, kernel_num, kernel_sizes, dropout,
                 static=False):
        super(KimCNN, self).__init__()
        V = embed_num
        D = embed_dim
        C = class_num
        Co = kernel_num
        Ks = kernel_sizes

        # `static` was previously read from an undefined name, which raised
        # NameError as soon as the class was instantiated. It is now an
        # optional keyword argument, so existing call sites keep working.
        self.static = static
        # NOTE(review): forward() never calls self.embed -- it consumes vectors
        # that are already embedded. Kept so the set of parameters is unchanged.
        self.embed = nn.Embedding(V, D)
        self.convs1 = nn.ModuleList([nn.Conv2d(1, Co, (K, D)) for K in Ks])
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(len(Ks) * Co, C)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-class sigmoid scores of shape ``(N, C)``.

        Args:
            x: Tensor of shape ``(N, W, D)``. ``W`` must be at least
                ``max(kernel_sizes)`` because the convolutions use no padding.
        """
        x = x.unsqueeze(1)  # (N, Ci, W, D)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)
        # Max over the whole remaining sequence axis -> one value per channel.
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...]*len(Ks)
        x = torch.cat(x, 1)
        x = self.dropout(x)  # (N, len(Ks)*Co)
        logit = self.fc1(x)  # (N, C)
        output = self.sigmoid(logit)
        return output

In [3]:
class MLE(nn.Module):
    """Single linear layer with dropout on both sides and a sigmoid output.

    The layer stack is ``Dropout -> Linear -> Dropout -> Sigmoid``, in that
    order, stored as ``self.net``.

    Args:
        input_dim: Size of the last dimension of the input (default 768).
        class_num: Number of output units (default 6).
        dropout: Probability used by both dropout layers (default 0.1).
    """

    def __init__(self, input_dim=768, class_num=6, dropout=0.1):
        super(MLE, self).__init__()

        # The defaults reproduce the previously hard-coded 768 -> 6 network
        # with p=0.1, and the order of the layers inside `net` is unchanged.
        # NOTE(review): the second Dropout acts on the Linear outputs right
        # before the Sigmoid, i.e. it zeroes individual class scores during
        # training -- confirm this is intended.
        self.net = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(input_dim, class_num),
            nn.Dropout(dropout),
            nn.Sigmoid(),
        )

    def forward(self, tokens, masks=None):
        """Run ``tokens`` through ``self.net`` and return the sigmoid scores.

        Args:
            tokens: Tensor whose last dimension has size ``input_dim``.
            masks: Accepted for call-site compatibility; it is ignored.
        """
        return self.net(tokens)

In [4]:
# Relative path (resolved against the kernel's working directory) handed to the
# project-local Dataset class; the name suggests it holds pre-processed training data.
processed_train_path = "../data/pre_processed/"
# The training cell uses len(train_dataset) and integer indexing on this object,
# unpacking every item as a (batch, labels) pair.
train_dataset = Dataset(processed_train_path)

In [10]:
# Alternative model, currently disabled: KimCNN instead of MLE.
# kernel_num = 3
# kernel_sizes = [2, 3, 4]
# dropout = 0.5

# classifier = KimCNN(
#     embed_num=512,
#     embed_dim=768,
#     class_num=6,
#     kernel_num=kernel_num,
#     kernel_sizes=kernel_sizes,
#     dropout=dropout
# )

# Seed PyTorch's global RNG before the model is built so that the weight
# initialisation and the dropout masks are identical on every
# "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

classifier = MLE()

# Training hyper-parameters.
n_epochs = 1
lr = 0.01

optimizer = torch.optim.Adam(classifier.parameters(), lr=lr)
# NOTE(review): nn.CrossEntropyLoss applies log-softmax internally and expects
# raw, unnormalised scores, but both MLE and KimCNN end in nn.Sigmoid. Feeding
# values confined to (0, 1) keeps the loss close to log(num_classes) and
# weakens the gradients. Either drop the final sigmoid or switch to a binary
# cross-entropy loss with multi-hot float targets -- confirm which task is meant.
loss_fn = nn.CrossEntropyLoss()

In [11]:
# Train `classifier` with one optimiser step per sample and print the summed
# loss of every dataset item ("file") once all of its samples were processed.

# Switching to training mode is loop-invariant, so do it once up front.
classifier.train()

for epoch in range(n_epochs):
    print(f"Current epoch {epoch} out of {n_epochs} ")
    # NOTE(review): because of the "- 1" the last item of train_dataset is
    # never visited -- confirm that skipping it is intentional.
    for file_index in range(len(train_dataset)-1):
        # Plain Python float: the running total is only used for reporting.
        train_loss = 0.0
        batch, labels = train_dataset[file_index]
        for batch_index in range(batch.shape[0]):

            # Transpose the sample and average over dim 1 to get one feature
            # vector (assumes each sample is (seq_len, hidden) -- TODO confirm).
            batch_sample = batch[batch_index].T.float()
            batch_sample = torch.mean(batch_sample, 1)

            # Position of the largest label entry, as a class-index target of
            # shape (1,). Reshaping the index tensor avoids rebuilding a tensor
            # from a tensor with torch.tensor([...]).
            _, indices = labels[batch_index].max(0)
            labels_sample = indices.detach().reshape(1).long()

            optimizer.zero_grad()

            # Add the batch dimension without hard-coding the class count.
            output = classifier(batch_sample).reshape(1, -1)

            loss = loss_fn(output, labels_sample)
            loss.backward()
            optimizer.step()

            # .item() detaches the value from the autograd graph; summing the
            # loss tensor itself would keep every step's graph alive.
            train_loss += loss.item()
        print(f'Train loss: {train_loss}')

Current epoch 0 out of 1 
Train loss: 174.8590545654297
Train loss: 170.3855743408203
Train loss: 170.1218719482422
Train loss: 168.822509765625
Train loss: 1.7294697761535645
Train loss: 169.4300994873047
Train loss: 166.4652099609375
Train loss: 165.3844757080078


ValueError: too many dimensions 'str'