In [None]:
# Mount Google Drive into the Colab runtime; the checkpoint paths used later in this
# notebook live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Standard library
import json
import math
import pickle

# Third-party
import numpy as np
import torch
from torch import Tensor, nn

# Batch size setting (not referenced by the cells visible in this part of the notebook).
batch_size = 64

In [None]:
!gdown --id 1-J0uZgsbCvlTa7eu9VQ89V_onUXVy_eW

Downloading...
From: https://drive.google.com/uc?id=1-J0uZgsbCvlTa7eu9VQ89V_onUXVy_eW
To: /content/preprocessedForTransformer.zip
100% 8.11M/8.11M [00:00<00:00, 71.5MB/s]


In [None]:
! unzip "preprocessedForTransformer.zip"

Archive:  preprocessedForTransformer.zip
replace augmented_pretrain.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
def _read_json(path):
    """Open the file at `path` for reading and return its decoded JSON content."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Dev / test splits.
dev_data1 = _read_json('dev.json')
test_data1 = _read_json('test.json')

# Word-segmented variants of the same splits.
dev_data2 = _read_json('word_segmented_dev.json')
test_data2 = _read_json('word_segmented_test.json')

In [None]:
def _as_long_tensors(split):
    """Return (tokens, labels) int64 tensors built from a split's 'tokens' and 'sentiments' entries."""
    tokens = torch.tensor(split['tokens'], dtype=torch.long)
    labels = torch.tensor(split['sentiments']).long()
    return tokens, labels


# Dev split: plain and word-segmented.
dev_tokens_tensor1, dev_y_tensor1 = _as_long_tensors(dev_data1)
dev_tokens_tensor2, dev_y_tensor2 = _as_long_tensors(dev_data2)

# Test split: plain and word-segmented.
test_tokens_tensor1, test_y_tensor1 = _as_long_tensors(test_data1)
test_tokens_tensor2, test_y_tensor2 = _as_long_tensors(test_data2)

In [None]:
import math
from typing import Tuple
from torch import nn, Tensor
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import dataset


class TransformerModel(nn.Module):
    """Transformer encoder with a token-prediction head and a classification head.

    Token ids are embedded, scaled by sqrt(d_model), given positional encodings and
    passed through a stack of ``TransformerEncoderLayer``s. Dimension 0 of the input
    is the sequence position: ``PositionalEncoding`` slices its table with
    ``x.size(0)`` and the encoder layers use the default ``batch_first=False``.

    Args:
        ntoken: Vocabulary size (embedding rows and width of the token-prediction head).
        d_model: Embedding / model width.
        nhead: Number of attention heads per encoder layer.
        d_hid: Feed-forward width inside each encoder layer.
        nlayers: Number of stacked encoder layers.
        dropout: Dropout probability used by the positional encoding and encoder layers.
        sequence_size: Sequence length. The classification head pools over exactly this
            many positions, so classification inputs must have this length on dim 0.
        class_num: Number of output classes of the classification head.
    """

    def __init__(self, ntoken: int, d_model: int, nhead: int, d_hid: int,
                 nlayers: int, dropout: float, sequence_size: int, class_num: int):
        super().__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(d_model, dropout, sequence_size)
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, d_model)
        self.d_model = d_model
        # Token-prediction head, used only when forward(..., pretraining=True).
        self.decoder = nn.Linear(d_model, ntoken)
        # Not used in forward(); retained so the module's attribute set is unchanged.
        self.dropout = nn.Dropout(dropout)
        # Classification head applied to every position before pooling.
        self.linear = nn.Linear(d_model, class_num)
        # Learned weighted pooling over the sequence positions (one weight per position).
        self.weighted_pool = nn.Linear(sequence_size, 1)
        self.init_weights()

    def init_weights(self) -> None:
        """Uniformly initialise the embedding and token-head weights; zero the token-head bias."""
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src: Tensor, pretraining = False) -> Tensor:
        """Return log-probabilities for `src`.

        Args:
            src: Integer token ids with the sequence position on dim 0.
            pretraining: If True, use the token-prediction head and return
                per-position log-probabilities over the vocabulary. Otherwise use the
                classification head and pool over positions.

        Returns:
            With ``pretraining=True``: the log-softmax of the token head with all
            size-1 dimensions squeezed out (unchanged legacy behaviour).
            Otherwise: a ``(batch, class_num)`` tensor of class log-probabilities.
        """
        src = self.encoder(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)

        if pretraining:
            output = self.decoder(output)
            # Blanket squeeze kept as-is for this path to preserve its output shape.
            return F.log_softmax(output, dim=-1).squeeze()

        output = self.linear(output)
        # Move the sequence axis last so the linear pool reduces it to size 1,
        # then restore the original axis order: the result has size 1 on dim 0.
        output = self.weighted_pool(output.permute(2, 1, 0)).permute(2, 1, 0)
        output = F.log_softmax(output, dim=-1)
        # Squeeze only the pooled dim. A bare squeeze() would also drop the batch
        # dimension for a single-example batch (or the class dim when class_num == 1).
        return output.squeeze(0)


class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to the input, then apply dropout.

    The table is stored as the non-trainable buffer ``pe`` of shape
    ``(max_len, 1, d_model)``: even feature indices hold sines, odd indices hold
    cosines. Both even and odd ``d_model`` values are supported.

    Args:
        d_model: Feature width of the inputs.
        dropout: Dropout probability applied after adding the encodings.
        max_len: Number of positions in the precomputed table.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        # One column per even feature index: ceil(d_model / 2) columns.
        angles = position * div_term
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # There are only d_model // 2 odd feature indices, which is one fewer than the
        # sine columns when d_model is odd; for even d_model the slice is the full table.
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """Add the encodings for the first ``x.size(0)`` positions to `x` and apply dropout.

        `x` must have the position axis on dim 0 and ``d_model`` features on its last
        dim; the table broadcasts over dim 1.
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)
        

In [None]:
def save_pred(model, dir, test_tokens_tensor):
    """Run `model` on a token tensor and pickle the exponentiated outputs.

    Args:
        model: Module to evaluate. It is switched to eval mode and called once on the
            whole input; its output is expected to be log-probabilities, since the
            values are exponentiated before being saved.
        dir: File name of the pickle, appended to the ensemble output folder
            ``drive/MyDrive/DeepLearningProject_Group3/output_for_ensemble/``
            (resolved relative to the current working directory).
        test_tokens_tensor: 2-D token tensor; its two dimensions are swapped before
            being passed to `model`.

    Side effects:
        Leaves `model` in eval mode and writes a pickled NumPy array to disk.
    """
    full_dir = 'drive/MyDrive/DeepLearningProject_Group3/output_for_ensemble/'+dir

    # Put the model that was actually passed in into eval mode (disables dropout).
    # The previous code called eval() on the global `model1` regardless of the argument.
    model.eval()
    with torch.no_grad():
        token_ids = torch.transpose(test_tokens_tensor, 0, 1)
        logits = model(token_ids)

    # Move to host memory before converting so a model on an accelerator also works.
    probabilities = np.exp(logits.detach().cpu().numpy())
    with open(full_dir, "wb") as file:
        pickle.dump(probabilities, file)

In [None]:
# Output file names, one per (model, split) pair; save_pred appends each to the
# ensemble output folder.
dev_dir1 = 'transformer_output_dev.pkl'
test_dir1 = 'transformer_output_test.pkl'

dev_dir2 = 'transformer_output_word_segmented_dev.pkl'
test_dir2 = 'transformer_output_word_segmented_test.pkl'

# The loaded objects are used directly as models below, i.e. each checkpoint is a whole
# pickled module rather than a state_dict. Such files can only be loaded with
# weights_only=False, which must be passed explicitly on PyTorch versions where
# torch.load defaults to weights_only=True.
# NOTE: unpickling can execute arbitrary code; only load checkpoints you trust.
path1 = '/content/drive/MyDrive/DeepLearningProject_Group3/checkpoint/Transformer/Transformer.pth'
model1 = torch.load(path1, map_location=torch.device('cpu'), weights_only=False)

path2 = '/content/drive/MyDrive/DeepLearningProject_Group3/checkpoint/Transformer/Transformer_word_segmented.pth'
model2 = torch.load(path2, map_location=torch.device('cpu'), weights_only=False)

# Save predictions for every (model, output file, token tensor) combination.
for _model, _out_name, _tokens in (
    (model1, dev_dir1, dev_tokens_tensor1),
    (model1, test_dir1, test_tokens_tensor1),
    (model2, dev_dir2, dev_tokens_tensor2),
    (model2, test_dir2, test_tokens_tensor2),
):
    save_pred(_model, _out_name, _tokens)