# Import dependencies

In [1]:
import os
import sys

sys.path.insert(0, os.path.dirname(os.getcwd())) 

In [27]:
import time
import gc
import json

import numpy as np
import pandas as pd

from transformers import BertTokenizer, BertModel
from transformers import logging

from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, Dataset
from torch.nn.utils.rnn import pad_sequence

from torch import cuda

from ate_models.ATE_BERT_Dropout_Linear import ATE_BERT_Dropout_Linear

In [3]:
device = 'cuda' if cuda.is_available() else 'cpu'
logging.set_verbosity_error() 

In [4]:
print(torch.cuda.get_device_name(0))
print(f"Memory: {torch.cuda.get_device_properties(0).total_memory // 1024 ** 3} GB")

NVIDIA GeForce RTX 2060 SUPER
Memory: 8 GB


In [5]:
def clear_memory():
    torch.cuda.empty_cache()

    with torch.no_grad():
        torch.cuda.empty_cache()

    gc.collect()

# Load Data

In [6]:
DATASET = 'ATE_SemEval16_Restaurants_train.json'

In [7]:
df = pd.json_normalize(json.load(open(DATASET)))

In [8]:
df.head()

Unnamed: 0,text,tokens,iob_aspect_tags
0,Judging from previous posts this used to be a ...,"[Judging, from, previous, posts, this, used, t...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ..."
1,"We, there were four of us, arrived at noon - t...","[We, ,, there, were, four, of, us, ,, arrived,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"They never brought us complimentary noodles, i...","[They, never, brought, us, complimentary, nood...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,The food was lousy - too sweet or too salty an...,"[The, food, was, lousy, -, too, sweet, or, too...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
4,The food was lousy - too sweet or too salty an...,"[The, food, was, lousy, -, too, sweet, or, too...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"


# Train & Validate

In [9]:
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 4

EPOCHS = 2

LEARNING_RATE = 1e-5

TRAIN_SPLIT = 0.8

NO_RUNS = 10

In [23]:
model = torch.load('../../../results/ATE/SemEval16 - Task 5 - Restaurants/models/bert_pre_trained_dropout_cnn_bilstm_linear.pth')

In [33]:
class InputDataset(Dataset):
    def __init__(self, df, tokenizer):
        self.data = df
        self.text = df.text
        self.tokenizer = tokenizer
        self.targets = self.data.iob_aspect_tags

    def __len__(self):
        return len(self.text)

    def __getitem__(self, idx):
        tokens = self.data['tokens'].iloc[idx]
        tags = self.data['iob_aspect_tags'].iloc[idx]

        data_tokens = []
        data_tags = []

        for i in range(len(tokens)):
            t = self.tokenizer.tokenize(tokens[i])
            data_tokens += t
            data_tags += [int(tags[i])] * len(t)
        
        data_ids = self.tokenizer.convert_tokens_to_ids(data_tokens)

        ids_tensor = torch.tensor(data_ids)
        tags_tensor = torch.tensor(data_tags)

        return data_tokens, ids_tensor, tags_tensor, self.data['text'], len(self.data['tokens'])
    
    def __len__(self):
        return len(self.data)

In [11]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [34]:
def create_mini_batch(samples):
    ids_tensors = [s[1] for s in samples]
    ids_tensors[0] = torch.nn.ConstantPad1d((0, 512 - len(ids_tensors[0])), 0)(ids_tensors[0])
    ids_tensors = pad_sequence(ids_tensors, batch_first=True).to(device)

    tags_tensors = [s[2] for s in samples]
    tags_tensors[0] = torch.nn.ConstantPad1d((0, 512 - len(tags_tensors[0])), 0)(tags_tensors[0])
    tags_tensors = pad_sequence(tags_tensors, batch_first=True).to(device)
    
    masks_tensors = torch.zeros(ids_tensors.shape, dtype=torch.long).to(device)
    masks_tensors = masks_tensors.masked_fill(ids_tensors != 0, 1).to(device)
    
    return ids_tensors, tags_tensors, masks_tensors, [s[3] for s in samples], [s[4] for s in samples]

In [37]:
def validation(model, dataloader):
    model.eval()
    
    fin_targets=[]
    fin_outputs=[]

    with torch.no_grad():
        for _, data in enumerate(dataloader, 0):
            ids_tensors, tags_tensors, masks_tensors, text, toklen = data
            ids_tensors = ids_tensors.to(device)
            tags_tensors = tags_tensors.to(device)
            masks_tensors = masks_tensors.to(device)

            outputs = model(ids_tensors, masks_tensors)
            
            _, predictions = torch.max(outputs, dim=2)

            # print(ids_tensors)
            # print(predictions)
            # print(tags_tensors)

            # print("\n")

            for leni in toklen:
                fin_outputs += list([int(p) for pred in predictions for p in pred[:leni]])
                fin_targets += list([int(tag) for tags_tensor in tags_tensors for tag in tags_tensor[:leni]])

    return fin_outputs, fin_targets

In [17]:
results = pd.DataFrame(columns=['accuracy','precision_score_micro','precision_score_macro','recall_score_micro','recall_score_macro','f1_score_micro','f1_score_macro', 'execution_time'])

In [14]:
from sklearn.metrics import classification_report

In [38]:
for i in range(1):
    # clear cache cuda
    torch.cuda.empty_cache()
    with torch.no_grad():
        torch.cuda.empty_cache()
    gc.collect()

    start_time = time.time()

    print(f"Run {i + 1}/{NO_RUNS}")

    training_set = InputDataset(df, tokenizer)

    train_dataloader = DataLoader(
        training_set,
        sampler = RandomSampler(training_set),
        batch_size = TRAIN_BATCH_SIZE,
        drop_last = True,
        collate_fn=create_mini_batch
    )

    outputs, targets = validation(model, train_dataloader)    

    accuracy = accuracy_score(targets, outputs)
    precision_score_micro = precision_score(targets, outputs, average='micro')
    precision_score_macro = precision_score(targets, outputs, average='macro')
    recall_score_micro = recall_score(targets, outputs, average='micro')
    recall_score_macro = recall_score(targets, outputs, average='macro')
    f1_score_micro = f1_score(targets, outputs, average='micro')
    f1_score_macro = f1_score(targets, outputs, average='macro')

    print(f"Accuracy: {accuracy}")
    print(f"Precision score micro: {precision_score_micro}")
    print(f"Precision score macro: {precision_score_macro}")
    print(f"Recall score micro: {recall_score_micro}")
    print(f"Recall score macro: {recall_score_macro}")
    print(f"F1 score micro: {f1_score_micro}")
    print(f"F1 score macro: {f1_score_macro}")

    print(classification_report(targets, outputs))

Run 1/10
Accuracy: 0.9950384072983227
Precision score micro: 0.9950384072983227
Precision score macro: 0.5677189241072833
Recall score micro: 0.9950384072983227
Recall score macro: 0.376206026764524
F1 score micro: 0.9950384072983227
F1 score macro: 0.4003947281924738
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   5102420
           1       0.28      0.01      0.01     17780
           2       0.43      0.12      0.19      7992

    accuracy                           1.00   5128192
   macro avg       0.57      0.38      0.40   5128192
weighted avg       0.99      1.00      0.99   5128192

