#### for colab

In [None]:
# Colab-only setup, kept commented out: install the packages this notebook
# needs, then optionally open an SSH tunnel into the Colab VM.
# !pip install --upgrade torch
# !pip install transformers
# !pip install easydict
# !pip install colab-ssh --upgrade
# !pip install openpyxl

# from colab_ssh import launch_ssh_cloudflared, init_git_cloudflared
# NOTE(review): '0000' is a trivially guessable password -- replace it with a
# strong one before enabling the tunnel.
# launch_ssh_cloudflared(password='0000')

In [None]:
# Colab-only, kept commented out: mount Google Drive and remember its root
# folder in GDRIVE_HOME (referenced by the commented-out path setup in the
# dataset-loading cell).
# from google.colab import drive

# # mount Google Drive
# drive.mount('/content/drive', force_remount=True)
# GDRIVE_HOME = '/content/drive/MyDrive'

## Experiment options

In [None]:
## Experiment Option
from easydict import EasyDict
import torch

# Every experiment setting lives on this one EasyDict; later cells read the
# values as `opt.<name>`.
opt = EasyDict()
opt.dataset_series = 'sentihood' # SemEval-16, sentihood
opt.dataset_domain = 'laptop' # restaurant / laptop / anything if sentihood
opt.subtask = 'sub1' # sub1: sentence, sub2: document (full review); note from the author: only sub1
opt.task = 'category' # category, term
opt.num_classes = 3 # negative, positive, neutral, (+ conflict)
opt.max_length = 200
opt.model_name = 'bert_attscore'
# model_name: {bert_base, bert_attscore, bert_attscore_rnn, bert_attscore_bi_rnn,
#    bert_attscore_rnn_add_sep1, bert_attscore_rnn_add_sep2, bert_attscore_rnn_add_sep_both,
#    bert_attscore_rnn_add_asp, bert_attscore_rnn_add_cls, bert_attscore_forcls_rnn}
opt.pos = False # not used
opt.lastid = False # not used
opt.top_k = 3 # how many top-k attention score words to use
opt.valset_ratio = 0.2
opt.batch_size = 16
opt.num_layers = 6 # only used with bert_intermediate: how many intermediate layers to use
opt.num_epochs = 12
opt.runs = 5
opt.seed = 42
opt.log_step = 100
opt.patience = 5
# is_available must be *called*: the bare function object is always truthy,
# which would select 'cuda' even on a machine without a GPU.
opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(opt.device)

## Load Dataset

In [None]:
import os
import sys
# research_root = os.path.join(GDRIVE_HOME, 'research')
# sys.path.append(research_root)

# Pick the train/test CSV paths for the configured dataset series.
if opt.dataset_series == 'SemEval-16':
    path = 'dataset/{}/semeval16_{}_{}.csv'.format(opt.dataset_series, opt.subtask, opt.dataset_domain)
    path_test = 'dataset/{}/semeval16_{}_{}_test.csv'.format(opt.dataset_series, opt.subtask, opt.dataset_domain)
elif opt.dataset_series == 'sentihood':
    path = 'dataset/{}/sentihood_train.csv'.format(opt.dataset_series)
    path_test = 'dataset/{}/sentihood_test.csv'.format(opt.dataset_series)
else:
    # Without this branch a mistyped series only surfaces as a NameError on
    # `path` a few lines below.
    raise ValueError(
        "Unsupported dataset_series: {!r} (expected 'SemEval-16' or 'sentihood')".format(opt.dataset_series)
    )

import pandas as pd

df_train = pd.read_csv(path)
df_test = pd.read_csv(path_test)

# Report how many rows each split contains.
print('length of train set: {:,}'.format(len(df_train)))
print('length of test set: {:,}'.format(len(df_test)))

# df_train.head()

In [None]:
# Bare expression: shows the loaded training DataFrame as this cell's output.
df_train

In [None]:
# Only the attention-score models get their data cleaned: both frames are
# passed through clean_sentence with `preprocess` as the cleaning function
# (the author's note mentions noise such as '.', '-' and '_').
if 'attscore' in opt.model_name:
    from data_utils import clean_sentence, preprocess

    df_train = clean_sentence(clean_func=preprocess, df=df_train)
    df_test = clean_sentence(clean_func=preprocess, df=df_test)

In [None]:
from data_utils import Category_Classification_Dataset as Dataset
from transformers import BertTokenizer

# One lower-casing tokenizer for the uncased BERT checkpoint, shared by both
# datasets.
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased',
    do_lower_case=True,
)

# The train and test frames are wrapped with exactly the same settings
# (train first, then test).
trainset, testset = (
    Dataset(df=frame, tokenizer=tokenizer, opt=opt, pos_encoding=False)
    for frame in (df_train, df_test)
)

# print(trainset.get_sample(423))
# print('-'*30)
# print(trainset[423])

In [None]:
from data_utils import custom_random_split as rs

# Produce the three subsets used by the loaders below.
# NOTE(review): presumably the validation part is taken from `trainset` using
# `val_ratio`, and `random_seed` makes the split reproducible -- confirm in
# data_utils.
train_set, val_set, test_set = rs(
    dataset=trainset,
    testset=testset,
    val_ratio=opt.valset_ratio,
    random_seed=opt.seed,
)

In [None]:
from torch.utils.data import DataLoader

# All three loaders share the configured batch size; only the training loader
# shuffles its samples.
train_loader = DataLoader(train_set, batch_size=opt.batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=opt.batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=opt.batch_size, shuffle=False)

## Model

Use the top-k attention words + some additional tokens + pooling:

- top-k: 3, 4
- additional tokens: [SEP_1], [SEP_2], both [SEP], [CLS], pair words(aspect words)
- pooling: 'mean' or 'bi-gru'

In [None]:
# Bare expression: shows the currently selected model name as this cell's output.
opt.model_name

In [None]:
from models.bert_intermediate import *
#from models.bert_pos import *
from models.bert_attscores import *

# Build the model named by opt.model_name. Each branch spells out its own
# hyperparameters so a single variant can be tuned without touching the others.
if opt.model_name == 'bert_base':
    model = Bert_Base(opt.num_classes)
elif opt.model_name == 'bert_attscore':
    model = Bert_AttScore(opt=opt, embed_dim=768, fc_hid_dim=128, top_k=opt.top_k, att_head='all', att_pooling='mean')
elif opt.model_name == 'bert_attscore_rnn':
    model = Bert_AttScore_RNN(opt=opt, embed_dim=768, rnn_hid_dim=256, fc_hid_dim=128, bidirectional=False,
                              top_k=opt.top_k, att_head='all', att_pooling='gru')
elif opt.model_name == 'bert_attscore_bi_rnn':
    model = Bert_AttScore_RNN(opt=opt, embed_dim=768, rnn_hid_dim=256, fc_hid_dim=128, bidirectional=True,
                              top_k=opt.top_k, att_head='all', att_pooling='gru')
# The *_add_* variants differ only in the `additional_token` they request.
elif opt.model_name == 'bert_attscore_rnn_add_sep1':
    model = Bert_AttScore_RNN_add(opt=opt, embed_dim=768, rnn_hid_dim=256, fc_hid_dim=128, bidirectional=True,
                                 top_k=opt.top_k, att_head='all', additional_token='sep1', att_pooling='gru')
elif opt.model_name == 'bert_attscore_rnn_add_sep2':
    model = Bert_AttScore_RNN_add(opt=opt, embed_dim=768, rnn_hid_dim=256, fc_hid_dim=128, bidirectional=True,
                                 top_k=opt.top_k, att_head='all', additional_token='sep2', att_pooling='gru')
elif opt.model_name == 'bert_attscore_rnn_add_sep_both':
    model = Bert_AttScore_RNN_add(opt=opt, embed_dim=768, rnn_hid_dim=256, fc_hid_dim=128, bidirectional=True,
                                 top_k=opt.top_k, att_head='all', additional_token='sep_both', att_pooling='gru')
elif opt.model_name == 'bert_attscore_rnn_add_asp':
    model = Bert_AttScore_RNN_add(opt=opt, embed_dim=768, rnn_hid_dim=256, fc_hid_dim=128, bidirectional=True,
                                 top_k=opt.top_k, att_head='all', additional_token='asp', att_pooling='gru')
elif opt.model_name == 'bert_attscore_rnn_add_cls':
    model = Bert_AttScore_RNN_add(opt=opt, embed_dim=768, rnn_hid_dim=256, fc_hid_dim=128, bidirectional=True,
                                 top_k=opt.top_k, att_head='all', additional_token='cls', att_pooling='gru')
elif opt.model_name == 'bert_attscore_forcls_rnn':
    model = Bert_AttScore_forCLS_RNN(opt=opt, embed_dim=768, rnn_hid_dim=256, fc_hid_dim=128, bidirectional=True,
                                 top_k=opt.top_k, att_head='all', att_pooling='gru')
else:
    # Without this branch a mistyped name leaves `model` undefined (or stale
    # from a previous run of this cell) and the failure shows up much later.
    raise ValueError('Unknown model_name: {!r}'.format(opt.model_name))

In [None]:
from models.parameters import get_parameters
# `params` is what the training cell passes to the optimizer.
# NOTE(review): `total` is presumably the model's parameter count -- confirm
# in models/parameters.py.
total, params = get_parameters(model)

## Train

In [None]:
import torch.nn as nn
import torch.optim as optim
from custom_trainer import *

# Loss and optimizer for fine-tuning.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params, lr=2e-5)
# The scheduler is constructed but NOT handed to runs() below (scheduler=False):
# per the author's note it can't be used for multiple runs.
scheduler = optim.lr_scheduler.StepLR(optimizer, gamma=0.8, step_size=100)

# `runs` and `trainer` come from the custom_trainer star import.
result_dict, best_path = runs(
    trainer=trainer,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=False,
    opt=opt,
)