# RoBERTa ABSA

## Setup

In [None]:
!pip install transformers --quiet

In [None]:
from transformers import RobertaTokenizer, RobertaModel
from transformers.trainer_utils import set_seed
import torch
from torch import nn
import pandas as pd
import random
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

# Seed the random number generators (through transformers' set_seed) so that
# repeated runs of the notebook are comparable.
set_seed(1234)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


## Import files

In [None]:
# The 'token_ids' and 'labels' columns are stored in the CSV as Python list
# literals.  They are parsed with ast.literal_eval, which accepts literals
# only, rather than eval, which would execute any code found in the file.
from ast import literal_eval

_list_columns = {'token_ids': literal_eval, 'labels': literal_eval}

rest14_train = pd.read_csv('preproc_roberta_14_restaurant_train.csv', converters=_list_columns)
l_train = rest14_train['labels'].tolist()
max_len_train = max(len(i) for i in l_train)  # longest label sequence in the split
print(max_len_train)

rest14_test = pd.read_csv('preproc_roberta_14_restaurant_test.csv', converters=_list_columns)
l_test = rest14_test['labels'].tolist()
max_len_test = max(len(i) for i in l_test)  # longest label sequence in the split
print(max_len_test)

86
71


In [None]:
# The 'token_ids' and 'labels' columns are stored in the CSV as Python list
# literals.  They are parsed with ast.literal_eval, which accepts literals
# only, rather than eval, which would execute any code found in the file.
from ast import literal_eval

_list_columns = {'token_ids': literal_eval, 'labels': literal_eval}

lap14_train = pd.read_csv('preproc_roberta_14_laptop_train.csv', converters=_list_columns)
l_train = lap14_train['labels'].tolist()
max_len_train = max(len(i) for i in l_train)  # longest label sequence in the split
print(max_len_train)

lap14_test = pd.read_csv('preproc_roberta_14_laptop_test.csv', converters=_list_columns)
l_test = lap14_test['labels'].tolist()
max_len_test = max(len(i) for i in l_test)  # longest label sequence in the split
print(max_len_test)

91
84


In [None]:
# The 'token_ids' and 'labels' columns are stored in the CSV as Python list
# literals.  They are parsed with ast.literal_eval, which accepts literals
# only, rather than eval, which would execute any code found in the file.
from ast import literal_eval

_list_columns = {'token_ids': literal_eval, 'labels': literal_eval}

rest16_train = pd.read_csv('preproc_roberta_16_restaurant_train.csv', converters=_list_columns)
l_train = rest16_train['labels'].tolist()
max_len_train = max(len(i) for i in l_train)  # longest label sequence in the split
print(max_len_train)

rest16_test = pd.read_csv('preproc_roberta_16_restaurant_test.csv', converters=_list_columns)
l_test = rest16_test['labels'].tolist()
max_len_test = max(len(i) for i in l_test)  # longest label sequence in the split
print(max_len_test)

83
98


## Define classes / functions

In [None]:
class CustomBertTokenClassifierLinear(nn.Module):
    """Token classifier: encoder -> dropout -> per-token linear layer.

    Args:
        bert: Encoder module. It must expose ``config.hidden_size`` and, when
            called with ``input_ids`` and ``attention_mask``, return an object
            with a ``last_hidden_state`` attribute.
        num_labels: Number of classes predicted for every token.
        dropout: Dropout probability applied to the encoder output.
    """

    def __init__(self, bert, num_labels, dropout=0.5):
        super().__init__()
        self.bert = bert
        self.embedding_dim = bert.config.hidden_size
        self.dropout = nn.Dropout(dropout)
        self.num_labels = num_labels
        self.fc = nn.Linear(self.embedding_dim, num_labels)

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Classify every token and optionally compute the training loss.

        Args:
            input_ids: Token ids, [batch size, sent len].
            attention_mask: Optional mask with 1 at real tokens, same shape as
                ``input_ids``. When given, positions whose mask is not 1 are
                excluded from the loss.
            labels: Optional gold label ids, same shape as ``input_ids``.

        Returns:
            ``(logits, loss)``; logits are [batch size, sent len, num_labels]
            and ``loss`` is ``None`` when ``labels`` is not supplied.
        """
        hidden = self.bert(input_ids, attention_mask=attention_mask).last_hidden_state
        logits = self.fc(self.dropout(hidden))

        loss = None
        if labels is not None:
            loss = self._token_loss(logits, labels, attention_mask)
        return logits, loss

    def _token_loss(self, logits, labels, attention_mask):
        """Cross-entropy over all tokens, skipping masked-out positions."""
        loss_fct = nn.CrossEntropyLoss()
        flat_labels = labels.reshape(-1)
        if attention_mask is not None:
            # Masked positions get the criterion's ignore_index so they do not
            # contribute to the averaged loss.
            masked_out = attention_mask.reshape(-1) != 1
            flat_labels = flat_labels.masked_fill(masked_out, loss_fct.ignore_index)
        return loss_fct(logits.reshape(-1, self.num_labels), flat_labels)


In [None]:
class CustomBertTokenClassifierGRU(nn.Module):
    """Token classifier: encoder -> dropout -> GRU -> per-token linear layer.

    Args:
        bert: Encoder module. It must expose ``config.hidden_size`` and, when
            called with ``input_ids`` and ``attention_mask``, return an object
            with a ``last_hidden_state`` attribute.
        num_labels: Number of classes predicted for every token.
        dropout: Dropout probability applied to the encoder output.
        gru_hidden_size: Width of the GRU state fed to the output layer.
    """

    def __init__(self, bert, num_labels, dropout=0.5, gru_hidden_size=256):
        super().__init__()
        self.bert = bert
        self.embedding_dim = bert.config.hidden_size
        self.dropout = nn.Dropout(dropout)
        self.num_labels = num_labels
        self.gru = nn.GRU(input_size=self.embedding_dim, hidden_size=gru_hidden_size, batch_first=True)
        self.fc = nn.Linear(gru_hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Classify every token and optionally compute the training loss.

        Args:
            input_ids: Token ids, [batch size, sent len].
            attention_mask: Optional mask with 1 at real tokens, same shape as
                ``input_ids``. When given, positions whose mask is not 1 are
                excluded from the loss.
            labels: Optional gold label ids, same shape as ``input_ids``.

        Returns:
            ``(logits, loss)``; logits are [batch size, sent len, num_labels]
            and ``loss`` is ``None`` when ``labels`` is not supplied.
        """
        hidden = self.bert(input_ids, attention_mask=attention_mask).last_hidden_state
        # The GRU runs over the whole padded sequence; only its per-step
        # outputs are used, the final state is discarded.
        gru_out, _ = self.gru(self.dropout(hidden))
        logits = self.fc(gru_out)

        loss = None
        if labels is not None:
            loss = self._token_loss(logits, labels, attention_mask)
        return logits, loss

    def _token_loss(self, logits, labels, attention_mask):
        """Cross-entropy over all tokens, skipping masked-out positions."""
        loss_fct = nn.CrossEntropyLoss()
        flat_labels = labels.reshape(-1)
        if attention_mask is not None:
            # Masked positions get the criterion's ignore_index so they do not
            # contribute to the averaged loss.
            masked_out = attention_mask.reshape(-1) != 1
            flat_labels = flat_labels.masked_fill(masked_out, loss_fct.ignore_index)
        return loss_fct(logits.reshape(-1, self.num_labels), flat_labels)

In [None]:
# Sample code for training the pytorch model
def train_model(epochs, model, optimizer, train_dataloader):
    """Fine-tune ``model`` for ``epochs`` passes over ``train_dataloader``.

    Every batch must unpack into (input ids, attention mask, labels); its
    tensors are moved to the module-level ``device`` before the forward pass.
    After each pass the mean of the per-batch losses is printed.
    """
    for _ in range(epochs):
        model.train()
        running_loss = 0
        for batch in tqdm(train_dataloader, total=len(train_dataloader)):
            ids, mask, targets = tuple(t.to(device) for t in batch)

            optimizer.zero_grad()
            _, batch_loss = model(ids, attention_mask=mask, labels=targets)
            running_loss += batch_loss.item()

            # Back-propagate and apply one optimizer update per batch.
            batch_loss.backward()
            optimizer.step()
        print(f'Loss = {running_loss / len(train_dataloader)}')
        
def evaluate(model, val_dataloader):
    """Print and return the mean per-batch loss of ``model`` on ``val_dataloader``.

    The model is switched to evaluation mode (which disables dropout) and no
    gradients are tracked.  Every batch must unpack into (input ids, attention
    mask, labels); its tensors are moved to the module-level ``device``.

    Returns:
        The mean of the per-batch losses as a float, so callers can compare
        epochs programmatically instead of reading the printed line.
    """
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for batch in tqdm(val_dataloader, total=len(val_dataloader)):
            b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)
            _, loss = model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
            total_loss += loss.item()
    mean_loss = total_loss / len(val_dataloader)
    print(f'Loss = {mean_loss}')
    return mean_loss

In [None]:
def evaluate_prob(model, val_dataloader):
    """Predict a label id for every position of every row in ``val_dataloader``.

    Despite the name, no probabilities are returned: the logits are reduced
    with an argmax over the class axis.  The model is switched to evaluation
    mode (which disables dropout), no gradients are tracked, and the mean
    per-batch loss is printed as a side effect.

    Every batch must unpack into (input ids, attention mask, labels); its
    tensors are moved to the module-level ``device``.

    Returns:
        A NumPy array of predicted class ids with one row per input row, in
        iteration order.  Padded positions are included; callers are expected
        to filter them with the attention mask.
    """
    model.eval()
    total_loss = 0
    all_pred = []
    with torch.no_grad():
        for batch in tqdm(val_dataloader, total=len(val_dataloader)):
            b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)
            logits, loss = model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
            # Reduce on the model's device so only the class ids, not the full
            # logits, are copied to host memory.
            all_pred.append(logits.argmax(dim=2).cpu().numpy())
            total_loss += loss.item()
    print(f'Loss = {total_loss / len(val_dataloader)}')
    return np.concatenate(all_pred, axis=0)

## Restaurant-14

In [None]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW

In [None]:
# Simple output mapping from tag string to class id.
#
# For the 16 restaurant data, use this mapping instead:
# label_mapping = {'O': 0, 'B-pos': 1, 'B-neg': 2, 'B-neu': 3, 'I-pos': 4, 'I-neg': 5, 'I-neu': 6}
#
# For the 14 restaurant/laptop data:
label_mapping = {'O': 0, 'B-pos': 1, 'B-neg': 2, 'B-neu': 3, 'B-con': 4, 'I-pos': 5, 'I-neg': 6, 'I-neu': 7, 'I-con': 8}


def generate_input_masks_labels(df, MAX_LEN, pad_token_id=0):
    """Turn a preprocessed frame into fixed-length id, mask and label tensors.

    Sequences longer than ``MAX_LEN`` are cut at the end; shorter ones are
    padded at the end.  The padding is done with torch only, so no Keras
    import is needed.

    Args:
        df: Frame with a 'token_ids' column (sequences of ints) and a 'labels'
            column (sequences of keys of ``label_mapping``).
        MAX_LEN: Length every row is padded or truncated to.
        pad_token_id: Id written into padded input positions.
            NOTE(review): RoBERTa's tokenizer is believed to use 1 as its
            padding id; 0 is kept as the default because padded positions
            carry attention mask 0 - confirm before changing.

    Returns:
        ``(input_ids, attention_masks, labels)``, three long tensors of shape
        [len(df), MAX_LEN].  The mask is 1 at real tokens and 0 at padding;
        padded label positions hold 0.

    Raises:
        KeyError: If a label string is not a key of ``label_mapping``.
    """
    num_rows = len(df)
    input_ids = torch.full((num_rows, MAX_LEN), pad_token_id, dtype=torch.long)
    attention_masks = torch.zeros((num_rows, MAX_LEN), dtype=torch.long)
    labels = torch.zeros((num_rows, MAX_LEN), dtype=torch.long)

    for row, (token_ids, tags) in enumerate(zip(df['token_ids'], df['labels'])):
        ids = torch.as_tensor(list(token_ids)[:MAX_LEN], dtype=torch.long)
        tag_ids = torch.as_tensor([label_mapping[t] for t in tags][:MAX_LEN], dtype=torch.long)
        input_ids[row, :len(ids)] = ids
        attention_masks[row, :len(ids)] = 1
        labels[row, :len(tag_ids)] = tag_ids
    return input_ids, attention_masks, labels

In [None]:
# Encode the Restaurant-14 training split; every row is padded or truncated
# to 100 positions.
input_ids, attention_masks, labels = generate_input_masks_labels(df=rest14_train, MAX_LEN=100)

# Batches of 10 rows, drawn in a freshly shuffled order on every pass.
batch_size = 10
train_data = TensorDataset(input_ids, attention_masks, labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size, sampler=train_sampler)

In [None]:
# Encode the Restaurant-14 test split; every row is padded or truncated to
# 100 positions.
input_ids_test, attention_masks_test, labels_test = generate_input_masks_labels(df=rest14_test, MAX_LEN=100)

# Batches of 10 rows in file order, so predictions line up with labels_test.
test_batch_size = 10
test_data = TensorDataset(input_ids_test, attention_masks_test, labels_test)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(dataset=test_data, batch_size=test_batch_size, sampler=test_sampler)

### Linear, 3 epochs

#### Code

In [None]:
pred_log = []
epochs = 3
num_labels = len(label_mapping)
bert = RobertaModel.from_pretrained("roberta-base")
model = CustomBertTokenClassifierLinear(bert, num_labels)
model.to(device)

# Apply weight decay to every parameter except biases and layer-norm scales.
# The per-group option has to be spelled 'weight_decay'; a key such as
# 'weight_decay_rate' is not read by AdamW, which would leave the decay at the
# optimizer's default for every group.
# NOTE(review): names containing 'LayerNorm.weight' are assumed to be the
# encoder's layer-norm scales - confirm against model.named_parameters().
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.05},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=5e-5)

# One training pass followed by one evaluation pass on the test loader, per epoch.
for ep in range(epochs):
    print(f"=== Training phase {ep+1} ====")
    train_model(1, model, optimizer, train_dataloader)
    print(f"=== Eval phase {ep+1} ====")
    evaluate(model,test_dataloader)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  1%|          | 2/305 [00:00<00:23, 13.03it/s]

=== Training phase 1 ====


100%|██████████| 305/305 [00:22<00:00, 13.81it/s]
  6%|▋         | 5/80 [00:00<00:01, 49.92it/s]

Loss = 0.302656863635925
=== Eval phase 1 ====


100%|██████████| 80/80 [00:01<00:00, 48.87it/s]
  1%|          | 2/305 [00:00<00:21, 13.99it/s]

Loss = 0.17548350755823777
=== Training phase 2 ====


100%|██████████| 305/305 [00:22<00:00, 13.82it/s]
  8%|▊         | 6/80 [00:00<00:01, 50.24it/s]

Loss = 0.16225250835423588
=== Eval phase 2 ====


100%|██████████| 80/80 [00:01<00:00, 49.45it/s]
  1%|          | 2/305 [00:00<00:21, 13.88it/s]

Loss = 0.13107971445424482
=== Training phase 3 ====


100%|██████████| 305/305 [00:22<00:00, 13.83it/s]
  8%|▊         | 6/80 [00:00<00:01, 50.54it/s]

Loss = 0.11314320065172725
=== Eval phase 3 ====


100%|██████████| 80/80 [00:01<00:00, 49.35it/s]

Loss = 0.11376870670937933





In [None]:
# Predicted label id for every position of every test row, padding included
# (evaluate_prob also prints the mean per-batch test loss).
pred = evaluate_prob(model,test_dataloader)

100%|██████████| 80/80 [00:01<00:00, 48.80it/s]

Loss = 0.11376870670937933





In [None]:
from sklearn.metrics import confusion_matrix
#all_pred = np.concatenate(pred, axis=0)
#print(pred.shape)
# Boolean selector for the real (non-padding) positions, flattened over all
# test rows.
att_mask = attention_masks_test.cpu().numpy().flatten() == 1

# Flatten gold labels and predictions the same way, then drop the padding.
test_labels_np = labels_test.cpu().numpy().flatten()
pred = pred.flatten()
pred_final = pred[att_mask]
test_labels_final = test_labels_np[att_mask]

# Rows are gold label ids, columns are predicted label ids.
pd.DataFrame(confusion_matrix(test_labels_final, pred_final))

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,13115,19,9,9,0,44,4,11,0
1,52,637,8,16,0,15,0,0,0
2,21,24,124,23,0,1,0,2,0
3,22,41,11,116,0,4,0,1,0
4,0,10,3,1,0,0,0,0,0
5,51,15,0,1,0,626,4,6,0
6,12,1,2,1,0,26,65,11,0
7,24,3,1,2,0,103,12,99,0
8,0,0,0,0,0,2,0,0,0


#### Results

In [None]:
from sklearn.metrics import classification_report

# Report every label except 'O'.  Ids and names are taken from label_mapping,
# so the same cell works for both the 14 (with B-con/I-con) and 16 label sets
# without editing.  zero_division=0 keeps the 0.00 scores for labels that are
# never predicted without emitting an undefined-metric warning.
_report_ids = sorted(v for k, v in label_mapping.items() if k != 'O')
_id_to_name = {v: k for k, v in label_mapping.items()}
print(classification_report(
    test_labels_final,
    pred_final,
    labels=_report_ids,
    target_names=[_id_to_name[i] for i in _report_ids],
    zero_division=0,
))

              precision    recall  f1-score   support

       B-pos       0.85      0.88      0.86       728
       B-neg       0.78      0.64      0.70       195
       B-neu       0.69      0.59      0.64       195
       B-con       0.00      0.00      0.00        14
       I-pos       0.76      0.89      0.82       703
       I-neg       0.76      0.55      0.64       118
       I-neu       0.76      0.41      0.53       244
       I-con       0.00      0.00      0.00         2

   micro avg       0.79      0.76      0.77      2199
   macro avg       0.58      0.49      0.52      2199
weighted avg       0.78      0.76      0.76      2199



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score 
# Micro-average over every label except 'O'; the ids come from label_mapping
# so they always match the label set in use.
_aspect_ids = sorted(v for k, v in label_mapping.items() if k != 'O')
_micro_kwargs = {'labels': _aspect_ids, 'average': 'micro', 'zero_division': 0}
micro_precision = precision_score(test_labels_final, pred_final, **_micro_kwargs)
micro_recall = recall_score(test_labels_final, pred_final, **_micro_kwargs)
micro_f1 = f1_score(test_labels_final, pred_final, **_micro_kwargs)
print(f'micro_precision: {micro_precision:.3f}')
print(f'micro_recall: {micro_recall:.3f}')
print(f'micro_f1: {micro_f1:.3f}')

micro_precision: 0.789
micro_recall: 0.758
micro_f1: 0.773


### GRU, 5 epochs

#### Code

In [None]:
pred_log = []
epochs = 5
num_labels = len(label_mapping)
bert = RobertaModel.from_pretrained("roberta-base")
model = CustomBertTokenClassifierGRU(bert, num_labels)
model.to(device)

# Apply weight decay to every parameter except biases and layer-norm scales.
# The per-group option has to be spelled 'weight_decay'; a key such as
# 'weight_decay_rate' is not read by AdamW, which would leave the decay at the
# optimizer's default for every group.
# NOTE(review): names containing 'LayerNorm.weight' are assumed to be the
# encoder's layer-norm scales - confirm against model.named_parameters().
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.05},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=5e-5)

# One training pass followed by one evaluation pass on the test loader, per epoch.
for ep in range(epochs):
    print(f"=== Training phase {ep+1} ====")
    train_model(1, model, optimizer, train_dataloader)
    print(f"=== Eval phase {ep+1} ====")
    evaluate(model,test_dataloader)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  1%|          | 2/305 [00:00<00:22, 13.18it/s]

=== Training phase 1 ====


100%|██████████| 305/305 [00:23<00:00, 12.83it/s]
  6%|▋         | 5/80 [00:00<00:01, 43.92it/s]

Loss = 0.34529461099720393
=== Eval phase 1 ====


100%|██████████| 80/80 [00:01<00:00, 43.42it/s]
  1%|          | 2/305 [00:00<00:23, 13.08it/s]

Loss = 0.20572028018068522
=== Training phase 2 ====


100%|██████████| 305/305 [00:23<00:00, 12.87it/s]
  6%|▋         | 5/80 [00:00<00:01, 43.89it/s]

Loss = 0.1729907085783169
=== Eval phase 2 ====


100%|██████████| 80/80 [00:01<00:00, 43.54it/s]
  1%|          | 2/305 [00:00<00:23, 13.12it/s]

Loss = 0.1754452728317119
=== Training phase 3 ====


100%|██████████| 305/305 [00:23<00:00, 12.88it/s]
  6%|▋         | 5/80 [00:00<00:01, 44.07it/s]

Loss = 0.13565689369913986
=== Eval phase 3 ====


100%|██████████| 80/80 [00:01<00:00, 43.50it/s]
  1%|          | 2/305 [00:00<00:23, 12.85it/s]

Loss = 0.18671674370998517
=== Training phase 4 ====


100%|██████████| 305/305 [00:23<00:00, 12.81it/s]
  6%|▋         | 5/80 [00:00<00:01, 42.76it/s]

Loss = 0.10397010206237252
=== Eval phase 4 ====


100%|██████████| 80/80 [00:01<00:00, 42.11it/s]
  1%|          | 2/305 [00:00<00:23, 12.96it/s]

Loss = 0.17748159288894386
=== Training phase 5 ====


100%|██████████| 305/305 [00:23<00:00, 12.76it/s]
  6%|▋         | 5/80 [00:00<00:01, 44.10it/s]

Loss = 0.08091730241526346
=== Eval phase 5 ====


100%|██████████| 80/80 [00:01<00:00, 42.84it/s]

Loss = 0.1732613062951714





In [None]:
# Predicted label id for every position of every test row, padding included
# (evaluate_prob also prints the mean per-batch test loss).
pred = evaluate_prob(model,test_dataloader)

100%|██████████| 80/80 [00:01<00:00, 42.88it/s]

Loss = 0.1732613062951714





In [None]:
from sklearn.metrics import confusion_matrix
#all_pred = np.concatenate(pred, axis=0)
#print(pred.shape)
# Boolean selector for the real (non-padding) positions, flattened over all
# test rows.
att_mask = attention_masks_test.cpu().numpy().flatten() == 1

# Flatten gold labels and predictions the same way, then drop the padding.
test_labels_np = labels_test.cpu().numpy().flatten()
pred = pred.flatten()
pred_final = pred[att_mask]
test_labels_final = test_labels_np[att_mask]

# Rows are gold label ids, columns are predicted label ids.
pd.DataFrame(confusion_matrix(test_labels_final, pred_final))

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,13108,27,15,8,0,42,5,6,0
1,45,636,30,6,0,11,0,0,0
2,22,6,164,3,0,0,0,0,0
3,19,71,59,44,0,2,0,0,0
4,0,9,5,0,0,0,0,0,0
5,47,21,4,0,0,607,17,7,0
6,21,0,7,0,0,14,71,5,0
7,21,7,1,3,0,152,26,34,0
8,0,0,0,0,0,1,1,0,0


#### Results

In [None]:
from sklearn.metrics import classification_report

# Report every label except 'O'.  Ids and names are taken from label_mapping,
# so the same cell works for both the 14 (with B-con/I-con) and 16 label sets
# without editing.  zero_division=0 keeps the 0.00 scores for labels that are
# never predicted without emitting an undefined-metric warning.
_report_ids = sorted(v for k, v in label_mapping.items() if k != 'O')
_id_to_name = {v: k for k, v in label_mapping.items()}
print(classification_report(
    test_labels_final,
    pred_final,
    labels=_report_ids,
    target_names=[_id_to_name[i] for i in _report_ids],
    zero_division=0,
))

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       B-pos       0.82      0.87      0.85       728
       B-neg       0.58      0.84      0.68       195
       B-neu       0.69      0.23      0.34       195
       B-con       0.00      0.00      0.00        14
       I-pos       0.73      0.86      0.79       703
       I-neg       0.59      0.60      0.60       118
       I-neu       0.65      0.14      0.23       244
       I-con       0.00      0.00      0.00         2

   micro avg       0.73      0.71      0.72      2199
   macro avg       0.51      0.44      0.44      2199
weighted avg       0.72      0.71      0.68      2199



In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score 
# Micro-average over every label except 'O'; the ids come from label_mapping
# so they always match the label set in use.
_aspect_ids = sorted(v for k, v in label_mapping.items() if k != 'O')
_micro_kwargs = {'labels': _aspect_ids, 'average': 'micro', 'zero_division': 0}
micro_precision = precision_score(test_labels_final, pred_final, **_micro_kwargs)
micro_recall = recall_score(test_labels_final, pred_final, **_micro_kwargs)
micro_f1 = f1_score(test_labels_final, pred_final, **_micro_kwargs)
print(f'micro_precision: {micro_precision:.3f}')
print(f'micro_recall: {micro_recall:.3f}')
print(f'micro_f1: {micro_f1:.3f}')

micro_precision: 0.732
micro_recall: 0.708
micro_f1: 0.719


## Laptops-14

In [None]:
# Simple output mapping from tag string to class id.
#
# For the 16 restaurant data, use this mapping instead:
# label_mapping = {'O': 0, 'B-pos': 1, 'B-neg': 2, 'B-neu': 3, 'I-pos': 4, 'I-neg': 5, 'I-neu': 6}
#
# For the 14 restaurant/laptop data:
label_mapping = {'O': 0, 'B-pos': 1, 'B-neg': 2, 'B-neu': 3, 'B-con': 4, 'I-pos': 5, 'I-neg': 6, 'I-neu': 7, 'I-con': 8}


def generate_input_masks_labels(df, MAX_LEN, pad_token_id=0):
    """Turn a preprocessed frame into fixed-length id, mask and label tensors.

    Sequences longer than ``MAX_LEN`` are cut at the end; shorter ones are
    padded at the end.  The padding is done with torch only, so no Keras
    import is needed.

    Args:
        df: Frame with a 'token_ids' column (sequences of ints) and a 'labels'
            column (sequences of keys of ``label_mapping``).
        MAX_LEN: Length every row is padded or truncated to.
        pad_token_id: Id written into padded input positions.
            NOTE(review): RoBERTa's tokenizer is believed to use 1 as its
            padding id; 0 is kept as the default because padded positions
            carry attention mask 0 - confirm before changing.

    Returns:
        ``(input_ids, attention_masks, labels)``, three long tensors of shape
        [len(df), MAX_LEN].  The mask is 1 at real tokens and 0 at padding;
        padded label positions hold 0.

    Raises:
        KeyError: If a label string is not a key of ``label_mapping``.
    """
    num_rows = len(df)
    input_ids = torch.full((num_rows, MAX_LEN), pad_token_id, dtype=torch.long)
    attention_masks = torch.zeros((num_rows, MAX_LEN), dtype=torch.long)
    labels = torch.zeros((num_rows, MAX_LEN), dtype=torch.long)

    for row, (token_ids, tags) in enumerate(zip(df['token_ids'], df['labels'])):
        ids = torch.as_tensor(list(token_ids)[:MAX_LEN], dtype=torch.long)
        tag_ids = torch.as_tensor([label_mapping[t] for t in tags][:MAX_LEN], dtype=torch.long)
        input_ids[row, :len(ids)] = ids
        attention_masks[row, :len(ids)] = 1
        labels[row, :len(tag_ids)] = tag_ids
    return input_ids, attention_masks, labels

In [None]:
# Encode the Laptops-14 training split; every row is padded or truncated to
# 100 positions.
input_ids, attention_masks, labels = generate_input_masks_labels(df=lap14_train, MAX_LEN=100)

# Batches of 10 rows, drawn in a freshly shuffled order on every pass.
batch_size = 10
train_data = TensorDataset(input_ids, attention_masks, labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size, sampler=train_sampler)

In [None]:
# Encode the Laptops-14 test split; every row is padded or truncated to 100
# positions.
input_ids_test, attention_masks_test, labels_test = generate_input_masks_labels(df=lap14_test, MAX_LEN=100)

# Batches of 10 rows in file order, so predictions line up with labels_test.
test_batch_size = 10
test_data = TensorDataset(input_ids_test, attention_masks_test, labels_test)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(dataset=test_data, batch_size=test_batch_size, sampler=test_sampler)

### Linear, 3 epochs

#### Code

In [None]:
pred_log = []
epochs = 3
num_labels = len(label_mapping)
bert = RobertaModel.from_pretrained("roberta-base")
model = CustomBertTokenClassifierLinear(bert, num_labels)
model.to(device)

# Apply weight decay to every parameter except biases and layer-norm scales.
# The per-group option has to be spelled 'weight_decay'; a key such as
# 'weight_decay_rate' is not read by AdamW, which would leave the decay at the
# optimizer's default for every group.
# NOTE(review): names containing 'LayerNorm.weight' are assumed to be the
# encoder's layer-norm scales - confirm against model.named_parameters().
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.05},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=5e-5)

# One training pass followed by one evaluation pass on the test loader, per epoch.
for ep in range(epochs):
    print(f"=== Training phase {ep+1} ====")
    train_model(1, model, optimizer, train_dataloader)
    print(f"=== Eval phase {ep+1} ====")
    evaluate(model,test_dataloader)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  1%|          | 2/305 [00:00<00:26, 11.59it/s]

=== Training phase 1 ====


100%|██████████| 305/305 [00:22<00:00, 13.78it/s]
  8%|▊         | 6/80 [00:00<00:01, 50.51it/s]

Loss = 0.22254130876638362
=== Eval phase 1 ====


100%|██████████| 80/80 [00:01<00:00, 49.10it/s]
  1%|          | 2/305 [00:00<00:22, 13.76it/s]

Loss = 0.15025797667913138
=== Training phase 2 ====


100%|██████████| 305/305 [00:22<00:00, 13.75it/s]
  6%|▋         | 5/80 [00:00<00:01, 49.24it/s]

Loss = 0.12112832844532172
=== Eval phase 2 ====


100%|██████████| 80/80 [00:01<00:00, 49.17it/s]
  1%|          | 2/305 [00:00<00:21, 13.82it/s]

Loss = 0.12612104575964622
=== Training phase 3 ====


100%|██████████| 305/305 [00:22<00:00, 13.81it/s]
  8%|▊         | 6/80 [00:00<00:01, 50.41it/s]

Loss = 0.08421014273752932
=== Eval phase 3 ====


100%|██████████| 80/80 [00:01<00:00, 49.30it/s]

Loss = 0.11161646461114287





In [None]:
# Predicted label id for every position of every test row, padding included
# (evaluate_prob also prints the mean per-batch test loss).
pred = evaluate_prob(model,test_dataloader)

100%|██████████| 80/80 [00:01<00:00, 49.32it/s]

Loss = 0.11161646461114287





In [None]:
from sklearn.metrics import confusion_matrix
#all_pred = np.concatenate(pred, axis=0)
#print(pred.shape)
# Boolean selector for the real (non-padding) positions, flattened over all
# test rows.
att_mask = attention_masks_test.cpu().numpy().flatten() == 1

# Flatten gold labels and predictions the same way, then drop the padding.
test_labels_np = labels_test.cpu().numpy().flatten()
pred = pred.flatten()
pred_final = pred[att_mask]
test_labels_final = test_labels_np[att_mask]

# Rows are gold label ids, columns are predicted label ids.
pd.DataFrame(confusion_matrix(test_labels_final, pred_final))

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,12637,13,1,13,0,1,1,10,0
1,46,258,11,25,0,1,0,0,0
2,21,13,72,21,0,0,0,1,0
3,24,20,18,107,0,0,0,0,0
4,1,10,5,0,0,0,0,0,0
5,38,11,0,2,0,130,16,29,0
6,15,1,3,3,0,5,66,26,0
7,31,1,3,11,0,6,28,161,0
8,0,0,0,0,0,4,1,0,0


#### Results

In [None]:
from sklearn.metrics import classification_report

# Report every label except 'O'.  Ids and names are taken from label_mapping,
# so the same cell works for both the 14 (with B-con/I-con) and 16 label sets
# without editing.  zero_division=0 keeps the 0.00 scores for labels that are
# never predicted without emitting an undefined-metric warning.
_report_ids = sorted(v for k, v in label_mapping.items() if k != 'O')
_id_to_name = {v: k for k, v in label_mapping.items()}
print(classification_report(
    test_labels_final,
    pred_final,
    labels=_report_ids,
    target_names=[_id_to_name[i] for i in _report_ids],
    zero_division=0,
))

              precision    recall  f1-score   support

       B-pos       0.79      0.76      0.77       341
       B-neg       0.64      0.56      0.60       128
       B-neu       0.59      0.63      0.61       169
       B-con       0.00      0.00      0.00        16
       I-pos       0.88      0.58      0.70       226
       I-neg       0.59      0.55      0.57       119
       I-neu       0.71      0.67      0.69       241
       I-con       0.00      0.00      0.00         5

   micro avg       0.72      0.64      0.67      1245
   macro avg       0.52      0.47      0.49      1245
weighted avg       0.72      0.64      0.67      1245



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score 
# Micro-average over every label except 'O'; the ids come from label_mapping
# so they always match the label set in use.
_aspect_ids = sorted(v for k, v in label_mapping.items() if k != 'O')
_micro_kwargs = {'labels': _aspect_ids, 'average': 'micro', 'zero_division': 0}
micro_precision = precision_score(test_labels_final, pred_final, **_micro_kwargs)
micro_recall = recall_score(test_labels_final, pred_final, **_micro_kwargs)
micro_f1 = f1_score(test_labels_final, pred_final, **_micro_kwargs)
print(f'micro_precision: {micro_precision:.3f}')
print(f'micro_recall: {micro_recall:.3f}')
print(f'micro_f1: {micro_f1:.3f}')

micro_precision: 0.717
micro_recall: 0.638
micro_f1: 0.675


### GRU, 5 epochs

#### Code

In [None]:
pred_log = []
epochs = 5
num_labels = len(label_mapping)
bert = RobertaModel.from_pretrained("roberta-base")
model = CustomBertTokenClassifierGRU(bert, num_labels)
model.to(device)

# Apply weight decay to every parameter except biases and layer-norm scales.
# The per-group option has to be spelled 'weight_decay'; a key such as
# 'weight_decay_rate' is not read by AdamW, which would leave the decay at the
# optimizer's default for every group.
# NOTE(review): names containing 'LayerNorm.weight' are assumed to be the
# encoder's layer-norm scales - confirm against model.named_parameters().
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.05},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=5e-5)

# One training pass followed by one evaluation pass on the test loader, per epoch.
for ep in range(epochs):
    print(f"=== Training phase {ep+1} ====")
    train_model(1, model, optimizer, train_dataloader)
    print(f"=== Eval phase {ep+1} ====")
    evaluate(model,test_dataloader)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  1%|          | 2/305 [00:00<00:29, 10.38it/s]

=== Training phase 1 ====


100%|██████████| 305/305 [00:23<00:00, 12.83it/s]
  6%|▋         | 5/80 [00:00<00:01, 43.76it/s]

Loss = 0.31989542898096024
=== Eval phase 1 ====


100%|██████████| 80/80 [00:01<00:00, 43.40it/s]
  1%|          | 2/305 [00:00<00:23, 13.14it/s]

Loss = 0.2080003712559119
=== Training phase 2 ====


100%|██████████| 305/305 [00:23<00:00, 12.80it/s]
  6%|▋         | 5/80 [00:00<00:01, 44.31it/s]

Loss = 0.16523960487397968
=== Eval phase 2 ====


100%|██████████| 80/80 [00:01<00:00, 43.09it/s]
  1%|          | 2/305 [00:00<00:23, 12.98it/s]

Loss = 0.16851402504835278
=== Training phase 3 ====


100%|██████████| 305/305 [00:23<00:00, 12.81it/s]
  6%|▋         | 5/80 [00:00<00:01, 44.53it/s]

Loss = 0.12176911494099214
=== Eval phase 3 ====


100%|██████████| 80/80 [00:01<00:00, 43.26it/s]
  1%|          | 2/305 [00:00<00:23, 13.02it/s]

Loss = 0.13858057105680927
=== Training phase 4 ====


100%|██████████| 305/305 [00:23<00:00, 12.81it/s]
  6%|▋         | 5/80 [00:00<00:01, 43.73it/s]

Loss = 0.09230624371681545
=== Eval phase 4 ====


100%|██████████| 80/80 [00:01<00:00, 43.33it/s]
  1%|          | 2/305 [00:00<00:23, 13.10it/s]

Loss = 0.16929568868363276
=== Training phase 5 ====


100%|██████████| 305/305 [00:23<00:00, 12.80it/s]
  6%|▋         | 5/80 [00:00<00:01, 43.94it/s]

Loss = 0.08224186900331348
=== Eval phase 5 ====


100%|██████████| 80/80 [00:01<00:00, 43.11it/s]

Loss = 0.13648305323440582





In [None]:
# Predicted label id for every position of every test row, padding included
# (evaluate_prob also prints the mean per-batch test loss).
pred = evaluate_prob(model,test_dataloader)

100%|██████████| 80/80 [00:01<00:00, 42.85it/s]

Loss = 0.13648305323440582





In [None]:
from sklearn.metrics import confusion_matrix

# Boolean selector for token positions whose attention mask equals 1;
# every other position is excluded from the comparison below.
att_mask = attention_masks_test.cpu().numpy().flatten() == 1

# Collapse gold labels and predictions to one entry per token position so
# they line up with the flattened mask.
test_labels_np = labels_test.cpu().numpy().flatten()
pred = pred.flatten()
test_labels_final, pred_final = test_labels_np[att_mask], pred[att_mask]

# Rows are the true class ids, columns the predicted class ids.
pd.DataFrame(confusion_matrix(test_labels_final, pred_final))

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,12573,29,13,24,0,11,5,21,0
1,35,269,13,20,0,3,0,1,0
2,13,9,92,11,0,1,1,1,0
3,12,23,41,92,0,0,1,0,0
4,0,11,4,1,0,0,0,0,0
5,33,8,1,4,0,141,13,26,0
6,18,0,2,3,0,9,75,12,0
7,25,1,2,8,0,10,55,140,0
8,0,0,0,0,0,2,1,2,0


#### Results

In [None]:
from sklearn.metrics import classification_report

# Per-tag precision / recall / F1 over the aspect tags only: class id 0 is
# left out of `labels`, so it contributes to neither the rows nor the averages.
#
# zero_division=0 pins the score of tags the model never predicts to 0.0
# explicitly. The numbers are the same as with sklearn's default ("warn"),
# but the undefined-metric warning is no longer emitted.
#
# Exactly one of the two calls below should be active, matching the
# label_mapping the model was trained with.

# Restaurant-16 data (6 aspect tags) -- uncomment to use instead:
# print(classification_report(test_labels_final, pred_final, labels=[1, 2, 3, 4, 5, 6], target_names=['B-pos', 'B-neg', 'B-neu', 'I-pos', 'I-neg', 'I-neu'], zero_division=0))

# Restaurant-14 data (8 aspect tags, including B-con / I-con):
print(classification_report(
    test_labels_final,
    pred_final,
    labels=[1, 2, 3, 4, 5, 6, 7, 8],
    target_names=['B-pos', 'B-neg', 'B-neu', 'B-con', 'I-pos', 'I-neg', 'I-neu', 'I-con'],
    zero_division=0,
))

              precision    recall  f1-score   support

       B-pos       0.77      0.79      0.78       341
       B-neg       0.55      0.72      0.62       128
       B-neu       0.56      0.54      0.55       169
       B-con       0.00      0.00      0.00        16
       I-pos       0.80      0.62      0.70       226
       I-neg       0.50      0.63      0.56       119
       I-neu       0.69      0.58      0.63       241
       I-con       0.00      0.00      0.00         5

   micro avg       0.67      0.65      0.66      1245
   macro avg       0.48      0.49      0.48      1245
weighted avg       0.67      0.65      0.65      1245



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Micro-averaged scores restricted to class ids 1-8; class id 0 is left out
# of `labels`, so it does not enter the averages.
_micro_opts = {'labels': [1, 2, 3, 4, 5, 6, 7, 8], 'average': 'micro'}

micro_precision = precision_score(test_labels_final, pred_final, **_micro_opts)
micro_recall = recall_score(test_labels_final, pred_final, **_micro_opts)
micro_f1 = f1_score(test_labels_final, pred_final, **_micro_opts)

# Report each score rounded to three decimals.
for _metric_name, _metric_value in (
    ('micro_precision', micro_precision),
    ('micro_recall', micro_recall),
    ('micro_f1', micro_f1),
):
    print(f'{_metric_name}: {_metric_value:.3f}')

micro_precision: 0.667
micro_recall: 0.650
micro_f1: 0.659


## Restaurants-16

In [None]:
# Tag -> integer class id used as the per-token classification target.
# Exactly one of the two mappings below must be active.

# Restaurant-16 data (7 classes):
label_mapping = {
    tag: class_id
    for class_id, tag in enumerate(
        ['O', 'B-pos', 'B-neg', 'B-neu', 'I-pos', 'I-neg', 'I-neu']
    )
}

# Restaurant-14 / Laptop-14 data (9 classes, adds B-con / I-con) -- uncomment to use instead:
# label_mapping = {'O': 0, 'B-pos': 1, 'B-neg': 2, 'B-neu': 3, 'B-con': 4, 'I-pos': 5, 'I-neg': 6, 'I-neu': 7, 'I-con': 8}


def _pad_or_truncate(sequences, length, pad_value=0):
    """Return a (len(sequences), length) long tensor, cut/padded on the right."""
    rows = []
    for seq in sequences:
        kept = list(seq)[:length]
        rows.append(kept + [pad_value] * (length - len(kept)))
    # The explicit reshape keeps the (0, length) shape when there are no rows.
    return torch.tensor(rows, dtype=torch.long).reshape(len(rows), length)


def generate_input_masks_labels(df, MAX_LEN, mapping=None, pad_token_id=0):
    """Turn pre-tokenised rows into fixed-length model input tensors.

    Padding is done with plain torch, so this function does not need the
    Keras ``pad_sequences`` helper or a list -> numpy -> tensor round-trip.

    Args:
        df: Anything indexable by ``'token_ids'`` and ``'labels'`` (e.g. a
            DataFrame); each entry is one sequence of token ids / tag strings.
        MAX_LEN: Target length. Longer sequences are cut at the end, shorter
            ones are padded at the end. ``None`` pads to the longest token
            sequence.
        mapping: Tag -> class id dict. Defaults to the notebook-level
            ``label_mapping``, which was the only option before.
        pad_token_id: Value written into padded ``input_ids`` positions.
            The default 0 keeps the previous behaviour.
            NOTE(review): roberta-base's ``<pad>`` id is 1 (0 is ``<s>``);
            consider passing ``tokenizer.pad_token_id`` — confirm against
            the tokenizer used for preprocessing.

    Returns:
        ``(input_ids, attention_masks, labels)``: three long tensors of shape
        ``(number of rows, MAX_LEN)``. The mask is 1 on real tokens and 0 on
        padding; padded label positions are 0.

    Raises:
        KeyError: If a tag is missing from the mapping.
    """
    if mapping is None:
        mapping = label_mapping

    token_seqs = list(df['token_ids'])
    label_seqs = [[mapping[tag] for tag in seq] for seq in df['labels']]
    # One "1" per real token; padding positions are filled with 0 below.
    mask_seqs = [[1] * len(seq) for seq in token_seqs]

    if MAX_LEN is None:
        MAX_LEN = max((len(seq) for seq in token_seqs), default=0)

    input_ids = _pad_or_truncate(token_seqs, MAX_LEN, pad_token_id)
    attention_masks = _pad_or_truncate(mask_seqs, MAX_LEN)
    labels = _pad_or_truncate(label_seqs, MAX_LEN)
    return input_ids, attention_masks, labels

In [None]:
# Training split: encode every row to 100 positions, then serve it in
# batches of 10 drawn in random order.
batch_size = 10

input_ids, attention_masks, labels = generate_input_masks_labels(rest16_train, 100)
train_data = TensorDataset(input_ids, attention_masks, labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(
    train_data,
    sampler=train_sampler,
    batch_size=batch_size,
)

In [None]:
# Test split: same 100-position encoding as the training data, but served
# in file order so predictions line up with labels_test / attention_masks_test.
test_batch_size = 10

input_ids_test, attention_masks_test, labels_test = generate_input_masks_labels(rest16_test, 100)
test_data = TensorDataset(input_ids_test, attention_masks_test, labels_test)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(
    test_data,
    sampler=test_sampler,
    batch_size=test_batch_size,
)

### Linear, 5 epochs

#### Code

In [None]:
pred_log = []
epochs = 5
# One output class per tag in the active label_mapping.
num_labels = len(label_mapping)

# Fresh pretrained encoder wrapped by the linear token-classification head.
bert = RobertaModel.from_pretrained("roberta-base")
model = CustomBertTokenClassifierLinear(bert, num_labels)
model.to(device)

# Two parameter groups: weights that receive weight decay, and biases /
# LayerNorm parameters that do not.
# - The per-group option AdamW reads is 'weight_decay'. The previous key,
#   'weight_decay_rate', is not an option the optimizer looks at, so the
#   0.05 decay was never applied.
# - Hugging Face checkpoints name LayerNorm parameters 'LayerNorm.weight' /
#   'LayerNorm.bias', so 'gamma' / 'beta' alone never excluded the LayerNorm
#   weights. They are kept in the list for older naming schemes.
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {
        'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.05,
    },
    {
        'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=5e-5)

# Alternate one training call with one evaluation on the test dataloader.
# NOTE(review): the leading 1 passed to train_model is presumably the number
# of epochs per call — confirm against train_model's signature.
for ep in range(epochs):
    print(f"=== Training phase {ep+1} ====")
    train_model(1, model, optimizer, train_dataloader)
    print(f"=== Eval phase {ep+1} ====")
    evaluate(model, test_dataloader)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  1%|          | 2/200 [00:00<00:14, 14.07it/s]

=== Training phase 1 ====


100%|██████████| 200/200 [00:14<00:00, 13.79it/s]
  9%|▉         | 6/68 [00:00<00:01, 50.47it/s]

Loss = 0.2649430753290653
=== Eval phase 1 ====


100%|██████████| 68/68 [00:01<00:00, 50.00it/s]
  1%|          | 2/200 [00:00<00:14, 13.84it/s]

Loss = 0.15637477952986956
=== Training phase 2 ====


100%|██████████| 200/200 [00:14<00:00, 13.83it/s]
  9%|▉         | 6/68 [00:00<00:01, 50.50it/s]

Loss = 0.12279128772206604
=== Eval phase 2 ====


100%|██████████| 68/68 [00:01<00:00, 49.84it/s]
  1%|          | 2/200 [00:00<00:14, 14.02it/s]

Loss = 0.16985366991995013
=== Training phase 3 ====


100%|██████████| 200/200 [00:14<00:00, 13.85it/s]
  9%|▉         | 6/68 [00:00<00:01, 50.85it/s]

Loss = 0.08110550518613309
=== Eval phase 3 ====


100%|██████████| 68/68 [00:01<00:00, 49.97it/s]
  1%|          | 2/200 [00:00<00:14, 13.86it/s]

Loss = 0.1659327730851467
=== Training phase 4 ====


100%|██████████| 200/200 [00:14<00:00, 13.74it/s]
  7%|▋         | 5/68 [00:00<00:01, 48.10it/s]

Loss = 0.06300255735171958
=== Eval phase 4 ====


100%|██████████| 68/68 [00:01<00:00, 49.34it/s]
  1%|          | 2/200 [00:00<00:14, 13.86it/s]

Loss = 0.1734730192042394
=== Training phase 5 ====


100%|██████████| 200/200 [00:14<00:00, 13.71it/s]
  9%|▉         | 6/68 [00:00<00:01, 50.43it/s]

Loss = 0.039921687602763996
=== Eval phase 5 ====


100%|██████████| 68/68 [00:01<00:00, 49.43it/s]

Loss = 0.15763524018556757





In [None]:
# Run the trained model over the test batches and keep whatever
# evaluate_prob returns; the next cell flattens it and filters it with the
# attention mask, so it is presumably one class id per token position.
pred = evaluate_prob(model, test_dataloader)

100%|██████████| 68/68 [00:01<00:00, 49.37it/s]

Loss = 0.15763524018556757





In [None]:
from sklearn.metrics import confusion_matrix

# Boolean selector for token positions whose attention mask equals 1;
# every other position is excluded from the comparison below.
att_mask = attention_masks_test.cpu().numpy().flatten() == 1

# Collapse gold labels and predictions to one entry per token position so
# they line up with the flattened mask.
test_labels_np = labels_test.cpu().numpy().flatten()
pred = pred.flatten()
test_labels_final, pred_final = test_labels_np[att_mask], pred[att_mask]

# Rows are the true class ids, columns the predicted class ids.
pd.DataFrame(confusion_matrix(test_labels_final, pred_final))

Unnamed: 0,0,1,2,3,4,5,6
0,10944,54,40,0,91,21,0
1,73,373,8,5,9,0,0
2,34,3,77,0,1,0,0
3,7,5,14,4,0,0,0
4,109,9,0,0,398,7,4
5,18,0,0,0,6,19,5
6,11,0,0,0,3,0,6


#### Results

In [None]:
from sklearn.metrics import classification_report

# Per-tag precision / recall / F1 over the aspect tags only: class id 0 is
# left out of `labels`, so it contributes to neither the rows nor the averages.
#
# The dataset notes are real comments rather than bare string literals: a
# string literal as the cell's last statement is echoed as cell output.
#
# zero_division=0 pins the score of tags the model never predicts to 0.0
# explicitly (same numbers as sklearn's default, without the warning).
#
# Exactly one of the two calls below should be active, matching the
# label_mapping the model was trained with.

# Restaurant-14 data (8 aspect tags, including B-con / I-con) -- uncomment to use instead:
# print(classification_report(test_labels_final, pred_final, labels=[1, 2, 3, 4, 5, 6, 7, 8], target_names=['B-pos', 'B-neg', 'B-neu', 'B-con', 'I-pos', 'I-neg', 'I-neu', 'I-con'], zero_division=0))

# Restaurant-16 data (6 aspect tags):
print(classification_report(
    test_labels_final,
    pred_final,
    labels=[1, 2, 3, 4, 5, 6],
    target_names=['B-pos', 'B-neg', 'B-neu', 'I-pos', 'I-neg', 'I-neu'],
    zero_division=0,
))

              precision    recall  f1-score   support

       B-pos       0.84      0.80      0.82       468
       B-neg       0.55      0.67      0.61       115
       B-neu       0.44      0.13      0.21        30
       I-pos       0.78      0.76      0.77       527
       I-neg       0.40      0.40      0.40        48
       I-neu       0.40      0.30      0.34        20

   micro avg       0.75      0.73      0.74      1208
   macro avg       0.57      0.51      0.52      1208
weighted avg       0.75      0.73      0.74      1208



'OR Uncomment below for 14 restaurant data '

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Micro-averaged scores restricted to class ids 1-6; class id 0 ('O' in
# label_mapping) is left out of `labels`, so it does not enter the averages.
_micro_opts = {'labels': [1, 2, 3, 4, 5, 6], 'average': 'micro'}

micro_precision = precision_score(test_labels_final, pred_final, **_micro_opts)
micro_recall = recall_score(test_labels_final, pred_final, **_micro_opts)
micro_f1 = f1_score(test_labels_final, pred_final, **_micro_opts)

# Report each score rounded to three decimals.
for _metric_name, _metric_value in (
    ('micro_precision', micro_precision),
    ('micro_recall', micro_recall),
    ('micro_f1', micro_f1),
):
    print(f'{_metric_name}: {_metric_value:.3f}')

micro_precision: 0.755
micro_recall: 0.726
micro_f1: 0.740


### GRU, 5 epochs

#### Code

In [None]:
pred_log = []
epochs = 5
# One output class per tag in the active label_mapping.
num_labels = len(label_mapping)

# Fresh pretrained encoder wrapped by the GRU token-classification head.
bert = RobertaModel.from_pretrained("roberta-base")
model = CustomBertTokenClassifierGRU(bert, num_labels)
model.to(device)

# Two parameter groups: weights that receive weight decay, and biases /
# LayerNorm parameters that do not.
# - The per-group option AdamW reads is 'weight_decay'. The previous key,
#   'weight_decay_rate', is not an option the optimizer looks at, so the
#   0.05 decay was never applied.
# - Hugging Face checkpoints name LayerNorm parameters 'LayerNorm.weight' /
#   'LayerNorm.bias', so 'gamma' / 'beta' alone never excluded the LayerNorm
#   weights. They are kept in the list for older naming schemes.
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {
        'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.05,
    },
    {
        'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=5e-5)

# Alternate one training call with one evaluation on the test dataloader.
# NOTE(review): the leading 1 passed to train_model is presumably the number
# of epochs per call — confirm against train_model's signature.
for ep in range(epochs):
    print(f"=== Training phase {ep+1} ====")
    train_model(1, model, optimizer, train_dataloader)
    print(f"=== Eval phase {ep+1} ====")
    evaluate(model, test_dataloader)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  1%|          | 2/200 [00:00<00:15, 12.47it/s]

=== Training phase 1 ====


100%|██████████| 200/200 [00:15<00:00, 12.83it/s]
  7%|▋         | 5/68 [00:00<00:01, 43.93it/s]

Loss = 0.3938207312300801
=== Eval phase 1 ====


100%|██████████| 68/68 [00:01<00:00, 43.37it/s]
  1%|          | 2/200 [00:00<00:15, 13.18it/s]

Loss = 0.21254135629929163
=== Training phase 2 ====


100%|██████████| 200/200 [00:15<00:00, 12.86it/s]
  7%|▋         | 5/68 [00:00<00:01, 44.25it/s]

Loss = 0.1934179037064314
=== Eval phase 2 ====


100%|██████████| 68/68 [00:01<00:00, 43.69it/s]
  1%|          | 2/200 [00:00<00:14, 13.23it/s]

Loss = 0.1823943656157045
=== Training phase 3 ====


100%|██████████| 200/200 [00:15<00:00, 12.84it/s]
  7%|▋         | 5/68 [00:00<00:01, 42.95it/s]

Loss = 0.13276214151643217
=== Eval phase 3 ====


100%|██████████| 68/68 [00:01<00:00, 43.44it/s]
  1%|          | 2/200 [00:00<00:15, 13.16it/s]

Loss = 0.19774929699761903
=== Training phase 4 ====


100%|██████████| 200/200 [00:15<00:00, 12.80it/s]
  7%|▋         | 5/68 [00:00<00:01, 44.21it/s]

Loss = 0.09687226405367255
=== Eval phase 4 ====


100%|██████████| 68/68 [00:01<00:00, 43.61it/s]
  1%|          | 2/200 [00:00<00:15, 13.20it/s]

Loss = 0.17575550712097218
=== Training phase 5 ====


100%|██████████| 200/200 [00:15<00:00, 12.83it/s]
  7%|▋         | 5/68 [00:00<00:01, 44.08it/s]

Loss = 0.08105004204902798
=== Eval phase 5 ====


100%|██████████| 68/68 [00:01<00:00, 43.59it/s]

Loss = 0.19440187497393174





In [None]:
# Run the trained model over the test batches and keep whatever
# evaluate_prob returns; the next cell flattens it and filters it with the
# attention mask, so it is presumably one class id per token position.
pred = evaluate_prob(model, test_dataloader)

100%|██████████| 68/68 [00:01<00:00, 43.47it/s]

Loss = 0.19440187497393174





In [None]:
from sklearn.metrics import confusion_matrix

# Boolean selector for token positions whose attention mask equals 1;
# every other position is excluded from the comparison below.
att_mask = attention_masks_test.cpu().numpy().flatten() == 1

# Collapse gold labels and predictions to one entry per token position so
# they line up with the flattened mask.
test_labels_np = labels_test.cpu().numpy().flatten()
pred = pred.flatten()
test_labels_final, pred_final = test_labels_np[att_mask], pred[att_mask]

# Rows are the true class ids, columns the predicted class ids.
pd.DataFrame(confusion_matrix(test_labels_final, pred_final))

Unnamed: 0,0,1,2,3,4,5,6
0,10798,130,53,0,122,47,0
1,35,403,15,0,15,0,0
2,22,19,72,0,0,2,0
3,6,13,11,0,0,0,0
4,88,18,1,0,410,10,0
5,12,0,2,0,10,24,0
6,11,0,1,0,6,2,0


#### Results

In [None]:
from sklearn.metrics import classification_report

# Per-tag precision / recall / F1 over the aspect tags only: class id 0 is
# left out of `labels`, so it contributes to neither the rows nor the averages.
#
# The dataset notes are real comments rather than bare string literals: a
# string literal as the cell's last statement is echoed as cell output.
#
# zero_division=0 pins the score of tags the model never predicts to 0.0
# explicitly. The numbers are the same as with sklearn's default ("warn"),
# but the undefined-metric warning is no longer emitted.
#
# Exactly one of the two calls below should be active, matching the
# label_mapping the model was trained with.

# Restaurant-14 data (8 aspect tags, including B-con / I-con) -- uncomment to use instead:
# print(classification_report(test_labels_final, pred_final, labels=[1, 2, 3, 4, 5, 6, 7, 8], target_names=['B-pos', 'B-neg', 'B-neu', 'B-con', 'I-pos', 'I-neg', 'I-neu', 'I-con'], zero_division=0))

# Restaurant-16 data (6 aspect tags):
print(classification_report(
    test_labels_final,
    pred_final,
    labels=[1, 2, 3, 4, 5, 6],
    target_names=['B-pos', 'B-neg', 'B-neu', 'I-pos', 'I-neg', 'I-neu'],
    zero_division=0,
))

              precision    recall  f1-score   support

       B-pos       0.69      0.86      0.77       468
       B-neg       0.46      0.63      0.53       115
       B-neu       0.00      0.00      0.00        30
       I-pos       0.73      0.78      0.75       527
       I-neg       0.28      0.50      0.36        48
       I-neu       0.00      0.00      0.00        20

   micro avg       0.66      0.75      0.70      1208
   macro avg       0.36      0.46      0.40      1208
weighted avg       0.64      0.75      0.69      1208



  _warn_prf(average, modifier, msg_start, len(result))


'OR Uncomment below for 14 restaurant data '

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Micro-averaged scores restricted to class ids 1-6; class id 0 ('O' in
# label_mapping) is left out of `labels`, so it does not enter the averages.
_micro_opts = {'labels': [1, 2, 3, 4, 5, 6], 'average': 'micro'}

micro_precision = precision_score(test_labels_final, pred_final, **_micro_opts)
micro_recall = recall_score(test_labels_final, pred_final, **_micro_opts)
micro_f1 = f1_score(test_labels_final, pred_final, **_micro_opts)

# Report each score rounded to three decimals.
for _metric_name, _metric_value in (
    ('micro_precision', micro_precision),
    ('micro_recall', micro_recall),
    ('micro_f1', micro_f1),
):
    print(f'{_metric_name}: {_metric_value:.3f}')

micro_precision: 0.656
micro_recall: 0.752
micro_f1: 0.701
