In [1]:
!pip install transformers



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
import os.path
import json
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
from torch.optim import lr_scheduler

import logging
logging.basicConfig(level=logging.ERROR)

import warnings
warnings.filterwarnings("ignore")

In [11]:
"""
initializing hyper parameters
"""
train_maxlen = 140
dev_maxlen = 140
batch_size = 16
epochs = 10
bert_model = 'bert-base-uncased'
learning_rate = 3e-5

In [4]:
class Tokenize_dataset:
  """
  This class tokenizes the dataset using bert tokenizer
  """

  def __init__(self, text, targets, tokenizer, max_len):
    self.text = text
    self.tokenizer = tokenizer
    self.max_len = max_len
    self.targets = targets

  def __len__(self):
    return len(self.targets)

  def __getitem__(self, item):
    text = str(self.text[item])
    targets = self.targets[item]
    """
    Using encode_plus instead of encode as it helps to provide additional information that we need
    """
    inputs = self.tokenizer.encode_plus(
        text,
        add_special_tokens = True,
        max_length = self.max_len,
        pad_to_max_length = True
    )

    ids = inputs["input_ids"]
    mask = inputs["attention_mask"]
    token_type_ids = inputs["token_type_ids"]

    return {
        "ids": torch.tensor(ids, dtype=torch.long),
        "mask": torch.tensor(mask, dtype=torch.long),
        "token_type_ids": torch.tensor(token_type_ids, dtype=torch.long),
        "targets": torch.tensor(targets, dtype=torch.long)
    }

In [5]:
def loss_function(outputs, targets):
	"""
	This function defines the loss function we use in the model which since is multiclass is crossentropy
	"""
	return nn.CrossEntropyLoss()(outputs, targets)

In [6]:
def train_function(data_loader, model, optimizer, device, location, aspect):
  """
  Function defines the training that we will happen over the entire dataset
  """
  model.train()

  running_loss = 0.0
  """
  looping over the entire training dataset
  """
  for i, data in enumerate(data_loader):
    mask = data["mask"].to(device, dtype=torch.long)
    ids = data["ids"].to(device, dtype=torch.long)
    token_type_ids = data["token_type_ids"].to(device, dtype=torch.long)
    target = data["targets"].to(device, dtype=torch.long)
    optimizer.zero_grad()

    output = model(ids=ids, mask=mask, token_type_ids=token_type_ids)
    
    loss = loss_function(output, target)
    loss.backward()
    optimizer.step()
    """
    calculating loss and running loss
    """
    running_loss += loss.item()
    if i % 10 == 0 and i!=0:
      temp = f'Batch index = {i}\tRunning Loss = {running_loss/10}'
      print(temp)
      path = "/content/drive/MyDrive/dataset/loss/" + str(location) + str(aspect)
      if os.path.exists(path)==True:
        continue
      else:
        os.mkdir(path)
      running_loss = 0.0

In [7]:
def eval_function(data_loader, model, device, location, aspect):
  """
  This function defines the loop over the dev set.
  """
  model.eval()
  correct_labels = 0
  tot = 0
  """
  no_grad as this is evaluation set and we dont want the model to update weights
  """
  with torch.no_grad():
    for i, data in enumerate(data_loader):
      mask = data["mask"].to(device, dtype=torch.long)
      ids = data["ids"].to(device, dtype=torch.long)
      token_type_ids = data["token_type_ids"].to(device, dtype=torch.long)
      target = data["targets"].to(device, dtype=torch.long)
      outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids)

      max_probs, predicted = torch.max(outputs, 1)
      tot = tot + target.size(0)
      correct_labels = correct_labels + torch.sum(predicted==target)

      print(f"Batch Index: {i}\tPredicted: {predicted}\tTargets: {target}")
    """
    basic metrics for accuracy calculation
    """
    accuracy = correct_labels / tot * 100
    print(accuracy)
  return accuracy

In [8]:
class CompleteModel(nn.Module):
  """
  The model architecture is defined here which is a fully connected layer + normalization on top of a BERT model
  """

  def __init__(self, bert):
    super(CompleteModel, self).__init__()
    self.bert = BertModel.from_pretrained(bert)
    self.drop = nn.Dropout(p=0.25)
    self.out = nn.Linear(self.bert.config.hidden_size, 3) # Number of output classes = 3, positive, negative and N(none)

  def forward(self, ids, mask, token_type_ids):
    _, pooled_output = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
    output = self.drop(pooled_output)
    return self.out(output)

In [9]:
with open('/content/drive/MyDrive/dataset/sentihood-train.json', 'r') as f:
    training_json = json.load(f)

In [10]:
"""
Getting all the aspects and locations
"""
aspect_set = set()
location_set = set()
for tot in training_json:
  for item in tot['opinions']:
    aspect_set.add(item['aspect'])
    location_set.add(item['target_entity'])

In [None]:
def run():
  """
  This function defines the model and optimizer, loads datasets for all aspects
  and training and evaluation happens
  """

  locations = list(location_set)
  aspects = list(aspect_set)
  """
  mapping sentiments to numbers for fully connected layer
  """
  sentiment_mapping = {
          'Positive': 0,
          'Negative': 1,
          'N': 2
      }
 
  # Training and validating models for every location aspect dataset

  for location in locations:
    for aspect in aspects:
      print(f"Training for:{location} for aspect {aspect}")
      training_set_path = '/content/drive/MyDrive/dataset/train/' + str(location) + '_' + str(aspect) + '.csv'
      validation_set_path = '/content/drive/MyDrive/dataset/dev/' + str(location) + '_' + str(aspect) + '.csv'
      df_train = pd.read_csv(training_set_path)
      df_valid = pd.read_csv(validation_set_path)
      
      df_train['sentiment'] = df_train['sentiment'].map(sentiment_mapping)
      df_valid['sentiment'] = df_valid['sentiment'].map(sentiment_mapping)
      df_train = df_train.reset_index(drop=True)
      df_valid = df_valid.reset_index(drop=True)

      tokenizer = BertTokenizer.from_pretrained(bert_model)

      train_dataset = Tokenize_dataset(
          text = df_train['text'].values,
          targets = df_train['sentiment'].values,
          tokenizer = tokenizer,
          max_len = train_maxlen
      )

      # Since the dataset for each csv is not balanced we used weightbalance sampler provided by pytorch

      class_counts = []
      for i in range(3):
        class_counts.append(df_train[df_train['sentiment']==i].shape[0])
      print(f"Class Counts: {class_counts}")
      
      num_samples = sum(class_counts)
      print(num_samples)
      labels = df_train['sentiment'].values

      class_weights = []
      for i in range(len(class_counts)):
        if class_counts[i] != 0:
          class_weights.append(num_samples/class_counts[i])
        else:
          class_weights.append(0)

      weights = [class_weights[labels[i]] for i in range(int(num_samples))]
      sampler = torch.utils.data.sampler.WeightedRandomSampler(torch.DoubleTensor(weights), int(num_samples))
      

      train_data_loader = torch.utils.data.DataLoader(
          train_dataset,
          batch_size = batch_size,
          shuffle = False,
          sampler = sampler
      )

      valid_dataset = Tokenize_dataset(
          text = df_valid['text'].values,
          targets = df_valid['sentiment'].values,
          tokenizer = tokenizer,
          max_len = dev_maxlen
      )

      valid_data_loader = torch.utils.data.DataLoader(
          valid_dataset,
          batch_size = batch_size,
          shuffle = False
      )

      device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
      print(f"Device: {device}")

      model = CompleteModel(bert_model).to(device)
      optimizer = AdamW(model.parameters(), lr=learning_rate)

      scheduler = lr_scheduler.StepLR(
          optimizer,
          step_size = 1,
          gamma = 0.8
      )

      for epoch in range(epochs):
        train_function(data_loader=train_data_loader, model=model, optimizer=optimizer, device=device, location=location, aspect=aspect)
        accuracy = eval_function(data_loader=valid_data_loader, model=model, device=device, location=location, aspect=aspect)

        print("\nEpoch = "+ str(epoch)+"\tAccuracy Score = " + str(accuracy))
        print("\nLearning Rate = " + str(scheduler.get_lr()[0])+"\n")

        scheduler.step()
      """
      saving one model per location-aspect for every 10 epochs
      """
      torch.save(model, '/content/drive/MyDrive/dataset/loss/' + str(location) + str(aspect) + '/'+ str(epoch) + '.bin')

if __name__ == "__main__":
  run()

Training for:LOCATION1 for aspect touristy
Class Counts: [44, 1, 2932]
2977
Device: cuda:0


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Batch index = 10	Loss = 1.071900224685669
Batch index = 20	Loss = 1.7355456322431564
Batch index = 30	Loss = 2.282261258363724
Batch index = 40	Loss = 2.6083164900541305
Batch index = 50	Loss = 0.2400014616549015
Batch index = 60	Loss = 0.3642148651182652
Batch index = 70	Loss = 0.43279282972216604
Batch index = 80	Loss = 0.4922499626874924
Batch index = 90	Loss = 0.5296515837311745
Batch index = 100	Loss = 0.567054895684123
Batch index = 110	Loss = 0.5868158840574325
Batch index = 120	Loss = 0.6233734810724855
Batch index = 130	Loss = 0.6647617862559855
Batch index = 140	Loss = 0.6788122127298266
Batch index = 150	Loss = 0.7038229787256569
Batch index = 160	Loss = 0.735215171193704
Batch index = 170	Loss = 0.7981156831141561
Batch index = 180	Loss = 0.8449646721128374
Batch Index: 0	Predicted: tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')	Targets: tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
Batch Index: 1	Predicted: tensor([2, 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Batch index = 10	Loss = 1.1622355103492736
Batch index = 20	Loss = 0.876230251789093
Batch index = 30	Loss = 1.6018608927726745
Batch index = 40	Loss = 2.2016585618257523
Batch index = 50	Loss = 2.580909404158592
Batch index = 60	Loss = 2.8308530911803245
Batch index = 70	Loss = 2.983714130520821
Batch index = 80	Loss = 3.089212755858898
Batch index = 90	Loss = 3.1723247447982432
Batch index = 100	Loss = 3.234402641095221
Batch index = 110	Loss = 3.2661169709637763
Batch index = 120	Loss = 3.287692947126925
Batch index = 130	Loss = 3.3060604500584305
Batch index = 140	Loss = 3.3247397352941332
Batch index = 150	Loss = 3.347232561465353
Batch index = 160	Loss = 3.36783410217613
Batch index = 170	Loss = 3.3838811564724893
Batch index = 180	Loss = 3.4171453876886515
Batch Index: 0	Predicted: tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')	Targets: tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
Batch Index: 1	Predicted: tensor([2, 2, 0, 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Batch index = 10	Loss = 1.211588406562805
Batch index = 20	Loss = 1.0924819231033325
Batch index = 30	Loss = 2.1898816585540772
Batch index = 40	Loss = 3.230982279777527
Batch index = 50	Loss = 4.197346144914627
Batch index = 60	Loss = 4.9627776682376865
Batch index = 70	Loss = 5.591233801841736
Batch index = 80	Loss = 6.042959150671959
Batch index = 90	Loss = 6.434128932654858
Batch index = 100	Loss = 6.801645981520414
Batch index = 110	Loss = 7.114677011221647
Batch index = 120	Loss = 7.349052233994007
Batch index = 130	Loss = 7.576311073824764
Batch index = 140	Loss = 7.785023608803749
Batch index = 150	Loss = 8.044464138895274
Batch index = 160	Loss = 8.280480767786504
Batch index = 170	Loss = 8.47865954786539
Batch index = 180	Loss = 8.60601914972067
Batch Index: 0	Predicted: tensor([1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')	Targets: tensor([1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
Batch Index: 1	Predicted: tensor([2, 2, 2, 0, 2, 2,

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Batch index = 10	Loss = 1.1477528929710388
Batch index = 20	Loss = 0.8873028099536896
Batch index = 30	Loss = 1.6647513687610627
Batch index = 40	Loss = 2.241547966003418
Batch index = 50	Loss = 2.645350828766823
Batch index = 60	Loss = 2.9078269466757773
Batch index = 70	Loss = 3.09424332678318
Batch index = 80	Loss = 3.3236456736922264
Batch index = 90	Loss = 3.461835168302059
Batch index = 100	Loss = 3.5373960491269827
Batch index = 110	Loss = 3.6469217389822006
Batch index = 120	Loss = 3.741796679608524
Batch index = 130	Loss = 3.853363782726228
Batch index = 140	Loss = 3.942976850271225
Batch index = 150	Loss = 4.060614745877683
Batch index = 160	Loss = 4.113996071368456
Batch index = 170	Loss = 4.184554341807962
Batch index = 180	Loss = 4.237934969924391
Batch Index: 0	Predicted: tensor([2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 0, 0, 2], device='cuda:0')	Targets: tensor([2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2], device='cuda:0')
Batch Index: 1	Predicted: tensor([0, 2, 2, 2, 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Batch index = 10	Loss = 1.1262228846549989
Batch index = 20	Loss = 0.8531758248806
Batch index = 30	Loss = 1.529628223180771
Batch index = 40	Loss = 1.9694223701953888
Batch index = 50	Loss = 2.35651476085186
Batch index = 60	Loss = 2.63184128254652
Batch index = 70	Loss = 2.8118251368403433
Batch index = 80	Loss = 3.0188569955527784
Batch index = 90	Loss = 3.101713017746806
Batch index = 100	Loss = 3.2290004786103963
Batch index = 110	Loss = 3.3183819951489566
Batch index = 120	Loss = 3.3881095254793765
Batch index = 130	Loss = 3.479808523505926
Batch index = 140	Loss = 3.5260109715163708
Batch index = 150	Loss = 3.6178775910288095
Batch index = 160	Loss = 3.649981802329421
Batch index = 170	Loss = 3.6959823983721436
Batch index = 180	Loss = 3.713035555370152
Batch Index: 0	Predicted: tensor([2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')	Targets: tensor([2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
Batch Index: 1	Predicted: tensor([2, 2, 2, 2, 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Batch index = 10	Loss = 1.2324208855628966
Batch index = 20	Loss = 1.0676846861839295
Batch index = 30	Loss = 2.0326128125190737
Batch index = 40	Loss = 2.8691036343574523
Batch index = 50	Loss = 3.566255158185959
Batch index = 60	Loss = 4.208842980861664
Batch index = 70	Loss = 4.720824593305588
Batch index = 80	Loss = 5.079012252390385
Batch index = 90	Loss = 5.337410315871239
Batch index = 100	Loss = 5.565425369143486
Batch index = 110	Loss = 5.7752450130879875
Batch index = 120	Loss = 6.070436572283507
Batch index = 130	Loss = 6.226179636269808
Batch index = 140	Loss = 6.394138890504837
Batch index = 150	Loss = 6.555366117879748
Batch index = 160	Loss = 6.71021144464612
Batch index = 170	Loss = 6.85419483333826
Batch index = 180	Loss = 6.961378657259047
Batch Index: 0	Predicted: tensor([2, 2, 0, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')	Targets: tensor([2, 2, 0, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
Batch Index: 1	Predicted: tensor([2, 2, 0, 0, 2, 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Batch index = 10	Loss = 1.213627576828003
Batch index = 20	Loss = 1.0568696975708007
Batch index = 30	Loss = 2.0352676451206206
Batch index = 40	Loss = 2.88390337228775
Batch index = 50	Loss = 3.6879307836294175
Batch index = 60	Loss = 4.353524419665336
Batch index = 70	Loss = 4.9597958624362946
Batch index = 80	Loss = 5.4001571729779245
Batch index = 90	Loss = 5.8706737637519835
Batch index = 100	Loss = 6.1652614489197735
Batch index = 110	Loss = 6.357194195687771
Batch index = 120	Loss = 6.454338600859046
Batch index = 130	Loss = 6.612257695943117
Batch index = 140	Loss = 6.812004979513586
Batch index = 150	Loss = 7.019197012297809
Batch index = 160	Loss = 7.200843927077949
Batch index = 170	Loss = 7.307109682075679
Batch index = 180	Loss = 7.358723742328584
Batch Index: 0	Predicted: tensor([2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1], device='cuda:0')	Targets: tensor([2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1], device='cuda:0')
Batch Index: 1	Predicted: tensor([2, 0, 2, 1, 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Batch index = 10	Loss = 1.2379223227500915
Batch index = 20	Loss = 1.0833566069602967
Batch index = 30	Loss = 2.145863777399063
Batch index = 40	Loss = 3.134298914670944
Batch index = 50	Loss = 4.0053207814693454
Batch index = 60	Loss = 4.755489772558212
Batch index = 70	Loss = 5.356444925069809
Batch index = 80	Loss = 5.980497011542321
Batch index = 90	Loss = 6.582192051410675
Batch index = 100	Loss = 7.197134256362915
Batch index = 110	Loss = 7.660218504071236
Batch index = 120	Loss = 8.081085273623467
Batch index = 130	Loss = 8.545769493281842
Batch index = 140	Loss = 8.943157930672168
Batch index = 150	Loss = 9.351823188364506
Batch index = 160	Loss = 9.675331734120846
Batch index = 170	Loss = 10.069480952620506
Batch index = 180	Loss = 10.401105301082135
Batch Index: 0	Predicted: tensor([0, 2, 2, 2, 0, 2, 1, 2, 2, 1, 2, 2, 1, 0, 2, 2], device='cuda:0')	Targets: tensor([2, 2, 2, 2, 0, 2, 1, 2, 2, 1, 2, 2, 1, 0, 2, 2], device='cuda:0')
Batch Index: 1	Predicted: tensor([0, 2, 2, 2, 0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Batch index = 10	Loss = 1.0195064485073089
Batch index = 20	Loss = 0.7160825788974762
Batch index = 30	Loss = 1.158755287528038
Batch index = 40	Loss = 1.3820420406758784
Batch index = 50	Loss = 1.5354115344583987
Batch index = 60	Loss = 1.6585704371333123
Batch index = 70	Loss = 1.7175590524449944
Batch index = 80	Loss = 1.7954592613503337
Batch index = 90	Loss = 1.8434578224085272
Batch index = 100	Loss = 1.861612037103623
Batch index = 110	Loss = 1.8723706820048391
Batch index = 120	Loss = 1.9297374031040817
Batch index = 130	Loss = 1.9446732610929758
Batch index = 140	Loss = 1.9550852654501796


In [None]:
with open('/content/drive/MyDrive/dataset/sentihood-test.json', 'r') as fp:
  testing_set = json.load(fp)

In [None]:
locations = list(location_set)
aspects = list(aspect_set)
# models_set is a dictionary containing models corresponding to all the location-aspects

reverse_mapping = {
    0: 'Positive',
    1: 'Negative',
    2: 'None'
}
models_set = {}

for location in locations:
  for aspect in aspects:
    model = torch.load('/content/drive/MyDrive/dataset/loss/'+str(location)+str(aspect)+'/9.bin')
    models_set[str(location)+str(aspect)] = model

In [None]:
""" using the model to predict the test set
"""
MAX_LEN = 140

tokenizer = BertTokenizer.from_pretrained(bert_model)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for data in testing_set:
  id = data['id']
  text = data['text'].strip()
  data['model_pred'] = []

  for location in locations:
    if location in text:
      for aspect in aspects:
        inputs = tokenizer.encode_plus(
            text,
            add_special_tokens = True,
            max_length = MAX_LEN,
            pad_to_max_length = True
        )
        ids = torch.tensor(inputs["input_ids"], dtype=torch.long).unsqueeze(0)
        mask = torch.tensor(inputs["attention_mask"], dtype=torch.long).unsqueeze(0)
        token_type_ids = torch.tensor(inputs["token_type_ids"], dtype=torch.long).unsqueeze(0)

        ids = ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)

        model = models_set[f"{location}{aspect}"]
        outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids)
        prob_max, predicted = torch.max(outputs, 1)

        predicted = predicted.detach().cpu().numpy()

        # Add the predicted to the json only if it is not N(none)
        # Reverse mapping from numbers to sentiments
        if predicted[0] != 2:
          result = {
              "sentiment": reverse_mapping[predicted[0]],
              "aspect": aspect,
          }
          data['model_pred'].append(result)

with open('/content/drive/MyDrive/dataset/preds_second.jsonl', mode='w', encoding='utf-8') as fp:
  for each in testing_set:
    json_record = json.dumps(each, ensure_ascii=False)
    fp.write(json_record + '\n')