In [1]:
import numpy as np
import pandas as pd
import os
pd.set_option('display.max_colwidth', 200)
import torch
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader,TensorDataset, RandomSampler, SequentialSampler

import torch.nn as nn
import torch.nn.functional as F

from collections import defaultdict
import numpy as np 

Reference
https://curiousily.com/posts/sentiment-analysis-with-bert-and-hugging-face-using-pytorch-and-python/

In [2]:
!pip install transformers --quiet
import transformers
from transformers import AutoTokenizer, AutoModel

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import torch
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')

In [5]:
class BertAsc(nn.Module):
    """Full model"""
    def __init__(self, model_name="activebus/BERT_Review", n_classes=3):
        super(BertAsc, self).__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(p=0.2)
        self.linear = nn.Linear(self.bert.config.hidden_size, n_classes)

    #############################################
    def forward(self, input_ids, attention_mask):
      output = self.bert(
        input_ids=input_ids,
        attention_mask=attention_mask
      )
      x = output["pooler_output"]
      x = self.dropout(x)
      x = self.linear(x)
  
      return F.softmax(x, dim=1)

In [6]:
# load dataset

def create_data_loader(df, tokenizer, batch_size, max_len=164):
  ds = ReviewDataset(
    reviews=df.X.to_numpy(),
    targets=df.Sentiment.to_numpy(),
    tokenizer=tokenizer,
    max_len=max_len
  )
  return DataLoader(
    ds,
    batch_size=batch_size,
    num_workers=4
  )

def split_dataframe(df, test_size=0.2):
  df_train, df_val = train_test_split(
  df,
  test_size=test_size,
  random_state=42)
  return df_train, df_val


# Dataloader

class ReviewDataset(Dataset):

  def __init__(self, reviews, targets, tokenizer, max_len=164):
    self.reviews = reviews
    self.targets = targets
    self.tokenizer = tokenizer
    self.max_len = max_len
  
  def __len__(self):
    return len(self.reviews)
  
  def __getitem__(self, item):
    review = str(self.reviews[item])
    target = self.targets[item]

    encoding = self.tokenizer.encode_plus(
        review, 
        add_special_tokens=True, 
        return_attention_mask=True,
        return_token_type_ids=False,
        return_tensors="pt",
        padding='max_length',
        max_length=self.max_len
        )

    return {
      'review_text': review,
      'input_ids': encoding['input_ids'].flatten(),
      'attention_mask': encoding['attention_mask'].flatten(),
      'targets': torch.tensor(target, dtype=torch.long)
    }

In [28]:
# train functions

def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    model = model.train()

    losses = []
    correct_predictions = 0
    
    for d in data_loader:
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      targets = d["targets"].to(device)

      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask
      )

      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, targets)

      correct_predictions += torch.sum(preds == targets)
      losses.append(loss.item())

      loss.backward()
      nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
      optimizer.step()
      scheduler.step()
      optimizer.zero_grad()
    
    return (correct_predictions.double() / n_examples), np.mean(losses)


In [29]:
def eval_model(model, data_loader, loss_fn, device, n_examples):
    model = model.eval()

    losses = []
    correct_predictions = 0

    with torch.no_grad():
      for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)

        outputs = model(
          input_ids=input_ids,
          attention_mask=attention_mask
        )
        _, preds = torch.max(outputs, dim=1)

        loss = loss_fn(outputs, targets)

        correct_predictions += torch.sum(preds == targets)
        losses.append(loss.item())

    return (correct_predictions.double() / n_examples), np.mean(losses) 

In [9]:
def predict_model(model, data_loader, loss_fn, device, n_examples):
    model = model.eval()

    with torch.no_grad():
      for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)

        outputs = model(
          input_ids=input_ids,
          attention_mask=attention_mask
        )
        _, preds = torch.max(outputs, dim=1)

    return preds

In [32]:

class Classifier:
    """The Classifier"""

    def __init__(self):
        self.train = pd.read_csv('/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/traindata.csv',sep='\t', header=None)
        mapping = {'positive':1,'negative':2,'neutral':0}
        self.train[0]=self.train[0].map(mapping)
        self.train.columns = ['Sentiment','Aspect','Target_word','Placing','Review']
        self.train['X'] = self.train.apply(lambda x: (str(x['Aspect']) + '#' + x['Target_word'] + '/' + x['Review']),axis=1)
        self.train_data = self.train[['X','Sentiment']]

        self.PRETRAIN_MODEL = "activebus/BERT_Review"


        BS = 10
        self.EPOCHS = 1
        MAX_LEN = 164
        self.device = get_default_device()

        # preprocess
        self.tokenizer = AutoTokenizer.from_pretrained(self.PRETRAIN_MODEL)
        self.df_train, self.df_val = split_dataframe(self.train_data, test_size = 0.1)
        self.train_dataloader = create_data_loader(self.df_train, tokenizer, BS, 
                                        max_len= MAX_LEN)
        
        self.val_data_loader = create_data_loader(self.df_val, tokenizer, BS,
                                      max_len = MAX_LEN)
        
        # set upd training
        self.model = BertAsc(model_name="activebus/BERT_Review", n_classes=3)
        # self.model = self.model.train()

        self.model = self.model.to(device)
        self.optimizer = AdamW(self.model.parameters(), lr=2e-5)
        total_steps = len(self.train_dataloader) * EPOCHS
        self.scheduler = transformers.get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=0,
            num_training_steps=total_steps
        )
        self.loss_fn = nn.CrossEntropyLoss().to(self.device)

        self.history = defaultdict(list)
        self.best_accuracy = 0

    #############################################
    def train(self, trainfile, devfile=None):

        for epoch in range(self.EPOCHS):
          print(f'Epoch {epoch + 1}/{self.EPOCHS}')
          print('-' * 10)
          train_acc, train_loss = train_epoch(
              self.model,
              self.train_dataloader,    
              self.loss_fn, 
              self.optimizer, 
              self.device, 
              self.scheduler, 
              len(self.df_train)
         )

          val_acc, val_loss = eval_model(
              self.model,
              self.val_data_loader,
              loss_fn, 
              device, 
              len(self.df_val)
          )

          print(f'Val   loss {val_loss} accuracy {val_acc}')
          print()

          self.history['train_acc'].append(train_acc)
          self.history['train_loss'].append(train_loss)
          self.history['val_acc'].append(val_acc)
          self.history['val_loss'].append(val_loss)

          if val_acc > self.best_accuracy:
             torch.save(self.model.state_dict(), 'best_model_state.bin')
             best_accuracy = val_acc

    def predict(self, datafile):
        device = get_default_device()
        self.model = BertAsc(model_name="activebus/BERT_Review", n_classes=3)
        self.model.load_state_dict(torch.load('best_model_state.bin'))
        self.model = self.model.to(device)
        self.model = self.model.eval()
        """Predicts class labels for the input instances in file 'datafile'
        Returns the list of predicted labels
        """
        # We load the data 
        test = pd.read_csv('/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv',sep='\t', header=None)
        mapping = {'positive':1,'negative':2,'neutral':0}
        test[0]=test[0].map(mapping)
        test.columns = ['Sentiment','Aspect','Target_word','Placing','Review']
        test['X'] = test.apply(lambda x: (str(x['Aspect']) + '#' + x['Target_word'] + '/' + x['Review']),axis=1)
        test_data = test[['X','Sentiment']]

        # PARAMS
        #self.PRETRAIN_MODEL = "activebus/BERT_Review"
        BS = 10
        EPOCHS = 7
        MAX_LEN = 164
        loss_fn = nn.CrossEntropyLoss().to(device)
        #self.optimizer = AdamW(self.model.parameters(), lr=2e-5)


        # preprocess
        tokenizer = AutoTokenizer.from_pretrained(self.PRETRAIN_MODEL)
        test_dataloader = create_data_loader(test_data, tokenizer, 10, max_len = 164)
        #self.pred = predict_model(self.model, test_dataloader, loss_fn, device, len(test_data))
        
        predictions = []

        with torch.no_grad():
          for d in test_dataloader:
            texts = d["review_text"]
            input_ids = d["input_ids"].to(device)
            attention_mask = d["attention_mask"].to(device)
            targets = d["targets"].to(device)

            outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask)

            preds = outputs.argmax(1)
            
            preds = preds.cpu().numpy()
   
            predictions += [
                            {2:'positive',1:'neutral', 0:'negative'}[x] for x in preds
            ]
        
        return  predictions

In [33]:
import time, sys
import numpy as np


def set_reproducible():
    # The below is necessary to have reproducible behavior.
    import random as rn
    import os
    os.environ['PYTHONHASHSEED'] = '0'
    # The below is necessary for starting Numpy generated random numbers
    # in a well-defined initial state.
    np.random.seed(17)
    # The below is necessary for starting core Python generated random numbers
    # in a well-defined state.
    rn.seed(12345)



def load_label_output(filename):
    with open(filename, 'r', encoding='UTF-8') as f:
        return [line.strip().split("\t")[0] for line in f if line.strip()]



def eval_list(glabels, slabels):
    if (len(glabels) != len(slabels)):
        print("\nWARNING: label count in system output (%d) is different from gold label count (%d)\n" % (
        len(slabels), len(glabels)))
    n = min(len(slabels), len(glabels))
    incorrect_count = 0
    for i in range(n):
        if slabels[i] != glabels[i]: incorrect_count += 1
    acc = (n - incorrect_count) / n
    return acc*100



def train_and_eval(classifier, trainfile, devfile, testfile, run_id):
    print(f"\nRUN: {run_id}")
    print("  %s.1. Training the classifier..." % str(run_id))
    classifier.train(trainfile, devfile)
    print()
    print("  %s.2. Eval on the dev set..." % str(run_id), end="")
    slabels = classifier.predict(devfile)
    glabels = load_label_output(devfile)
    devacc = eval_list(glabels, slabels)
    print(" Acc.: %.2f" % devacc)
    testacc = -1
    if testfile is not None:
        # Evaluation on the test data
        print("  %s.3. Eval on the test set..." % str(run_id), end="")
        slabels = classifier.predict(testfile)
        glabels = load_label_output(testfile)
        testacc = eval_list(glabels, slabels)
        print(" Acc.: %.2f" % testacc)
    print()
    return (devacc, testacc)


#if __name__ == "__main__":
set_reproducible()
n_runs = 1
trainfile =  "/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/traindata.csv"
devfile =  "/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv"
testfile = None

# Runs
start_time = time.perf_counter()
devaccs = []
testaccs = []
for i in range(1, n_runs+1):
    classifier =  Classifier()
    devacc, testacc = train_and_eval(classifier, trainfile, devfile, testfile, i)
    devaccs.append(np.round(devacc,2))
    testaccs.append(np.round(testacc,2))
print('\nCompleted %d runs.' % n_runs)
total_exec_time = (time.perf_counter() - start_time)
print("Dev accs:", devaccs)
print("Test accs:", testaccs)
print()
print("Mean Dev Acc.: %.2f (%.2f)" % (np.mean(devaccs), np.std(devaccs)))
print("Mean Test Acc.: %.2f (%.2f)" % (np.mean(testaccs), np.std(testaccs)))
print("\nExec time: %.2f s. ( %d per run )" % (total_exec_time, total_exec_time / n_runs))

Some weights of the model checkpoint at activebus/BERT_Review were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



RUN: 1
  1.1. Training the classifier...


TypeError: ignored

In [14]:
model = BertAsc(model_name="activebus/BERT_Review", n_classes=3)
model.load_state_dict(torch.load('best_model_state.bin'))

Some weights of the model checkpoint at activebus/BERT_Review were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [16]:
device = get_default_device()
model = model.to(device)

In [17]:
test = pd.read_csv('/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv',sep='\t', header=None)
mapping = {'positive':1,'negative':2,'neutral':0}
test[0] = test[0].map(mapping)
test.columns = ['Sentiment','Aspect','Target_word','Placing','Review']
test['X'] = test.apply(lambda x: (str(x['Aspect']) + '#' + x['Target_word'] + '/' + x['Review']),axis=1)
test_data = test[['X','Sentiment']]

In [18]:
# PARAMS
#self.PRETRAIN_MODEL = "activebus/BERT_Review"
BS = 10
EPOCHS = 7
MAX_LEN = 164
loss_fn = nn.CrossEntropyLoss().to(device)
#self.optimizer = AdamW(self.model.parameters(), lr=2e-5)


# preprocess
PRETRAIN_MODEL = "activebus/BERT_Review"
tokenizer = AutoTokenizer.from_pretrained(PRETRAIN_MODEL)
test_dataloader = create_data_loader(test_data, tokenizer, 10, max_len = 164)
#self.pred = predict_model(self.model, test_dataloader, loss_fn, device, len(test_data))

predictions = []

In [19]:
with torch.no_grad():
  for d in test_dataloader:
    texts = d["review_text"]
    input_ids = d["input_ids"].to(device)
    attention_mask = d["attention_mask"].to(device)
    targets = d["targets"].to(device)

    outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask)

    preds = outputs.argmax(1)
    
    preds = preds.cpu().numpy()

    predictions += [
                    {2:'positive',1:'neutral', 0:'negative'}[x] for x in preds
    ]


In [20]:
test = pd.read_csv('/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv', sep='\t', header=None)
temp = test.iloc[:,0]

In [21]:
from sklearn.metrics import classification_report
target_names = ['positive', 'neutral', 'negative']
print(classification_report(temp, predictions, target_names=target_names))

              precision    recall  f1-score   support

    positive       0.00      0.00      0.00        98
     neutral       0.02      0.43      0.04        14
    negative       0.15      0.06      0.09       264

    accuracy                           0.06       376
   macro avg       0.06      0.16      0.04       376
weighted avg       0.10      0.06      0.06       376



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
