<a href="https://colab.research.google.com/github/fwkhan/CE888/blob/main/transformers/Roberta_retrainedALLinOneBert_70_score.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing Necessities

In [None]:
# run this cell, then restart the runtime before continuing
!pip install nlp
!pip install transformers



In [None]:
import tensorflow as tf

# Ask TensorFlow for the name of the visible GPU device.
device_name = tf.test.gpu_device_name()
# Anything other than the expected '/device:GPU:0' aborts the notebook early.
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [None]:
import numpy as np
import pandas as pd
import random
import requests

import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.classify import ClassifierI
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import seaborn as sns

from tqdm.notebook import tqdm

from transformers import BertForSequenceClassification, BertTokenizer
from transformers import AdamW,get_linear_schedule_with_warmup

from sklearn.metrics import recall_score
from sklearn.metrics import f1_score 

import torch
from torch.utils.data import TensorDataset
from torch.utils.data import RandomSampler,SequentialSampler,DataLoader

import warnings
warnings.filterwarnings('ignore')

nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# Common prefix of every raw TweetEval file referenced below.
_TWEETEVAL_DATASETS = 'https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets'

############################ SENTIMENT ANALYSIS #################################################
SENTIMENT_TRAIN_TEXT = f'{_TWEETEVAL_DATASETS}/sentiment/train_text.txt'
SENTIMENT_VALIDATION_TEXT = f'{_TWEETEVAL_DATASETS}/sentiment/val_text.txt'
SENTIMENT_TEST_TEXT = f'{_TWEETEVAL_DATASETS}/sentiment/test_text.txt'

SENTIMENT_TRAIN_LABEL = f'{_TWEETEVAL_DATASETS}/sentiment/train_labels.txt'
SENTIMENT_VALIDATION_LABEL = f'{_TWEETEVAL_DATASETS}/sentiment/val_labels.txt'
SENTIMENT_TEST_LABEL = f'{_TWEETEVAL_DATASETS}/sentiment/test_labels.txt'

############################ HATE #################################################
HATE_TRAIN_TEXT = f'{_TWEETEVAL_DATASETS}/hate/train_text.txt'
HATE_VALIDATION_TEXT = f'{_TWEETEVAL_DATASETS}/hate/val_text.txt'
HATE_TEST_TEXT = f'{_TWEETEVAL_DATASETS}/hate/test_text.txt'

HATE_TRAIN_LABEL = f'{_TWEETEVAL_DATASETS}/hate/train_labels.txt'
HATE_VALIDATION_LABEL = f'{_TWEETEVAL_DATASETS}/hate/val_labels.txt'
HATE_TEST_LABEL = f'{_TWEETEVAL_DATASETS}/hate/test_labels.txt'

############################ OFFENSIVE LANGUAGE#################################################
OFFENSE_TRAIN_TEXT = f'{_TWEETEVAL_DATASETS}/offensive/train_text.txt'
OFFENSE_VALIDATION_TEXT = f'{_TWEETEVAL_DATASETS}/offensive/val_text.txt'
OFFENSE_TEST_TEXT = f'{_TWEETEVAL_DATASETS}/offensive/test_text.txt'

OFFENSE_TRAIN_LABEL = f'{_TWEETEVAL_DATASETS}/offensive/train_labels.txt'
OFFENSE_VALIDATION_LABEL = f'{_TWEETEVAL_DATASETS}/offensive/val_labels.txt'
OFFENSE_TEST_LABEL = f'{_TWEETEVAL_DATASETS}/offensive/test_labels.txt'

In [None]:
def preprocess(df):
    """Add a cleaned-text column and a word-count column to a tweet frame.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame with a string column ``tweet``.

    Returns:
        The same DataFrame with two extra columns:
        ``processed_tweets`` - the tweet with non-letters replaced by spaces,
        lower-cased, English stopwords removed, lemmatized, words of length
        <= 2 dropped and the words in ``ignore_words`` removed;
        ``sentence_length`` - number of whitespace-separated tokens in the
        original ``tweet``.
    """
    lemmatizer = WordNetLemmatizer()
    ignore_words = ['user', 'st']
    # Build the stopword set once. The previous version evaluated
    # stopwords.words("english") for every single word of every tweet.
    stop_words = set(stopwords.words("english"))

    def _clean(text):
        # Same steps, in the same order, as the original column-wise passes.
        tokens = [w.lower() for w in text.split()]
        tokens = [w for w in tokens if w not in stop_words]
        tokens = [lemmatizer.lemmatize(w) for w in tokens]
        return ' '.join(w for w in tokens
                        if len(w) > 2 and w not in ignore_words)

    # Replace every non-letter character with a space before tokenizing.
    letters_only = df['tweet'].replace('[^a-zA-Z]', ' ', regex=True,
                                       inplace=False)
    df['processed_tweets'] = letters_only.apply(_clean)

    # Length is measured on the raw tweet, not on the cleaned text.
    df["sentence_length"] = df.tweet.apply(lambda x: len(str(x).split()))
    return df


# Wrapper to convert text data to pandas Dataframe
def txt_to_df(data, label, classification_task):
    """Build a DataFrame from newline-separated tweets and labels.

    Args:
        data: Text with one tweet per line.
        label: Text with one integer label per line; lines that cannot be
            parsed as an integer (e.g. a blank last line) are skipped.
        classification_task: One of 'Sentiment_analysis', 'hate_analysis' or
            'offensive_analysis'. Any other value leaves the frame without a
            ``sentiment`` column.

    Returns:
        DataFrame with columns ``tweet``, ``label`` and, for a known task,
        ``sentiment`` (the human-readable class name).

    Raises:
        ValueError: If the number of tweets and parsed labels differ.
    """
    tweets = data.split('\n')
    # Drop only the empty string produced by a trailing newline. The previous
    # unconditional `[:-1]` discarded a real tweet when the text did not end
    # with a newline.
    if tweets and tweets[-1] == '':
        tweets = tweets[:-1]

    labels = []
    for raw_label in label.split('\n'):
        try:
            labels.append(int(raw_label))
        except ValueError:
            # Non-numeric lines carry no label; skip them.
            continue

    if len(tweets) != len(labels):
        raise ValueError(
            'Number of tweets ({}) does not match number of labels ({})'.format(
                len(tweets), len(labels)))

    df = pd.DataFrame(tweets, columns=['tweet'])
    df['label'] = labels
    if classification_task == 'Sentiment_analysis':
        df['sentiment'] = df.label.apply(
            lambda x: 'Negative' if x == 0 else ('Neutral' if x == 1 else 'Positive'))
    if classification_task == 'hate_analysis':
        df['sentiment'] = df.label.apply(lambda x: 'Not-hate' if x == 0 else 'hate')
    if classification_task == 'offensive_analysis':
        # Fixed: the class name used to be 'Not-offensive ' with a trailing space.
        df['sentiment'] = df.label.apply(lambda x: 'Not-offensive' if x == 0 else 'offensive')
    return df


def prepare_dataset(TRAIN_TEXT, TRAIN_LABEL, VAL_TEXT, VAL_LABEL, TEST_TEXT, TEST_LABEL, classification_task):
  """Download, parse and preprocess the train/validation/test splits of a task.

  Args:
    TRAIN_TEXT, VAL_TEXT, TEST_TEXT: URLs of the tweet text files.
    TRAIN_LABEL, VAL_LABEL, TEST_LABEL: URLs of the matching label files.
    classification_task: Task name forwarded to ``txt_to_df``.

  Returns:
    Tuple ``(train_df, val_df, test_df)`` of preprocessed DataFrames.

  Raises:
    requests.HTTPError: If any download returns an error status.
    requests.Timeout: If a download does not respond in time.
  """
  def _download(url):
    # Fail loudly on HTTP errors instead of parsing an error page as tweets,
    # and never hang forever on a stalled connection.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    return response.text

  # Reading Train, Validation & Test data from the tweeteval GitHub repo and
  # converting each split to a preprocessed pandas DataFrame.
  splits = []
  for text_url, label_url in ((TRAIN_TEXT, TRAIN_LABEL),
                              (VAL_TEXT, VAL_LABEL),
                              (TEST_TEXT, TEST_LABEL)):
    tweets_txt = _download(text_url)
    labels_txt = _download(label_url)
    splits.append(preprocess(txt_to_df(tweets_txt, labels_txt, classification_task)))

  train_df, val_df, test_df = splits
  return train_df, val_df, test_df



In [None]:
# model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
# tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

In [None]:
def initialize_bert(num_of_class):
  """Load the pretrained 'roberta-base' classifier and its tokenizer.

  Args:
    num_of_class: Number of output labels for the classification head.

  Returns:
    Tuple ``(model, tokenizer)``.
  """
  checkpoint = 'roberta-base'
  model_options = dict(num_labels=num_of_class,
                       output_attentions=False,
                       output_hidden_states=False)
  model = RobertaForSequenceClassification.from_pretrained(checkpoint, **model_options)
  tokenizer = RobertaTokenizer.from_pretrained(checkpoint)
  return model, tokenizer

In [None]:
def encode_data(tokenizer, df, max_sequence_length=256):
  """Tokenize the ``tweet`` column of ``df`` into fixed-length tensors.

  Args:
    tokenizer: Object exposing ``batch_encode_plus`` (a Hugging Face tokenizer).
    df: DataFrame with a ``tweet`` column of strings.
    max_sequence_length: Length every sequence is padded or truncated to.

  Returns:
    Whatever ``tokenizer.batch_encode_plus`` returns for the batch.
  """
  encoder = tokenizer.batch_encode_plus(
      # A plain list of strings rather than a NumPy object array.
      df.tweet.tolist(),
      add_special_tokens=True,
      # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
      padding='max_length',
      max_length=max_sequence_length,
      truncation=True,
      return_tensors='pt')
  return encoder

def extract_inputId_attentionMask(df, encoder):
  """Return the token ids, attention masks and label tensor for one split.

  Args:
    df: DataFrame whose ``label`` column holds the class ids.
    encoder: Mapping with the keys 'input_ids' and 'attention_mask'.

  Returns:
    Tuple ``(input_ids, attention_masks, labels)``; ``labels`` is a new
    tensor built from ``df.label.values``.
  """
  return (encoder['input_ids'],
          encoder["attention_mask"],
          torch.tensor(df.label.values))

In [None]:
def get_tesnsor_dataset(input_ids, attention_masks, labels):
  """Bundle ids, masks and labels into a single ``TensorDataset``."""
  columns = (input_ids, attention_masks, labels)
  return TensorDataset(*columns)


In [None]:
def dataloader_object(data, batch_size=16):
  """Wrap ``data`` in a ``DataLoader`` that draws samples in random order.

  Args:
    data: Dataset to iterate over.
    batch_size: Number of samples per batch.

  Returns:
    A ``DataLoader`` using a ``RandomSampler`` over ``data``.
  """
  random_order = RandomSampler(data)
  return DataLoader(data, batch_size=batch_size, sampler=random_order)

In [None]:
def freeze_bert_layers(model):
  """Disable gradients for the pretrained encoder, leaving the head trainable.

  The original accessed ``model.bert`` only. The model built by
  ``initialize_bert`` is a ``RobertaForSequenceClassification``, whose encoder
  lives under ``model.roberta``, so that lookup failed. The encoder is now
  looked up under ``bert``, ``roberta`` and finally ``base_model``.

  Args:
    model: A sequence-classification model wrapping a pretrained encoder.

  Raises:
    AttributeError: If no encoder sub-module can be found on ``model``.
  """
  backbone = None
  for attr in ('bert', 'roberta', 'base_model'):
    candidate = getattr(model, attr, None)
    # Skip a candidate that is the model itself, which would freeze the head too.
    if candidate is not None and candidate is not model:
      backbone = candidate
      break
  if backbone is None:
    raise AttributeError(
        "model has no 'bert', 'roberta' or 'base_model' encoder to freeze")

  for param in backbone.parameters():
    param.requires_grad = False

In [None]:
# Get all of the model's parameters as a list of tuples.

def print_model_params(model):
  """Print the parameter count and the name/shape of selected parameters.

  Three slices of ``model.named_parameters()`` are listed: the first 5
  entries, entries 5 to 20, and the last 4 entries.

  Args:
    model: Object exposing ``named_parameters()``.
  """
  named = list(model.named_parameters())
  row_format = "{:<55} {:>12}"

  print('The BERT model has {:} different named parameters.\n'.format(len(named)))

  sections = (
      ('==== Embedding Layer ====\n', named[0:5]),
      ('\n==== First Transformer ====\n', named[5:21]),
      ('\n==== Output Layer ====\n', named[-4:]),
  )
  for heading, entries in sections:
      print(heading)
      for param_name, tensor in entries:
          print(row_format.format(param_name, str(tuple(tensor.size()))))

In [None]:
# For the purposes of fine-tuning, the authors recommend choosing from the following values:
# Batch size: 16, 32 (We chose 32 when creating our DataLoaders).
# Learning rate (Adam): 5e-5, 3e-5, 2e-5 (We’ll use 2e-5).
# Number of epochs: 2, 3, 4 (We’ll use 4).
# NOTE: the defaults of initialize_optimizer below (lr=1e-5, epochs=2) differ from the values quoted above.

def initialize_optimizer(model, dataloader, lr=1e-5, epochs=2):
  """Create the AdamW optimizer and a linear schedule without warmup.

  Args:
    model: Model whose parameters are optimized.
    dataloader: Training loader; its length times ``epochs`` is the total
      number of scheduler steps.
    lr: Learning rate passed to AdamW.
    epochs: Number of training epochs.

  Returns:
    Tuple ``(optimizer, scheduler)``.
  """
  optimizer = AdamW(model.parameters(), lr, eps=1e-8)
  total_steps = len(dataloader) * epochs
  scheduler = get_linear_schedule_with_warmup(optimizer,
                                              num_warmup_steps=0,
                                              num_training_steps=total_steps)
  return optimizer, scheduler

In [None]:
def f1_score_func(predictions,y_labelled):
    """Macro-averaged F1 of the arg-max class of ``predictions`` against ``y_labelled``.

    Args:
        predictions: Array of per-class scores; the arg-max is taken along axis 1.
        y_labelled: Array of true class ids.
    """
    predicted_classes = np.argmax(predictions, axis=1).flatten()
    true_classes = y_labelled.flatten()
    macro_f1 = f1_score(true_classes, predicted_classes, average='macro')
    return macro_f1

In [None]:
def recall_score_func(predictions,y_labelled):
    """Macro-averaged recall of the arg-max class of ``predictions`` against ``y_labelled``.

    Args:
        predictions: Array of per-class scores; the arg-max is taken along axis 1.
        y_labelled: Array of true class ids.
    """
    predicted_classes = np.argmax(predictions, axis=1).flatten()
    true_classes = y_labelled.flatten()
    macro_recall = recall_score(true_classes, predicted_classes, average='macro')
    return macro_recall

In [None]:
def load_model_to_device(model):
  """Move ``model`` to CUDA when available, otherwise to the CPU.

  Args:
    model: Module exposing ``.to(device)``.

  Returns:
    The ``torch.device`` the model was moved to.
  """
  if torch.cuda.is_available():
    target = torch.device('cuda')
  else:
    target = torch.device('cpu')
  model.to(target)
  print(f"Loading:{target}")
  return target

In [None]:
def evaluate(model, device, dataloader_val):
    """Run ``model`` over ``dataloader_val`` without gradient tracking.

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and
            ``labels``; element 0 of its output is used as the loss and
            element 1 as the logits.
        device: Device every batch tensor is moved to.
        dataloader_val: Loader yielding (input_ids, attention_mask, labels).

    Returns:
        Tuple ``(average_loss, predictions, true_vals)`` where the loss is
        averaged over batches and the two arrays are the logits and labels
        of all batches concatenated along axis 0.
    """
    model.eval()

    running_loss = 0
    logit_chunks = []
    label_chunks = []

    for batch in tqdm(dataloader_val):
        moved = tuple(tensor.to(device) for tensor in batch)

        with torch.no_grad():
            outputs = model(input_ids=moved[0],
                            attention_mask=moved[1],
                            labels=moved[2])

        running_loss += outputs[0].item()

        # Collect results on the CPU as NumPy arrays.
        logit_chunks.append(outputs[1].detach().cpu().numpy())
        label_chunks.append(moved[2].cpu().numpy())

    loss_val_avg = running_loss / len(dataloader_val)

    predictions = np.concatenate(logit_chunks, axis=0)
    true_vals = np.concatenate(label_chunks, axis=0)
    return loss_val_avg, predictions, true_vals

1.   score 71.28 --- 70.02
2.   batch size train-16, val 32, test -32
1.   lr = 1e-5
2.   1 Epoch
1.   max_length = 256


1.   score 71.19(epoch-1)
2.   batch size train-16, val 16, test -16
1.   lr = 1e-5
2.   1 Epoch
1.   max_length = 125



1.   score (epoch-1)
2.   batch size train-16, val 16, test -16
1.   lr = 2e-5
2.   1 Epoch
1.   max_length = 125


In [None]:
def init_training(model, optimizer, scheduler, epochs, device, dataloader_train, dataloader_val): 
  """Fine-tune ``model`` for ``epochs`` epochs, validating after each one.

  Fixes relative to the previous version:
    * the epoch banner is now an f-string (it printed the literal
      text ``Epoch {epoch}``);
    * gradients are clipped with the in-place ``clip_grad_norm_`` instead of
      the deprecated ``clip_grad_norm``;
    * the progress bar shows the batch loss itself; it used to be divided by
      ``len(batch)``, which is the number of tensors in the batch tuple (3),
      not the number of samples;
    * the metric computed on ``dataloader_val`` is labelled "Val Score".

  NOTE: the metric choice reads the module-level variable
  ``classification_task`` set by the driver loop; it is not a parameter.

  Args:
    model: Model to train; element 0 of its output is used as the loss.
    optimizer: Optimizer stepped once per batch.
    scheduler: Learning-rate scheduler stepped once per batch.
    epochs: Number of passes over ``dataloader_train``.
    device: Device the batches are moved to.
    dataloader_train: Loader yielding (input_ids, attention_mask, labels).
    dataloader_val: Loader evaluated at the end of every epoch.
  """
  for epoch in tqdm(range(1, epochs + 1)):
      model.train()

      loss_train_total = 0

      progress_bar = tqdm(dataloader_train, desc="Epoch: {:1d}".format(epoch), leave=False, disable=False)

      for batch in progress_bar:
          model.zero_grad()

          batch = tuple(b.to(device) for b in batch)

          inputs = {
              "input_ids": batch[0],
              "attention_mask": batch[1],
              "labels": batch[2]
          }
          outputs = model(**inputs)

          loss = outputs[0]
          loss_train_total += loss.item()
          loss.backward()

          # Clip the global gradient norm to 1.0 before the optimizer step.
          torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

          optimizer.step()
          scheduler.step()

          progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item())})

      tqdm.write(f'\nEpoch {epoch}')

      loss_train_avg = loss_train_total / len(dataloader_train)
      tqdm.write(f'Training Loss: {loss_train_avg}')
      val_loss, predictions, true_vals = evaluate(model, device, dataloader_val)

      # Sentiment is scored with macro recall, the other tasks with macro F1.
      if classification_task == 'SENTIMENT_ANALYSIS':
        val_score = recall_score_func(predictions, true_vals)
      else:
        val_score = f1_score_func(predictions, true_vals)

      tqdm.write(f'Val Loss:{val_loss}\n Val Score:{val_score}')
      

In [None]:
def evaluate_wrapper(model, device, dataloader_test):
  """Evaluate ``model`` on the test loader and print its loss and score.

  The loss line used to read "Val Loss" although it is computed on
  ``dataloader_test``; it is now labelled "Test Loss".

  NOTE: the metric choice reads the module-level variable
  ``classification_task`` set by the driver loop; it is not a parameter.

  Args:
    model: Trained model passed to ``evaluate``.
    device: Device the batches are moved to.
    dataloader_test: Loader over the test split.
  """
  test_loss, predictions, true_vals = evaluate(model, device, dataloader_test)

  # Sentiment is scored with macro recall, the other tasks with macro F1.
  if classification_task == 'SENTIMENT_ANALYSIS':
    test_score = recall_score_func(predictions, true_vals)
  else:
    test_score = f1_score_func(predictions, true_vals)

  tqdm.write(f'Test Loss:{test_loss}\n Test Score:{test_score}')

In [None]:
def fineTune_bert(batch_size, lr, epochs, max_length):
  """Seed the RNGs, build model and loaders, train, then score the test split.

  NOTE: reads the module-level frames ``train_df``, ``val_df`` and
  ``test_df``; they must be assigned before this function is called.

  Args:
    batch_size: Batch size used for all three loaders.
    lr: Learning rate for the optimizer.
    epochs: Number of training epochs.
    max_length: Sequence length used when encoding the tweets.
  """
  num_of_class = len(train_df.sentiment.unique())

  # Seed every RNG in use before the model is created.
  seed_val = 17
  for seed_fn in (random.seed, np.random.seed,
                  torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(seed_val)

  model, tokenizer = initialize_bert(num_of_class)

  def _build_loader(frame):
    # encode -> tensors -> TensorDataset -> DataLoader for one split
    encoded = encode_data(tokenizer, frame, max_length)
    input_ids, attention_masks, labels = extract_inputId_attentionMask(frame, encoded)
    dataset = get_tesnsor_dataset(input_ids, attention_masks, labels)
    return dataloader_object(dataset, batch_size)

  dataloader_train = _build_loader(train_df)
  dataloader_eval = _build_loader(val_df)
  dataloader_test = _build_loader(test_df)

  # freeze_bert_layers(model)
  print_model_params(model)
  optimizer, scheduler = initialize_optimizer(model, dataloader_train, lr, epochs)
  device = load_model_to_device(model)

  init_training(model, optimizer, scheduler, epochs, device, dataloader_train, dataloader_eval)
  evaluate_wrapper(model, device, dataloader_test)

For the purposes of fine-tuning, the authors recommend choosing from the following values:
Batch size: 16, 32 (We chose 16 when creating our DataLoaders)
  

For the purposes of fine-tuning, the authors recommend choosing from the following values:
Batch size: 16, 32 (We chose 32 when creating our DataLoaders).
Learning rate (Adam): 5e-5, 3e-5, 2e-5 (We’ll use 1e-5).
Number of epochs: 2, 3, 4 (We’ll use 4).

In [None]:
import torch

# Maps the task key used by the training code to the task name expected by txt_to_df.
classification_task_dict = {'SENTIMENT_ANALYSIS' : 'Sentiment_analysis',
                      'HATE_ANALYSIS' : 'hate_analysis',
                      'OFFENSIVE_LANGUAGE' : 'offensive_analysis'
                      }
class_dict = {'SENTIMENT_ANALYSIS' :['Negative', 'Neutral', 'Positive'],
              'HATE_ANALYSIS' : ['Not-hate', 'hate'],
              'OFFENSIVE_LANGUAGE' : ['Not-offensive', 'offensive']}

config = {'batch_size' : 15,
          'lr' : 1e-5,
          'epochs' : 1,
          'max_length' : 256
        }

#         score 71.28 --- 70.02
# batch size train-16, val 32, test -32
# lr = 1e-5
# 1 Epoch
# max_length = 256

# Dataset URLs per task, in the argument order of prepare_dataset:
# train text, train labels, val text, val labels, test text, test labels.
DATASET_URLS = {
    'SENTIMENT_ANALYSIS': (SENTIMENT_TRAIN_TEXT, SENTIMENT_TRAIN_LABEL,
                           SENTIMENT_VALIDATION_TEXT, SENTIMENT_VALIDATION_LABEL,
                           SENTIMENT_TEST_TEXT, SENTIMENT_TEST_LABEL),
    'HATE_ANALYSIS': (HATE_TRAIN_TEXT, HATE_TRAIN_LABEL,
                      HATE_VALIDATION_TEXT, HATE_VALIDATION_LABEL,
                      HATE_TEST_TEXT, HATE_TEST_LABEL),
    'OFFENSIVE_LANGUAGE': (OFFENSE_TRAIN_TEXT, OFFENSE_TRAIN_LABEL,
                           OFFENSE_VALIDATION_TEXT, OFFENSE_VALIDATION_LABEL,
                           OFFENSE_TEST_TEXT, OFFENSE_TEST_LABEL),
}

# Tasks that are actually fine-tuned in this run. This replaces the ad-hoc
# `continue` statements that left the sentiment and hate branches unreachable;
# add task keys here to enable them.
TASKS_TO_RUN = ['OFFENSIVE_LANGUAGE']

# NOTE: the loop variable must stay named `classification_task`, and the frames
# must stay named train_df / val_df / test_df: init_training, evaluate_wrapper
# and fineTune_bert read these names from module scope.
for classification_task, task in classification_task_dict.items():
  torch.cuda.empty_cache()
  print('=========================================')
  print('CLASSIFICATION TASK: {}'.format(classification_task))
  print('=========================================')
  if classification_task not in TASKS_TO_RUN:
    continue

  train_df, val_df, test_df = prepare_dataset(*DATASET_URLS[classification_task], task)
  fineTune_bert(config['batch_size'], config['lr'], config['epochs'], config['max_length'])


CLASSIFICATION TASK: SENTIMENT_ANALYSIS
CLASSIFICATION TASK: HATE_ANALYSIS
CLASSIFICATION TASK: OFFENSIVE_LANGUAGE


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

The BERT model has 201 different named parameters.

==== Embedding Layer ====

roberta.embeddings.word_embeddings.weight               (50265, 768)
roberta.embeddings.position_embeddings.weight             (514, 768)
roberta.embeddings.token_type_embeddings.weight             (1, 768)
roberta.embeddings.LayerNorm.weight                           (768,)
roberta.embeddings.LayerNorm.bias                             (768,)

==== First Transformer ====

roberta.encoder.layer.0.attention.self.query.weight       (768, 768)
roberta.encoder.layer.0.attention.self.query.bias             (768,)
roberta.encoder.layer.0.attention.self.key.weight         (768, 768)
roberta.encoder.layer.0.attention.self.key.bias               (768,)
roberta.encoder.layer.0.attention.self.value.weight       (768, 768)
roberta.encoder.layer.0.attention.self.value.bias             (768,)
roberta.encoder.layer.0.attention.output.dense.weight     (768, 768)
roberta.encoder.layer.0.attention.output.dense.bias           (

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='Epoch: 1', max=795.0, style=ProgressStyle(description_wid…


Epoch {epoch}
Training Loss: 0.48204981476630804


HBox(children=(FloatProgress(value=0.0, max=89.0), HTML(value='')))


Val Loss:0.43300070454565326
 Test Score:0.7787692372697433



HBox(children=(FloatProgress(value=0.0, max=58.0), HTML(value='')))


Val Loss:0.3714181547021044
 Test Score:0.8063609076677323
