In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import torch
import wandb
import spacy
import random
import logging
import pytextrank
from tqdm import tqdm
from torch.optim import Adam
from torch.nn import DataParallel
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from transformers import TrainingArguments, Trainer
from sklearn.model_selection import train_test_split
from datasets import load_dataset,concatenate_datasets
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification

In [2]:
# Summarize the input to how many tokens, default to 512 for BERT use.
# Since LongFormer could accept 4096 tokens, we could skip TextRank if LongFormer
seed_val = 42
ENABLE_TEXT_RANK = False
TEXT_RANK_LENGTH = 512
LEARNING_RATE = 2e-5
NUM_EPOCHS = 4

MODEL = ["T5base","roberta","Distilbert","FlanT5small"] # Choose from "T5base", "roberta", "Distilbert", "FlanT5small"

NEW_BATCH_SIZE = 8 if MODEL in ["roberta", "Distilbert", "FlanT5small"] else 4 

#wandb.login(key='bf24a38a046a0448057459477a5d48fbc6eb2f6a')
#wandb.init()

In [3]:
# If there's a GPU available...
# If GPU not available, training will cost SEVERAL DAYS, not recommended running on CPU
if torch.cuda.is_available():    
    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
# If not...
else:
    print('No GPU available, using the CPU insteadp(not recommended).')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: NVIDIA GeForce GTX 1070


In [4]:
dataset = load_dataset("argilla/banking_sentiment_setfit")
test = concatenate_datasets([dataset['train'], dataset['test']])

test[:5]

Found cached dataset parquet (/home/fangkangmi/.cache/huggingface/datasets/argilla___parquet/argilla--banking_sentiment_setfit-4a60f83f113675bf/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/2 [00:00<?, ?it/s]

{'text': ['are you sending me my card?',
  "Two days ago I did a transfer to another account within the country.  It doesn't appear the transfer went through.  I have verified the account number several times.  Could you please check on this for me?",
  "Why didn't I receive the right amount of cash?",
  "Is there a reason why my virtual card won't work?",
  'Why is my balance the same after a transfer?'],
 'label': [1, 1, 0, 0, 1]}

In [5]:
# Set the seed value all over the place to make this reproducible.
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [6]:
model_options = {
    "T5base": "michelecafagna26/t5-base-finetuned-sst2-sentiment",
    "roberta":"cardiffnlp/twitter-roberta-base-sentiment-latest",
    "Distilbert": "distilbert-base-uncased-finetuned-sst-2-english",
    "FlanT5small": "cardiffnlp/flan-t5-small-tweet-sentiment"
}

In [7]:
def set_model_and_tokenizer(model):
    return AutoModelForSeq2SeqLM.from_pretrained(model_options.get(model)) if model in ["T5base", "FlanT5small"] \
           else AutoModelForSequenceClassification.from_pretrained(model_options.get(model))\
           ,AutoTokenizer.from_pretrained(model_options.get(model))

# Tokenize

By now the test dataset, model and tokenizer has been loaded. The next step is to tokenize the dataset.

In [8]:
#T5 tokenization
def T5_tokenization(dataset):
    dataset['text'] = list(map(lambda s: 'sentiment ' + s, dataset['text']))
    return tokenizer(dataset['text'], max_length=128, padding=True, return_tensors="pt")

def T5_get_sentiment(tensor):
    preds = model.generate(tensor)
    decoded_preds = tokenizer.batch_decode(sequences=preds, skip_special_tokens=True)
    return decoded_preds

#RoBERTa tokenization
def RoBERTa_DistilBERT_tokenization(dataset):
    return tokenizer(dataset, max_length=128, padding=True, return_tensors="pt")
class RoBERTa_DistilBERT_Dataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


In [9]:
if('T5base' in MODEL):
    model, tokenizer = set_model_and_tokenizer('T5base')
    T5_test = test.map(T5_tokenization, batched = True)['input_ids']
    T5_predict = T5_get_sentiment(torch.tensor(T5_test))
    T5_predict_digit = [1 if i == 'p' else 0 for i in T5_predict]
    # Print the classification report
    # 1 means positive 0 means negative
    report = classification_report(T5_predict_digit, test['label'])
    print(report)


Loading cached processed dataset at /home/fangkangmi/.cache/huggingface/datasets/argilla___parquet/argilla--banking_sentiment_setfit-4a60f83f113675bf/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-5828b1a747a225b2.arrow


              precision    recall  f1-score   support

           0       0.88      0.36      0.51       124
           1       0.15      0.70      0.25        20

    accuracy                           0.41       144
   macro avg       0.52      0.53      0.38       144
weighted avg       0.78      0.41      0.48       144



In [10]:
if('roberta' in MODEL):
    model, tokenizer = set_model_and_tokenizer('roberta')
    tokenized_RoBERTa_test = RoBERTa_DistilBERT_tokenization(test['text'])
    RoBERTa_dataset_test = RoBERTa_DistilBERT_Dataset(tokenized_RoBERTa_test,test['label'])
    RoBERTa_dataloader = DataLoader(RoBERTa_dataset_test, batch_size=8, shuffle=True)

    # Iterate over the test dataset

    torch.cuda.empty_cache()
    model.eval()
    model.to('cuda')
    predictions = []
    true_labels = []

    for batch in RoBERTa_dataloader:
        batch = {k: v.to('cuda') for k, v in batch.items()}

        # Disable gradient calculation
        with torch.no_grad():
            # Perform inference
            outputs = model(**batch)

        predicted_values = outputs.logits

        predictions.extend(predicted_values)
        true_labels.extend(batch['labels'].tolist())
    # Convert logits to predictions
    # 1 and 2 means positive and 0 means negative
    predictions = [torch.argmax(item).item() for item in predictions]
    predictions = [1 if (i == 1 or i == 2) else i for i in predictions]
    # Generate classification report
    report = classification_report(test['label'], predictions)
    print(report)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


              precision    recall  f1-score   support

           0       0.33      0.37      0.35        51
           1       0.63      0.59      0.61        93

    accuracy                           0.51       144
   macro avg       0.48      0.48      0.48       144
weighted avg       0.53      0.51      0.52       144



In [24]:
if('Distilbert' in MODEL):
    model, tokenizer = set_model_and_tokenizer('Distilbert')
    tokenized_Distilbert_test = RoBERTa_DistilBERT_tokenization(test['text'])
    Distilbert_dataset_test = RoBERTa_DistilBERT_Dataset(tokenized_Distilbert_test,test['label'])
    Distilbert_dataloader = DataLoader(Distilbert_dataset_test, batch_size=8, shuffle=True)

    # Iterate over the test dataset

    torch.cuda.empty_cache()
    model.eval()
    model.to('cuda')
    predictions = []
    true_label = []

    for batch in Distilbert_dataloader:
        batch = {k: v.to('cuda') for k, v in batch.items()}

        # Disable gradient calculation
        with torch.no_grad():
            # Perform inference
            outputs = model(**batch)

        predicted_values = outputs.logits
        predictions.extend(predicted_values)
        true_label.extend(batch['labels'].tolist())
    # Convert logits to predictions,  
    # 1 means positive and 0 means negative
    predictions = [torch.argmax(item).item() for item in predictions]
    # Generate classification report
    report = classification_report(test['label'], predictions)
    print(report)

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


              precision    recall  f1-score   support

           0       0.34      0.92      0.50        51
           1       0.43      0.03      0.06        93

    accuracy                           0.35       144
   macro avg       0.39      0.48      0.28       144
weighted avg       0.40      0.35      0.22       144



In [26]:
if('FlanT5small' in MODEL):
    model, tokenizer = set_model_and_tokenizer('FlanT5small')
    Flan_T5_test = test.map(T5_tokenization, batched = True)['input_ids']
    Flan_T5_predict = T5_get_sentiment(torch.tensor(Flan_T5_test))
    Flan_T5_predict_digit = [1 if i == 'positive' else 0 for i in Flan_T5_predict]
    # Print the classification report
    # 1 means positive 0 means negative
    report = classification_report(Flan_T5_predict_digit, test['label'])
    print(report)


Loading cached processed dataset at /home/fangkangmi/.cache/huggingface/datasets/argilla___parquet/argilla--banking_sentiment_setfit-4a60f83f113675bf/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-6c82bc1426f23e36.arrow


              precision    recall  f1-score   support

           0       0.96      0.40      0.57       122
           1       0.22      0.91      0.35        22

    accuracy                           0.48       144
   macro avg       0.59      0.66      0.46       144
weighted avg       0.85      0.48      0.53       144



In [37]:
test = concatenate_datasets([dataset['train'], dataset['test']])
predition_list = ['negative' if element == 0 else 'positive' for element in Flan_T5_predict_digit]
test_list = ['negative' if element == 0 else 'positive' for element in test['label']]
report = classification_report(predition_list, test_list)
print(report)


              precision    recall  f1-score   support

    negative       0.96      0.40      0.57       122
    positive       0.22      0.91      0.35        22

    accuracy                           0.48       144
   macro avg       0.59      0.66      0.46       144
weighted avg       0.85      0.48      0.53       144

