In [1]:
import os
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score, confusion_matrix
from transformers import RobertaModel, RobertaTokenizer, RobertaConfig
from torch.utils.data import Dataset, DataLoader
import wandb
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhritikakolkar[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
class EmotionDataset(Dataset):
    def __init__(self, tweet, emotion, tokenizer, max_length):
        self.tweet = tweet
        self.emotion = emotion
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.tweet)

    def __getitem__(self, idx):
        encoding = self.tokenizer.encode_plus(
            self.tweet[idx],
            max_length = self.max_length,
            add_special_tokens=True,
            padding = "max_length",
            return_attention_mask=True,
            return_tensors='pt',
            )

        item = {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'emotion_labels': torch.tensor(self.emotion[idx], dtype=torch.long)
        }

        return item

In [4]:
def preprocessing(df, config = None, train=True):
    if train:
        df = df.rename(columns={
            'tweet_text': 'tweet',
            'emotion_in_tweet_is_directed_at': 'entity', # entity means brand, product or service
            'is_there_an_emotion_directed_at_a_brand_or_product': 'emotion'
        })
        df = df[df["tweet"].notna()].reset_index(drop = True)
        df["tweet"] = df["tweet"].str.replace(pat = r'[^a-zA-Z0-9#@\!$%^&*(){}:\-\'\":;,\.?/\s]', repl = ' ', regex=True)
        df["tweet"] = df["tweet"].str.replace(pat = r'[:;,\.?/\s]{2,}', repl = ' ', regex= True)
        df["tweet"] = df["tweet"].str.strip()
        df = df.drop("entity", axis = 1)
        df["emotion"] = df["emotion"].replace(config["emotion_to_id"])
    else:
        df = df.rename(columns={
            'Tweet': 'tweet'
        })
        df = df[df["tweet"].notna()].reset_index(drop = True)
        df["tweet"] = df["tweet"].str.replace(pat = r'[^a-zA-Z0-9#@\!$%^&*(){}:\-\'\":;,\.?/\s]', repl = ' ', regex=True)
        df["tweet"] = df["tweet"].str.replace(pat = r'[:;,\.?/\s]{2,}', repl = ' ', regex= True)
        df["tweet"] = df["tweet"].str.strip()
    return df

In [5]:
class EmotionClassifier(nn.Module):
    def __init__(self, num_classes_emotion, pretrained_model_path):
        super(EmotionClassifier, self).__init__()
        self.model_config = RobertaConfig.from_pretrained(pretrained_model_path)
        self.pretrained_model = RobertaModel.from_pretrained(pretrained_model_path, config= self.model_config)
        self.dropout = nn.Dropout(0.3)
        # We can use single classifier with classes = num_classes_entity + num_classes_emotion, but using this approach for simplification
        self.classifier_emotion = nn.Linear(self.pretrained_model.config.hidden_size, num_classes_emotion)
    
    def forward(self, input_ids, attention_mask):
        pooler_output = self.pretrained_model(input_ids=input_ids, attention_mask=attention_mask).pooler_output
        pooler_output= nn.ReLU()(pooler_output)
        pooler_output= self.dropout(pooler_output)

        # Output for emotion classification
        emotion_output = self.classifier_emotion(pooler_output)
        # No need of output_probs as using nn.CrossEntropyLoss
        # emotion_output_probs = nn.Softmax(dim=1)(emotion_output)

        return emotion_output

In [6]:
def compute_metrics(predictions, targets, config):
    # Convert tensors to numpy arrays
    if isinstance(predictions, np.ndarray) and isinstance(targets, np.ndarray):
        predictions_np = predictions
        targets_np = targets
    else:
        predictions_np = predictions.cpu().numpy()
        targets_np = targets.cpu().numpy()

    labels = np.unique(predictions_np)
    # Compute metrics
    precision = precision_score(targets_np, predictions_np, average= config["average"], labels= labels, zero_division= config["zero_division"])
    recall = recall_score(targets_np, predictions_np, average= config["average"], labels= labels, zero_division= config["zero_division"])
    accuracy = accuracy_score(targets_np, predictions_np)
    f1 = f1_score(targets_np, predictions_np, average= config["average"], labels= labels, zero_division= config["zero_division"])

    return precision, recall, accuracy, f1

In [7]:
def make_config(config):
    train = preprocessing(pd.read_excel("../data/dataset.xlsx", sheet_name = "Train"), config= config, train=True)
    # test = preprocessing(pd.read_excel("data/dataset.xlsx", sheet_name = "Test"), train=False)
    x_train, x_test, y_train, y_test = train_test_split(train["tweet"].to_numpy(), train["emotion"].to_numpy(), test_size=config["test_size"], random_state= config["seed"])

    tokenizer = RobertaTokenizer.from_pretrained(config["pretrained_model_path"])
    tokenizer.save_pretrained(config["model_save_path"])
    tokenizer.save_pretrained(config["model_save_path"])
    
    train_dataset = EmotionDataset(tweet= x_train, emotion= y_train, tokenizer= tokenizer, max_length= config["max_length"])
    test_dataset = EmotionDataset(tweet= x_test, emotion= y_test, tokenizer= tokenizer, max_length= config["max_length"])

    train_dataloader = DataLoader(dataset= train_dataset, batch_size= 8)
    test_dataloader = DataLoader(dataset= test_dataset, batch_size= 8)

    model = EmotionClassifier(config["num_classes_emotion"], config["pretrained_model_path"])

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=config["learning_rate"])

    return model, train_dataloader, test_dataloader, criterion, optimizer

In [8]:
def train_and_eval(model, train_dataloader, test_dataloader, criterion, optimizer, config):
    # Tell wandb to watch what the model gets up to: gradients, weights, and more!
    model.to(config["device"])
    wandb.watch(model, criterion, log="all", log_freq=10)
    samples_count = 0
    global_step = 0
    best_f1_score = 0.0
    for epoch in tqdm(range(config["num_epochs"])):
        model.train()
        log_batch_loss = 0
        log_batch_emotion_output = np.array([])
        log_batch_emotion_labels = np.array([])
        for batch in train_dataloader:
            input_ids = batch['input_ids'].to(config["device"])
            attention_mask = batch['attention_mask'].to(config["device"])
            emotion_labels = batch['emotion_labels'].to(config["device"])

            
            optimizer.zero_grad()
            emotion_output = model(input_ids, attention_mask)

            loss = criterion(emotion_output, emotion_labels)

            loss.backward()
            optimizer.step()
            
            global_step += 1
            samples_count += len(emotion_output)

            log_batch_loss += float(loss)
            log_batch_emotion_output = np.hstack((log_batch_emotion_output, torch.argmax(emotion_output, dim=1).cpu().numpy()))
            log_batch_emotion_labels = np.hstack((log_batch_emotion_labels, emotion_labels.cpu().numpy()))
            if (global_step + 1) % config["log_batch_step"] == 0:
                precision, recall, accuracy, f1 = compute_metrics(log_batch_emotion_output, log_batch_emotion_labels, config)
                wandb.log({
                    "epoch": epoch,
                    "loss" : log_batch_loss/config["log_batch_step"],
                    "global_step" : global_step,
                    "samples_count" : samples_count, 
                    "train_precision": precision,
                    "train_recall": recall,
                    "train_f1_score": f1,
                    "train_accuracy": accuracy
                }, step = global_step)
                log_batch_loss = 0
                log_batch_emotion_output = np.array([])
                log_batch_emotion_labels = np.array([])

        model.eval()
        with torch.no_grad():
            loss = 0
            test_metrics = np.array([0.0, 0.0, 0.0, 0.0])
            for batch in test_dataloader:
                input_ids = batch['input_ids'].to(config["device"])
                attention_mask = batch['attention_mask'].to(config["device"])
                emotion_labels = batch['emotion_labels'].to(config["device"])

                emotion_output = model(input_ids, attention_mask)

                batch_metrics = np.array(compute_metrics(torch.argmax(emotion_output, dim=1), emotion_labels, config))
                test_metrics += batch_metrics
                loss += criterion(emotion_output, emotion_labels)
            test_precision, test_recall, test_accuracy, test_f1 = test_metrics/ len(test_dataloader)
            wandb.log({
                "epoch": epoch,
                "test_loss" : float(loss)/len(test_dataloader),
                "test_precision": test_precision,
                "test_recall": test_recall,
                "test_f1_score": test_f1,
                "test_accuracy": test_accuracy
            }, step = global_step)

        if epoch == 0 or (epoch+1)%config["model_save_epoch"] == 0 or epoch == config["num_epochs"]-1:
            torch.save(model.state_dict(), os.path.join(config["model_save_path"],f"iter_epoch_{str(epoch).zfill(3)}.bin"))
        
        if test_f1 >= best_f1_score:
            best_epoch = epoch
            best_f1_score = test_f1
            torch.save(model.state_dict(), os.path.join(config["model_save_path"],"pytorch_model.bin"))
    print(f"Best Epoch for best f1_score of {best_f1_score} is {best_epoch}. Model saved as pytorch_model.bin in the respective version")

In [9]:
def model_pipeline(hyperparameters, name):
    # tell wandb to get started
    with wandb.init(project="wysa", name= name, config=hyperparameters):
        # access all HPs through wandb.config, so logging matches execution!
        config = wandb.config
        # make the model, data, and optimization problem
        model, train_dataloader, test_dataloader, criterion, optimizer = make_config(config)
        
        # and use them to train the model
        train_and_eval(model, train_dataloader, test_dataloader, criterion, optimizer, config)
    return model

In [12]:
version = "test"
config = dict(
    seed = 0,
    log_batch_step = 10,
    num_epochs=1,
    num_classes_emotion=3,
    num_classes_entity=0,
    pretrained_model_path= "../weights/twitter-roberta-base-sentiment-latest",
    model_save_path = f"../weights/{version}",
    model_save_epoch = 2,
    model_architecture = "Roberta",
    model_class= "EmotionClassifier",
    dataset_class = "EmotionDataset",
    batch_size=8,
    learning_rate=2e-5,
    max_length = 70,
    test_size = 0.2,
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    emotion_to_id = {
        'No emotion toward brand or product': 0,
        'Positive emotion': 1,
        'Negative emotion': 2,
        "I can't tell": 2
    },
    entity_to_id = {
        'iPad': 0,
        'iPad or iPhone App': 1,
        'iPhone': 3,
        'Apple': 4,
        'Other Apple product or service': 5,
        'Android': 6,
        'Android App': 7,
        'Google': 8,
        'Other Google product or service': 9,
    },
    average="macro",
    zero_division=1.0
)

In [13]:
model = model_pipeline(config, version)

100%|██████████| 1/1 [02:49<00:00, 169.48s/it]


Best Epoch for best f1_score of 0.7383168984331773 is 0. Model saved as pytorch_model.bin in the respective version


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,▇▇▅▃▇▁▆▇▃▄▇▄▄█▄▃▄▄▃▆▂▃▇▁▅▅▄▃▆▅▆▃▅▁▄▃▇▃▅▃
samples_count,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_accuracy,▁
test_f1_score,▁
test_loss,▁
test_precision,▁
test_recall,▁
train_accuracy,▅▅▄▇▄█▃▂▅▆▄▅▅▁▄▆▆▆▆▄▇▇▃▇▄▃▆▅▃▄▅▄▄▆▅▆▄▆▅▆

0,1
epoch,0.0
global_step,859.0
loss,0.59638
samples_count,6870.0
test_accuracy,0.76609
test_f1_score,0.73832
test_loss,0.56324
test_precision,0.73104
test_recall,0.79631
train_accuracy,0.76923
