# Setup

In [None]:
!pip install -q transformers
!pip install -q datasets
!pip install -q emoji
!pip install -q ray
!pip install pickle5==0.0.10

In [None]:
import numpy as np
import random

from datasets import load_dataset
from datasets import load_metric
from transformers import pipeline
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments, RobertaForSequenceClassification, AutoTokenizer, AutoModelForSequenceClassification
from transformers import EarlyStoppingCallback

from sklearn.calibration import calibration_curve
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import matplotlib.pyplot as plt

# Dataset Installation
We make use of the [TweetEval](https://huggingface.co/datasets/tweet_eval) dataset, particularly its "emoji" subdataset.

In [None]:
# Load the "emoji" configuration of the TweetEval dataset.
dataset = load_dataset("tweet_eval", "emoji")

# Only the final expression of a cell is rendered automatically, so the split
# overview is printed explicitly; the sample record stays as the cell output.
print(dataset)
dataset['test'][0]

In [None]:
# Class names of the "label" feature, read from the train split's schema.
emoji_labels = dataset['train'].features['label'].names
num_labels = len(emoji_labels)
# Class index -> class name, in the order the names are listed.
id2label = {class_index: class_name for class_index, class_name in enumerate(emoji_labels)}

## Tokenize Dataset

In [None]:
# BERTweet accepts at most 128 tokens per sequence. Fixing the length
# explicitly keeps padding="max_length" and truncation well-defined even if
# the loaded tokenizer does not advertise a model_max_length of its own.
MAX_SEQ_LENGTH = 128

tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=True, normalization=True)


def tokenize_function(examples):
    """Tokenize a batch of examples into fixed-length model inputs.

    Args:
        examples: A batch as supplied by `Dataset.map(batched=True)`; its
            "text" entry is passed to the tokenizer.

    Returns:
        The tokenizer output, padded and truncated to MAX_SEQ_LENGTH tokens.
    """
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
    )


tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets

# Training
We leverage the pre-trained BERTweet base model described in https://aclanthology.org/2020.emnlp-demos.2.pdf and https://huggingface.co/docs/transformers/model_doc/bertweet, and fine-tune it on the emoji data.

## Hyperparameter Tuning Using Ray

In [None]:
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from sklearn.metrics import f1_score

# Search space handed to Ray Tune: batch sizes are fixed, while classifier
# dropout, weight decay and learning rate are sampled from the given ranges.
tune_config = dict(
    per_device_train_batch_size=8,
    per_device_eval_batch_size=32,
    classifier_dropout=tune.uniform(0, 0.5),
    weight_decay=tune.uniform(0.0, 0.3),
    learning_rate=tune.loguniform(1e-6, 1e-4),
)

# Base arguments for the search runs: evaluate every 500 steps over 3 epochs.
training_args = TrainingArguments(
    output_dir="test_trainer",
    evaluation_strategy="steps",
    eval_steps=500,
    num_train_epochs=3,
)

def compute_metrics(eval_pred):
    """Score one evaluation pass with macro-averaged F1.

    Args:
        eval_pred: A pair that unpacks into (logits, labels).

    Returns:
        A dict with a single "f1" entry.
    """
    raw_scores, gold_labels = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predicted_classes = np.argmax(raw_scores, axis=-1)
    macro_f1 = f1_score(gold_labels, predicted_classes, average='macro')
    return {"f1": macro_f1}

def model_init(trial):
    """Build a fresh BERTweet sequence classifier for the Trainer.

    Args:
        trial: None when no trial is active; otherwise a mapping of sampled
            hyperparameters from which "classifier_dropout" is read.

    Returns:
        A sequence-classification model with `num_labels` outputs, loaded
        from the "vinai/bertweet-base" checkpoint.
    """
    model_kwargs = {
        "num_labels": num_labels,
        "problem_type": "single_label_classification",
    }
    # Identity comparison is the correct None test; `== None` defers to
    # whatever __eq__ the trial object happens to implement.
    if trial is not None:
        model_kwargs["classifier_dropout"] = trial['classifier_dropout']
    return AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base", **model_kwargs)

# The search trains on one shard (index 3 of 10) of the training split --
# presumably to keep each trial cheap.
search_train_subset = tokenized_datasets['train'].shard(num_shards=10, index=3)

trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=search_train_subset,
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics,
)

# Scheduler configured to maximise the "objective" value reported by trials.
asha_scheduler = ASHAScheduler(metric="objective", mode="max")

br = trainer.hyperparameter_search(
    hp_space=lambda _: tune_config,
    direction="maximize",
    backend="ray",
    n_trials=1,  # number of trials
    scheduler=asha_scheduler,
)

# Show the best run found by the search.
br


## Initiate Training

In [None]:
# Build the final model and training configuration from the best
# hyperparameters found by the search.
from sklearn.metrics import f1_score, classification_report, accuracy_score

hyp = br.hyperparameters
model = AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base", 
                                                           num_labels=num_labels, 
                                                           problem_type="single_label_classification",
                                                           classifier_dropout=hyp['classifier_dropout'])
# Store the emoji names on the config in BOTH directions so that index->name
# and name->index lookups agree with each other.
model.config.id2label = id2label
model.config.label2id = {label_name: label_index for label_index, label_name in id2label.items()}

# Setup training: a single epoch over the full training split, evaluating and
# checkpointing once per epoch, and reloading the checkpoint with the best F1.
training_args = TrainingArguments("test_trainer", 
                  num_train_epochs=1,
                  per_device_train_batch_size=8,
                  per_device_eval_batch_size=32,
                  evaluation_strategy="epoch",
                  save_strategy="epoch",                
                  learning_rate=hyp['learning_rate'],
                  weight_decay=hyp['weight_decay'],               
                  metric_for_best_model = 'f1',
                  load_best_model_at_end=True
                  )
def compute_metrics(eval_pred):
    """Report accuracy and macro-averaged F1 for one evaluation pass.

    Accuracy is computed with scikit-learn's `accuracy_score` instead of the
    deprecated `datasets.load_metric('accuracy')`, which also had to fetch a
    metric script at runtime. The returned keys are unchanged.

    Args:
        eval_pred: A pair that unpacks into (logits, labels).

    Returns:
        A dict with "accuracy" and "f1" entries.
    """
    logits, labels = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predictions = np.argmax(logits, axis=-1)
    return {
        "accuracy": float(accuracy_score(labels, predictions)),
        "f1": f1_score(labels, predictions, average='macro'),
    }

# NOTE(review): training_args requests one epoch with per-epoch evaluation,
# so a patience of 3 evaluations does not appear reachable -- confirm whether
# more epochs (or step-based evaluation) were intended.
early_stopping = EarlyStoppingCallback(early_stopping_patience=3)

# Fine-tune on the full training split, validating on the validation split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

trainer.train()

# Evaluation

In [None]:
# Score the fine-tuned model on the held-out test split.
trainer.evaluate(eval_dataset=tokenized_datasets['test'])