# Finetuning Emot
Emot is an emotion recognition dataset with 5 possible labels: `sadness`, `anger`, `love`, `fear`, and `happy`.

In [1]:
import os, sys
# sys.path.append('../')
# os.chdir('../')

import random
import numpy as np
import pandas as pd
import torch
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm

from transformers import BertForSequenceClassification, BertConfig, BertTokenizer
from nltk.tokenize import TweetTokenizer

# from utils.forward_fn import forward_sequence_classification
# from utils.metrics import document_sentiment_metrics_fn
# from utils.data_utils import DocumentSentimentDataset, DocumentSentimentDataLoader
from utils.utils_forward_fn import forward_sequence_classification
from utils.utils_init_dataset import set_seed, load_dataset_loader
from utils.utils_semantic_use import USE
from utils.utils_data_utils import DocumentSentimentDataset, DocumentSentimentDataLoader, EmotionDetectionDataset, EmotionDetectionDataLoader
from utils.utils_metrics import document_sentiment_metrics_fn
from utils.utils_init_model import text_logit, fine_tuning_model, eval_model, init_model, logit_prob, load_word_index
from utils.get_args import get_args

In [2]:
###
# common functions
###
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch (CPU and current CUDA device) RNGs.

    Note: this notebook-local definition shadows the ``set_seed`` imported
    from ``utils.utils_init_dataset`` in the import cell.

    Args:
        seed: Integer seed handed to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for seeder in seeders:
        seeder(seed)
    
def count_param(module, trainable=False):
    """Count the scalar parameters held by ``module``.

    Args:
        module: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        trainable: When True, only parameters with ``requires_grad`` set are
            counted; otherwise every parameter is counted.

    Returns:
        Total number of elements across the selected parameters.
    """
    selected = module.parameters()
    if trainable:
        selected = (param for param in selected if param.requires_grad)
    return sum(param.numel() for param in selected)
    
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Args:
        optimizer: Object exposing an iterable ``param_groups`` of dicts
            that each carry an ``'lr'`` entry.

    Returns:
        The ``'lr'`` value of the first group, or ``None`` when the optimizer
        has no parameter groups.
    """
    return next((group['lr'] for group in optimizer.param_groups), None)

def metrics_to_string(metric_dict):
    """Render a metrics mapping as a single space-separated string.

    Args:
        metric_dict: Mapping from metric name to a numeric value.

    Returns:
        ``'name:value'`` pairs (value formatted to two decimals) joined by
        single spaces, in the mapping's iteration order.
    """
    return ' '.join(
        '{}:{:.2f}'.format(name, score) for name, score in metric_dict.items()
    )

In [3]:
# Set random seed before the model and data loaders are created.
# The set_seed called here is the notebook-local definition above, which
# shadows the one imported from utils.utils_init_dataset.
set_seed(26092020)

# Load Model

In [4]:
# Load Tokenizer and Config from the same pretrained checkpoint
pretrained_name = 'indobenchmark/indobert-base-p1'
tokenizer = BertTokenizer.from_pretrained(pretrained_name)
config = BertConfig.from_pretrained(pretrained_name)
# Size the classification head to the number of labels declared by the dataset class
config.num_labels = EmotionDetectionDataset.NUM_LABELS

# Instantiate model
model = BertForSequenceClassification.from_pretrained(pretrained_name, config=config)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# model

In [6]:
# Total number of parameters in the model, trainable or not (count_param defaults to trainable=False)
count_param(model)

124445189

# Prepare Dataset

In [7]:
# All three splits live in the same dataset directory.
dataset_dir = './dataset/emot-emotion-twitter'
train_dataset_path = f'{dataset_dir}/train_preprocess.csv'
valid_dataset_path = f'{dataset_dir}/valid_preprocess.csv'
test_dataset_path = f'{dataset_dir}/test_preprocess_masked_label.csv'

In [8]:
# Build one dataset per split; all splits share the tokenizer and lowercasing.
train_dataset, valid_dataset, test_dataset = (
    EmotionDetectionDataset(split_path, tokenizer, lowercase=True)
    for split_path in (train_dataset_path, valid_dataset_path, test_dataset_path)
)

# Loader settings common to every split; only the training split is shuffled.
loader_kwargs = dict(max_seq_len=512, batch_size=32, num_workers=16)
train_loader = EmotionDetectionDataLoader(dataset=train_dataset, shuffle=True, **loader_kwargs)
valid_loader = EmotionDetectionDataLoader(dataset=valid_dataset, shuffle=False, **loader_kwargs)
test_loader = EmotionDetectionDataLoader(dataset=test_dataset, shuffle=False, **loader_kwargs)

In [9]:
# Label <-> index lookup tables declared on the dataset class
w2i = EmotionDetectionDataset.LABEL2INDEX
i2w = EmotionDetectionDataset.INDEX2LABEL
for mapping in (w2i, i2w):
    print(mapping)

{'sadness': 0, 'anger': 1, 'love': 2, 'fear': 3, 'happy': 4}
{0: 'sadness', 1: 'anger', 2: 'love', 3: 'fear', 4: 'happy'}


# Test model on sample sentences

In [10]:
text = 'Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita'
# Encode the sentence and shape the ids as a batch of one on the model's device
subwords = tokenizer.encode(text)
subwords = torch.LongTensor(subwords).view(1, -1).to(model.device)

# Inference only: run the forward pass without recording an autograd graph
with torch.no_grad():
    logits = model(subwords)[0]
# Index of the highest-scoring class
label = torch.topk(logits, k=1, dim=-1)[1].squeeze().item()

print(f'Text: {text} | Label : {i2w[label]} ({F.softmax(logits, dim=-1).squeeze()[label] * 100:.3f}%)')

Text: Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita | Label : sadness (26.509%)


In [11]:
text = 'Budi pergi ke pondok indah mall membeli cakwe'
# Encode the sentence and shape the ids as a batch of one on the model's device
subwords = tokenizer.encode(text)
subwords = torch.LongTensor(subwords).view(1, -1).to(model.device)

# Inference only: run the forward pass without recording an autograd graph
with torch.no_grad():
    logits = model(subwords)[0]
# Index of the highest-scoring class
label = torch.topk(logits, k=1, dim=-1)[1].squeeze().item()

print(f'Text: {text} | Label : {i2w[label]} ({F.softmax(logits, dim=-1).squeeze()[label] * 100:.3f}%)')

Text: Budi pergi ke pondok indah mall membeli cakwe | Label : sadness (27.766%)


In [12]:
text = 'Dasar anak sialan!! Kurang ajar!!'
# Encode the sentence and shape the ids as a batch of one on the model's device
subwords = tokenizer.encode(text)
subwords = torch.LongTensor(subwords).view(1, -1).to(model.device)

# Inference only: run the forward pass without recording an autograd graph
with torch.no_grad():
    logits = model(subwords)[0]
# Index of the highest-scoring class
label = torch.topk(logits, k=1, dim=-1)[1].squeeze().item()

print(f'Text: {text} | Label : {i2w[label]} ({F.softmax(logits, dim=-1).squeeze()[label] * 100:.3f}%)')

Text: Dasar anak sialan!! Kurang ajar!! | Label : love (24.563%)


# Fine Tuning & Evaluation

In [13]:
# !python3 -m pip install pytorch-lightning

In [14]:
# from pytorch_lightning import Trainer, seed_everything

# seed_everything(26092020, workers=True)

In [15]:
# from torch import Trainer
# from pytorch_lightning.callbacks import EarlyStopping

# from transformers import EarlyStoppingCallback, TrainingArguments

# args = TrainingArguments(
#     f"training_with_callbacks",
#     evaluation_strategy ='steps',
#     eval_steps = 50, # Evaluation and Save happens every 50 steps
#     save_total_limit = 5, # Only last 5 models are saved. Older ones are deleted.
#     learning_rate=3e-6,
#     # per_device_train_batch_size=batch_size,
#     # per_device_eval_batch_size=batch_size,
#     # devices=0 if torch.cuda.is_available() else None,  # limiting got iPython runs
#     num_train_epochs=15,
#     # weight_decay=0.01,
#     push_to_hub=False,
#     metric_for_best_model = 'acc',
#     load_best_model_at_end=True
# )

# trainer = Trainer(
#     model,
#     args,
#     accelerator="gpu",
#     callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]
#     # callbacks=[MyCallback],  # We can either pass the callback class this way or an instance of it (MyCallback())
# )

# early_stopping = EarlyStopping(
#     monitor='val_loss',
#     patience=3
# )

# trainer = Trainer(
#     accelerator="gpu",
#     devices=1 if torch.cuda.is_available() else None,
#     max_epochs=15,
#     callbacks=[early_stopping],
#     deterministic=True,
#     enable_progress_bar=True
# )

In [16]:
# trainer.fit(model, train_loader)

In [17]:
# import transformers
# from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

# transformers.set_seed(26092020)

In [18]:
# args = TrainingArguments(
#    f"training_with_callbacks",
#    evaluation_strategy ='steps',
#    eval_steps = 50, # Evaluation and Save happens every 50 steps
#    # save_total_limit = 5, # Only last 5 models are saved. Older ones are deleted.
#    learning_rate=3e-6,
#    # per_device_train_batch_size=batch_size,
#    # per_device_eval_batch_size=batch_size,
#    num_train_epochs=15,
#    # weight_decay=0.01,
#    push_to_hub=False,
#    metric_for_best_model = 'acc',
#    load_best_model_at_end=True)

# trainer = Trainer(
#     model,
#     args,
#     # devices=1 if torch.cuda.is_available() else None,
#     compute_metrics=document_sentiment_metrics_fn,
#     callbacks = [EarlyStoppingCallback(early_stopping_patience=3)],
#     train_dataset=train_dataset,
#     eval_dataset=valid_dataset,
#     # optimizers = [optimizer]
# )

In [19]:
# trainer.train()
# model.state_dict()
class EarlyStopping(object):
    """Signal when a monitored metric has stopped improving.

    Call ``step(metric)`` once per epoch; it returns True when training
    should stop.

    Args:
        mode: ``'min'`` if lower metric values are better, ``'max'`` if
            higher values are better.
        min_delta: Margin by which a value must beat the best one to count
            as an improvement. Interpreted as a percentage of the best value
            when ``percentage`` is True, as an absolute amount otherwise.
        patience: Number of consecutive non-improving steps after which
            ``step`` returns True. ``0`` disables early stopping entirely.
        percentage: Selects relative (True) or absolute (False) ``min_delta``.

    Raises:
        ValueError: If ``mode`` is neither ``'min'`` nor ``'max'``.
    """

    def __init__(self, mode='min', min_delta=0, patience=10, percentage=False):
        self.mode = mode
        self.min_delta = min_delta
        self.patience = patience
        self.best = None  # best metric seen so far; None until the first valid step
        self.num_bad_epochs = 0  # consecutive steps without improvement
        self.is_better = None
        self._init_is_better(mode, min_delta, percentage)

        if patience == 0:
            # Disabled: override step on the instance so it never asks to stop.
            self.is_better = lambda a, b: True
            self.step = lambda a: False

    def step(self, metrics):
        """Record one metric value and report whether training should stop.

        Args:
            metrics: Scalar value of the monitored metric for this step.

        Returns:
            True if the value is NaN or if ``patience`` consecutive steps
            have passed without improvement; False otherwise.
        """
        # Check for NaN before anything else: storing NaN as the baseline
        # would make every later comparison against it evaluate to False.
        if np.isnan(metrics):
            return True

        if self.best is None:
            # First valid observation becomes the baseline.
            self.best = metrics
            return False

        if self.is_better(metrics, self.best):
            self.num_bad_epochs = 0
            self.best = metrics
            print('improvement!')
        else:
            self.num_bad_epochs += 1
            print(f'no improvement, bad_epochs counter: {self.num_bad_epochs}')

        return self.num_bad_epochs >= self.patience

    def _init_is_better(self, mode, min_delta, percentage):
        """Install ``self.is_better(value, best)`` for the given configuration."""
        if mode not in {'min', 'max'}:
            raise ValueError('mode ' + mode + ' is unknown!')
        if not percentage:
            # Absolute margin.
            if mode == 'min':
                self.is_better = lambda a, best: a < best - min_delta
            elif mode == 'max':
                self.is_better = lambda a, best: a > best + min_delta
        else:
            # Margin expressed as a percentage of the current best value.
            if mode == 'min':
                self.is_better = lambda a, best: a < best - (
                            best * min_delta / 100)
            elif mode == 'max':
                self.is_better = lambda a, best: a > best + (
                            best * min_delta / 100)

In [20]:
# Move the model to the GPU before constructing the optimizer, as the
# torch.optim documentation recommends, so the optimizer is built over the
# parameters that will actually be trained.
model = model.cuda()
optimizer = optim.Adam(model.parameters(), lr=3e-6)

In [21]:
# Train
import copy

es = EarlyStopping(patience=5)
n_epochs = 15

# Snapshot of the weights with the best validation accuracy seen so far;
# loaded back into the model once the loop ends.
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0


for epoch in range(n_epochs):
    # ---- Training pass ----
    model.train()
    torch.set_grad_enabled(True)

    total_train_loss = 0
    list_hyp, list_label = [], []

    train_pbar = tqdm(train_loader, leave=True, total=len(train_loader))
    for i, batch_data in enumerate(train_pbar):
        # Forward model (the last element of each batch is not passed to the forward function)
        loss, batch_hyp, batch_label = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')

        # Update model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        tr_loss = loss.item()
        total_train_loss = total_train_loss + tr_loss

        # Calculate metrics
        list_hyp += batch_hyp
        list_label += batch_label

        # Progress bar shows the running mean loss over the batches seen so far
        train_pbar.set_description("(Epoch {}) TRAIN LOSS:{:.4f} LR:{:.8f}".format((epoch+1),
            total_train_loss/(i+1), get_lr(optimizer)))

    # Calculate train metric
    metrics = document_sentiment_metrics_fn(list_hyp, list_label)
    print("(Epoch {}) TRAIN LOSS:{:.4f} {} LR:{:.8f}".format((epoch+1),
        total_train_loss/(i+1), metrics_to_string(metrics), get_lr(optimizer)))

    # ---- Evaluate on validation ----
    model.eval()
    torch.set_grad_enabled(False)

    total_loss = 0
    list_hyp, list_label = [], []

    pbar = tqdm(valid_loader, leave=True, total=len(valid_loader))
    for i, batch_data in enumerate(pbar):
        loss, batch_hyp, batch_label = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')

        # Calculate total loss
        valid_loss = loss.item()
        total_loss = total_loss + valid_loss

        # Calculate evaluation metrics (recomputed over all batches so far, for the progress bar)
        list_hyp += batch_hyp
        list_label += batch_label
        metrics = document_sentiment_metrics_fn(list_hyp, list_label)

        pbar.set_description("VALID LOSS:{:.4f} {}".format(total_loss/(i+1), metrics_to_string(metrics)))

    metrics = document_sentiment_metrics_fn(list_hyp, list_label)
    print("(Epoch {}) VALID LOSS:{:.4f} {}".format((epoch+1),
        total_loss/(i+1), metrics_to_string(metrics)))

    epoch_loss = total_loss/(i+1)
    epoch_acc = metrics['ACC']

    # Checkpoint selection uses validation accuracy...
    if epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(model.state_dict())

    # ...while early stopping monitors the mean validation loss.
    if es.step(epoch_loss):
        print('early stop criterion is met, we can stop now')
        break

# Restore the best-accuracy weights rather than those of the last epoch
model.load_state_dict(best_model_wts)

(Epoch 1) TRAIN LOSS:1.4311 LR:0.00000300: 100%|██████████████████| 111/111 [00:16<00:00,  6.71it/s]


(Epoch 1) TRAIN LOSS:1.4311 ACC:0.41 F1:0.34 REC:0.36 PRE:0.41 LR:0.00000300


VALID LOSS:1.2349 ACC:0.54 F1:0.52 REC:0.52 PRE:0.55: 100%|█████████| 14/14 [00:01<00:00,  7.75it/s]


(Epoch 1) VALID LOSS:1.2349 ACC:0.54 F1:0.52 REC:0.52 PRE:0.55


(Epoch 2) TRAIN LOSS:1.0933 LR:0.00000300: 100%|██████████████████| 111/111 [00:16<00:00,  6.69it/s]


(Epoch 2) TRAIN LOSS:1.0933 ACC:0.60 F1:0.57 REC:0.58 PRE:0.60 LR:0.00000300


VALID LOSS:0.9608 ACC:0.61 F1:0.60 REC:0.61 PRE:0.60: 100%|█████████| 14/14 [00:01<00:00,  7.57it/s]


(Epoch 2) VALID LOSS:0.9608 ACC:0.61 F1:0.60 REC:0.61 PRE:0.60
improvement!


(Epoch 3) TRAIN LOSS:0.8281 LR:0.00000300: 100%|██████████████████| 111/111 [00:16<00:00,  6.74it/s]


(Epoch 3) TRAIN LOSS:0.8281 ACC:0.71 F1:0.70 REC:0.71 PRE:0.71 LR:0.00000300


VALID LOSS:0.8323 ACC:0.67 F1:0.66 REC:0.67 PRE:0.67: 100%|█████████| 14/14 [00:01<00:00,  7.36it/s]


(Epoch 3) VALID LOSS:0.8323 ACC:0.67 F1:0.66 REC:0.67 PRE:0.67
improvement!


(Epoch 4) TRAIN LOSS:0.6958 LR:0.00000300: 100%|██████████████████| 111/111 [00:16<00:00,  6.70it/s]


(Epoch 4) TRAIN LOSS:0.6958 ACC:0.76 F1:0.76 REC:0.76 PRE:0.77 LR:0.00000300


VALID LOSS:0.7744 ACC:0.71 F1:0.71 REC:0.71 PRE:0.72: 100%|█████████| 14/14 [00:01<00:00,  7.44it/s]


(Epoch 4) VALID LOSS:0.7744 ACC:0.71 F1:0.71 REC:0.71 PRE:0.72
improvement!


(Epoch 5) TRAIN LOSS:0.5733 LR:0.00000300: 100%|██████████████████| 111/111 [00:16<00:00,  6.68it/s]


(Epoch 5) TRAIN LOSS:0.5733 ACC:0.81 F1:0.81 REC:0.81 PRE:0.81 LR:0.00000300


VALID LOSS:0.7762 ACC:0.70 F1:0.70 REC:0.70 PRE:0.71: 100%|█████████| 14/14 [00:01<00:00,  7.65it/s]


(Epoch 5) VALID LOSS:0.7762 ACC:0.70 F1:0.70 REC:0.70 PRE:0.71
no improvement, bad_epochs counter: 1


(Epoch 6) TRAIN LOSS:0.4770 LR:0.00000300: 100%|██████████████████| 111/111 [00:16<00:00,  6.70it/s]


(Epoch 6) TRAIN LOSS:0.4770 ACC:0.84 F1:0.84 REC:0.84 PRE:0.85 LR:0.00000300


VALID LOSS:0.7741 ACC:0.70 F1:0.71 REC:0.71 PRE:0.71: 100%|█████████| 14/14 [00:01<00:00,  7.67it/s]


(Epoch 6) VALID LOSS:0.7741 ACC:0.70 F1:0.71 REC:0.71 PRE:0.71
improvement!


(Epoch 7) TRAIN LOSS:0.3964 LR:0.00000300: 100%|██████████████████| 111/111 [00:16<00:00,  6.65it/s]


(Epoch 7) TRAIN LOSS:0.3964 ACC:0.87 F1:0.87 REC:0.87 PRE:0.88 LR:0.00000300


VALID LOSS:0.7886 ACC:0.71 F1:0.71 REC:0.72 PRE:0.72: 100%|█████████| 14/14 [00:01<00:00,  7.49it/s]


(Epoch 7) VALID LOSS:0.7886 ACC:0.71 F1:0.71 REC:0.72 PRE:0.72
no improvement, bad_epochs counter: 1


(Epoch 8) TRAIN LOSS:0.3193 LR:0.00000300: 100%|██████████████████| 111/111 [00:16<00:00,  6.70it/s]


(Epoch 8) TRAIN LOSS:0.3193 ACC:0.90 F1:0.90 REC:0.90 PRE:0.91 LR:0.00000300


VALID LOSS:0.8224 ACC:0.71 F1:0.72 REC:0.72 PRE:0.73: 100%|█████████| 14/14 [00:01<00:00,  7.72it/s]


(Epoch 8) VALID LOSS:0.8224 ACC:0.71 F1:0.72 REC:0.72 PRE:0.73
no improvement, bad_epochs counter: 2


(Epoch 9) TRAIN LOSS:0.2586 LR:0.00000300: 100%|██████████████████| 111/111 [00:16<00:00,  6.74it/s]


(Epoch 9) TRAIN LOSS:0.2586 ACC:0.93 F1:0.93 REC:0.92 PRE:0.93 LR:0.00000300


VALID LOSS:0.8989 ACC:0.71 F1:0.72 REC:0.72 PRE:0.72: 100%|█████████| 14/14 [00:01<00:00,  7.70it/s]


(Epoch 9) VALID LOSS:0.8989 ACC:0.71 F1:0.72 REC:0.72 PRE:0.72
no improvement, bad_epochs counter: 3


(Epoch 10) TRAIN LOSS:0.2151 LR:0.00000300: 100%|█████████████████| 111/111 [00:16<00:00,  6.73it/s]


(Epoch 10) TRAIN LOSS:0.2151 ACC:0.94 F1:0.94 REC:0.94 PRE:0.95 LR:0.00000300


VALID LOSS:0.9289 ACC:0.71 F1:0.72 REC:0.72 PRE:0.73: 100%|█████████| 14/14 [00:01<00:00,  7.66it/s]


(Epoch 10) VALID LOSS:0.9289 ACC:0.71 F1:0.72 REC:0.72 PRE:0.73
no improvement, bad_epochs counter: 4


(Epoch 11) TRAIN LOSS:0.1737 LR:0.00000300: 100%|█████████████████| 111/111 [00:16<00:00,  6.73it/s]


(Epoch 11) TRAIN LOSS:0.1737 ACC:0.95 F1:0.95 REC:0.95 PRE:0.95 LR:0.00000300


VALID LOSS:0.9879 ACC:0.71 F1:0.72 REC:0.72 PRE:0.72: 100%|█████████| 14/14 [00:01<00:00,  7.79it/s]


(Epoch 11) VALID LOSS:0.9879 ACC:0.71 F1:0.72 REC:0.72 PRE:0.72
no improvement, bad_epochs counter: 5
early stop criterion is met, we can stop now


<All keys matched successfully>

In [22]:
# Evaluate on test
model.eval()
torch.set_grad_enabled(False)

list_hyp = []

pbar = tqdm(test_loader, leave=True, total=len(test_loader))
for batch_data in pbar:
    # Only the predicted labels are kept; the loss and batch labels returned
    # by the forward function are discarded here.
    _, batch_hyp, _ = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')
    list_hyp += batch_hyp

# Save prediction (reset_index adds an 'index' column holding the row number)
df = pd.DataFrame({'label':list_hyp}).reset_index()
df.to_csv('pred.txt', index=False)

print(df)

100%|███████████████████████████████████████████████████████████████| 14/14 [00:01<00:00,  8.16it/s]

     index    label
0        0     love
1        1     fear
2        2     fear
3        3    happy
4        4    happy
..     ...      ...
435    435  sadness
436    436  sadness
437    437     fear
438    438  sadness
439    439    happy

[440 rows x 2 columns]





# Test fine-tuned model on sample sentences

In [23]:
text = 'Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita'
# Encode the sentence and shape the ids as a batch of one on the model's device
subwords = tokenizer.encode(text)
subwords = torch.LongTensor(subwords).view(1, -1).to(model.device)

# Inference only: disable autograd explicitly instead of relying on the
# global grad state left behind by an earlier cell
with torch.no_grad():
    logits = model(subwords)[0]
# Index of the highest-scoring class
label = torch.topk(logits, k=1, dim=-1)[1].squeeze().item()

print(f'Text: {text} | Label : {i2w[label]} ({F.softmax(logits, dim=-1).squeeze()[label] * 100:.3f}%)')

Text: Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita | Label : happy (81.682%)


In [24]:
text = 'Budi pergi ke pondok indah mall membeli cakwe'
# Encode the sentence and shape the ids as a batch of one on the model's device
subwords = tokenizer.encode(text)
subwords = torch.LongTensor(subwords).view(1, -1).to(model.device)

# Inference only: disable autograd explicitly instead of relying on the
# global grad state left behind by an earlier cell
with torch.no_grad():
    logits = model(subwords)[0]
# Index of the highest-scoring class
label = torch.topk(logits, k=1, dim=-1)[1].squeeze().item()

print(f'Text: {text} | Label : {i2w[label]} ({F.softmax(logits, dim=-1).squeeze()[label] * 100:.3f}%)')

Text: Budi pergi ke pondok indah mall membeli cakwe | Label : sadness (77.337%)


In [25]:
text = 'Dasar anak sialan!! Kurang ajar!!'
# Encode the sentence and shape the ids as a batch of one on the model's device
subwords = tokenizer.encode(text)
subwords = torch.LongTensor(subwords).view(1, -1).to(model.device)

# Inference only: disable autograd explicitly instead of relying on the
# global grad state left behind by an earlier cell
with torch.no_grad():
    logits = model(subwords)[0]
# Index of the highest-scoring class
label = torch.topk(logits, k=1, dim=-1)[1].squeeze().item()

print(f'Text: {text} | Label : {i2w[label]} ({F.softmax(logits, dim=-1).squeeze()[label] * 100:.3f}%)')

Text: Dasar anak sialan!! Kurang ajar!! | Label : anger (98.442%)
