In [None]:

import torch
import tensorflow as tf
from torch.utils.data import TensorDataset , DataLoader, RandomSampler , SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertModel, BertConfig
from transformers import AdamW, BertForSequenceClassification, get_linear_schedule_with_warmup

In [None]:
import pandas as pd 
import io 
import numpy as np 
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
# Widen pandas' display: allow up to 400 characters per cell and never hide columns.
for _option, _value in (('max_colwidth', 400), ('display.max_columns', None)):
    pd.set_option(_option, _value)

In [None]:
# Report whether a CUDA device is visible and, if so, which one.
# torch.cuda.get_device_name() raises when no CUDA device exists, so only
# query the name after the availability check succeeds.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name())
else:
    print("No CUDA device found")

In [None]:
# Relative paths to the train / dev CSV files that are loaded with pd.read_csv below.
TRAIN_TEXT_FILE_PTH="../data/raw/MELD/train/train_sent_emo.csv"
DEV_TEXT_FILE_PTH="../data/raw/MELD/dev/dev_sent_emo.csv"

In [None]:
# Read both splits into DataFrames in one pass over the two configured paths.
train_df, dev_df = (
    pd.read_csv(csv_path)
    for csv_path in (TRAIN_TEXT_FILE_PTH, DEV_TEXT_FILE_PTH)
)


In [None]:
# Preview the first five training rows.
train_df.head(n=5)

In [None]:
# Preview the first five dev rows.
dev_df.head(n=5)

# Separating text and targets

In [None]:
# Pull the utterance text and its sentiment label out of each split as NumPy arrays.
train_sentences = train_df["Utterance"].values
train_labels = train_df["Sentiment"].values

dev_sentences = dev_df["Utterance"].values
dev_labels = dev_df["Sentiment"].values



In [None]:
# Encode labels
from sklearn.preprocessing import OrdinalEncoder

# OrdinalEncoder works on 2-D input, so view each label vector as a single column.
train_label_column = train_labels.reshape(-1, 1)
dev_label_column = dev_labels.reshape(-1, 1)

# Learn the category -> number mapping from the training labels only,
# then apply that same mapping to both splits.
encoder = OrdinalEncoder()
encoder.fit(train_label_column)
train_labels = encoder.transform(train_label_column)
dev_labels = encoder.transform(dev_label_column)

# Tokenization

In [None]:
tokenizer=BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)


def _tokenize_with_special_tokens(sentence):
    """Split `sentence` into WordPiece tokens framed as [CLS] ... [SEP].

    BERT's sequence-classification head reads the hidden state of the first
    token, which the pretrained model expects to be [CLS]; `tokenizer.tokenize`
    alone does not add the special tokens, so they are attached here.
    """
    return [tokenizer.cls_token] + tokenizer.tokenize(sentence) + [tokenizer.sep_token]


# NOTE: sequences longer than the padding length used later are truncated from
# the end, which would drop the trailing [SEP] for those utterances.
tokenized_train_texts=[_tokenize_with_special_tokens(sent) for sent in train_sentences]
tokenized_dev_texts=[_tokenize_with_special_tokens(sent) for sent in dev_sentences]

In [None]:

# Length of the longest tokenized training utterance (0 when there are none),
# printed for inspection.
max_len = max((len(tokens) for tokens in tokenized_train_texts), default=0)
print(max_len)

In [None]:
# Fixed sequence length: token id sequences are padded / truncated to this size below.
MAX_LEN=128

# Numericalization

In [None]:
# Map every training token to its vocabulary id, then show the first
# utterance as tokens and as ids.
input_ids_train = list(map(tokenizer.convert_tokens_to_ids, tokenized_train_texts))
print(tokenized_train_texts[0])
print(input_ids_train[0])

In [None]:
# Map every dev token to its vocabulary id, then show the first
# utterance as tokens and as ids.
input_ids_dev = list(map(tokenizer.convert_tokens_to_ids, tokenized_dev_texts))
print(tokenized_dev_texts[0])
print(input_ids_dev[0])

In [None]:
# Bring every id sequence to exactly MAX_LEN entries: shorter ones are padded
# at the end, longer ones are cut at the end.
pad_options = dict(maxlen=MAX_LEN, dtype='long', truncating='post', padding='post')
input_ids_train = pad_sequences(input_ids_train, **pad_options)
input_ids_dev = pad_sequences(input_ids_dev, **pad_options)

# Creating attention masks

In [None]:
# Attention mask per sequence: 1.0 where the token id is positive (a real
# token), 0.0 where it is not (the padding positions).
train_attention_masks = [
    [float(token_id > 0) for token_id in id_seq]
    for id_seq in input_ids_train
]

dev_attention_masks = [
    [float(token_id > 0) for token_id in id_seq]
    for id_seq in input_ids_dev
]

# To Tensors

In [None]:
# Training split: token ids and labels as int64, attention mask as float32.
input_train = torch.tensor(input_ids_train, dtype=torch.long)
mask_train = torch.tensor(train_attention_masks, dtype=torch.float)
label_train = torch.tensor(train_labels, dtype=torch.long)

# Validation split, converted the same way.
input_val = torch.tensor(input_ids_dev, dtype=torch.long)
mask_val = torch.tensor(dev_attention_masks, dtype=torch.float)
label_val = torch.tensor(dev_labels, dtype=torch.long)


# Batching and Iterator

In [None]:
# Each dataset item is an (input_ids, attention_mask, label) triple.
train_data=TensorDataset(input_train, mask_train, label_train)
val_data=TensorDataset(input_val, mask_val ,label_val)

# One batch size shared by both loaders.
BATCH_SIZE=32

# Shuffle only the training examples. Validation is read in a fixed order so
# its batches (and therefore any per-batch metric) are identical on every run.
train_sampler=RandomSampler(train_data)
train_dataloader=DataLoader(train_data, sampler=train_sampler, batch_size=BATCH_SIZE)
val_sampler=SequentialSampler(val_data)
val_dataloader=DataLoader(val_data, sampler=val_sampler, batch_size=BATCH_SIZE)

# Model config

In [None]:
# Build a BERT model straight from a default BertConfig (no pretrained
# weights are loaded here); the next cell prints its configuration.
configuration = BertConfig()
model = BertModel(config=configuration)

In [None]:
# Show the configuration attached to the current `model`.
print(model.config)

# Loading Pretrained Model

In [None]:
# Rebind `model` to pretrained "bert-base-uncased" wrapped for sequence classification.
# num_labels=3 sizes the classification output; presumably the three encoded
# Sentiment classes — confirm against the label encoder's categories.
model=BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)

# Param Optimization

In [None]:
# Split the model parameters into two optimizer groups:
#   * every parameter whose name matches none of `no_decay` gets weight decay 0.1
#   * biases and LayerNorm weights get no weight decay
# The per-group option AdamW reads is 'weight_decay'; any other key (such as
# 'weight_decay_rate') is silently ignored and the optimizer's default is
# used for every group instead.
param_optimizer=list(model.named_parameters())
no_decay=['bias', 'LayerNorm.weight']
optimizer_grouped_parameters=[
    {'params': [p for n,p in param_optimizer if not any(nd in n for nd in no_decay)],
    'weight_decay': 0.1,},
    {'params':[p for n,p in param_optimizer if any(nd in n for nd in no_decay)],
    'weight_decay':0.0}
]

In [None]:
# AdamW over the two parameter groups defined above.
optimizer = AdamW(
    optimizer_grouped_parameters,
    lr=2e-5,
    eps=1e-8,
)


In [None]:
from sklearn.metrics import f1_score

def flat_accuracy(preds,labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    preds  : 2-D array-like of per-class scores, one row per example.
    labels : NumPy array of integer class ids (any shape; it is flattened).
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    true_classes = labels.flatten()
    n_correct = np.sum(predicted_classes == true_classes)
    return n_correct / len(true_classes)

def get_f1_score(preds, labels):
    """Return the weighted-average F1 of the arg-max predictions against `labels`.

    preds  : 2-D array-like of per-class scores, one row per example.
    labels : NumPy array of integer class ids (any shape; it is flattened).
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    true_classes = labels.flatten()
    return f1_score(true_classes, predicted_classes, average="weighted")


# Train Loop

In [None]:
import random

import numpy as np
import torch

# Seed the RNGs used from here on. The model and the data loaders were
# created in earlier cells, so this does not affect their initialisation.
torch.manual_seed(1)
random.seed(1)
np.random.seed(1)

epochs=2
total_steps=len(train_dataloader) *epochs
# Linear decay of the learning rate to zero over all optimizer steps, no warm-up.
scheduler=get_linear_schedule_with_warmup(optimizer,
                                         num_warmup_steps=0,
                                         num_training_steps=total_steps)

# Pick the device once and move the model a single time, not once per epoch.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Loss of every training batch, in order; plotted after training.
train_loss_set=[]
for _ in trange(epochs, desc="Epoch"):
    # ---- training pass ----
    model.train()
    tr_loss=0
    nb_tr_steps=0

    for batch in train_dataloader:
        b_input_ids, b_input_mask, b_labels=(tensor.to(device) for tensor in batch)
        optimizer.zero_grad()
        # Passing labels makes the model return the classification loss.
        outputs=model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        loss=outputs['loss']
        batch_loss=loss.item()
        train_loss_set.append(batch_loss)
        loss.backward()
        optimizer.step()
        scheduler.step()

        tr_loss+=batch_loss
        nb_tr_steps+=1
    print("train loss: {}".format(tr_loss/nb_tr_steps))

    # ---- validation pass ----
    model.eval()
    all_logits, all_label_ids=[],[]

    for batch in val_dataloader:
        b_input_ids, b_input_mask, b_labels=(tensor.to(device) for tensor in batch)

        with torch.no_grad():
            outputs=model(b_input_ids,token_type_ids=None, attention_mask=b_input_mask)

        all_logits.append(outputs['logits'].detach().cpu().numpy())
        all_label_ids.append(b_labels.to('cpu').numpy())

    # Score the whole validation set in one call: the mean of per-batch
    # weighted F1 values is not the F1 of the full set (small batches can
    # miss classes, and a short last batch would count as much as a full one).
    eval_f1=get_f1_score(np.concatenate(all_logits), np.concatenate(all_label_ids))
    print("f1: {}".format(eval_f1))

In [None]:
# Persist only the model's state_dict (its parameters/buffers), not the module object.
torch.save(model.state_dict(), "../models/bert_model_sentiment_f1_65.ckpt")

In [None]:
# Plot the per-batch training loss recorded during the training loop.
fig, ax = plt.subplots(figsize=(15, 8))
ax.set_title("Training Loss")
ax.set_xlabel("Batch")
ax.set_ylabel("Loss")
ax.plot(train_loss_set)
plt.show()