# Imports

In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
import pandas as pd

from transformers import  BertModel,BertTokenizer
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import RandomSampler
from torch.utils.data import SequentialSampler

from keras.utils import pad_sequences
from sklearn.model_selection import train_test_split


  from .autonotebook import tqdm as notebook_tqdm


# Model

In [3]:
# Config

MAX_LEN = 256 # Fixed sequence length: passed as `maxlen` when token ids and attention masks are padded/truncated

## Load the Data

In [4]:
# Tiny hand-written sample corpus, one (text, emotion, toxicity) triple per row.
# NOTE(review): the meaning of the integer labels is not stated in this notebook;
# judging by the texts, emotion looks like 0=neutral / 1=positive / 2=negative and
# toxicity like a 0/1 flag -- confirm.
rows = [
    ("I'm feeling happy today!", 1, 0),
    ("This movie is amazing and uplifting.", 1, 0),
    ("The weather is gloomy and sad.", 2, 1),
    ("The restaurant service was terrible.", 2, 1),
    ("I feel neutral about this book.", 0, 0),
    ("The speech was inspiring and motivational.", 1, 0),
    ("The internet trolls are spreading toxicity.", 2, 1),
]

# Transpose the rows into the column-oriented dict the DataFrame is built from.
text_col, emotion_col, toxicity_col = map(list, zip(*rows))
data = {'text': text_col, 'emotion': emotion_col, 'toxicity': toxicity_col}

# Create a DataFrame and display it as the cell output
df = pd.DataFrame(data)
df

Unnamed: 0,text,emotion,toxicity
0,I'm feeling happy today!,1,0
1,This movie is amazing and uplifting.,1,0
2,The weather is gloomy and sad.,2,1
3,The restaurant service was terrible.,2,1
4,I feel neutral about this book.,0,0
5,The speech was inspiring and motivational.,1,0
6,The internet trolls are spreading toxicity.,2,1


# Preprocessing

In [5]:
# Load the WordPiece tokenizer that matches the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Encode every text into a dict-like with 'input_ids', 'token_type_ids' and
# 'attention_mask'. Truncation is done here, inside the tokenizer, so that a text
# longer than MAX_LEN still ends with the closing [SEP] token; the later
# `pad_sequences(..., truncating='post')` step cuts raw id lists from the right
# and would otherwise drop that token.
tokenized_texts = [
    tokenizer(text, add_special_tokens=True, truncation=True, max_length=MAX_LEN)
    for text in df['text']
]
tokenized_texts

[{'input_ids': [101, 1045, 1005, 1049, 3110, 3407, 2651, 999, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]},
 {'input_ids': [101, 2023, 3185, 2003, 6429, 1998, 2039, 26644, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]},
 {'input_ids': [101, 1996, 4633, 2003, 24067, 2100, 1998, 6517, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]},
 {'input_ids': [101, 1996, 4825, 2326, 2001, 6659, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]},
 {'input_ids': [101, 1045, 2514, 8699, 2055, 2023, 2338, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]},
 {'input_ids': [101, 1996, 4613, 2001, 18988, 1998, 14354, 2389, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 

In [6]:
import numpy as np


def _pad_to_max_len(sequences, max_len):
    """Zero-pad or cut each sequence on the right to exactly ``max_len`` entries.

    Args:
        sequences: list of integer lists (token ids or attention-mask values).
        max_len: length every row is brought to.

    Returns:
        The 2-D int64 array produced by ``pad_sequences``, one row per sequence.
    """
    # 'int64' rather than 'long': NumPy's 'long' is the platform C long, which is
    # only 32 bits wide on Windows.
    return pad_sequences(sequences, maxlen=max_len, dtype='int64',
                         value=0, truncating='post', padding='post')


def _as_long_dataset(*arrays):
    """Wrap the given arrays as int64 tensors inside a single ``TensorDataset``."""
    return TensorDataset(*(torch.tensor(array, dtype=torch.long) for array in arrays))


input_ids = tokenized_texts
toxicity_labels = np.array(df["toxicity"])
emotion_labels = np.array(df['emotion'])

# Split encodings and both label arrays with one call so the rows stay aligned.
(train_encodings, test_encodings,
 train_toxicity_labels, test_toxicity_labels,
 train_emotion_labels, test_emotion_labels) = train_test_split(
    input_ids,
    toxicity_labels,
    emotion_labels,
    random_state=42, test_size=0.2)

# Pull the raw (still ragged) id lists out of each encoding.
train_input_ids = [encoding["input_ids"] for encoding in train_encodings]
test_input_ids = [encoding["input_ids"] for encoding in test_encodings]

# Pad and truncate the input_ids and attention_mask to a fixed length
train_inputs = _pad_to_max_len(train_input_ids, MAX_LEN)
test_inputs = _pad_to_max_len(test_input_ids, MAX_LEN)
train_masks = _pad_to_max_len(
    [encoding["attention_mask"] for encoding in train_encodings], MAX_LEN)
test_masks = _pad_to_max_len(
    [encoding["attention_mask"] for encoding in test_encodings], MAX_LEN)

# Cheap sanity checks: ids and masks must line up row for row and column for column.
assert train_inputs.shape == train_masks.shape == (len(train_encodings), MAX_LEN)
assert test_inputs.shape == test_masks.shape == (len(test_encodings), MAX_LEN)

# Define Dataloader
batch_size = 32

# Each dataset item is (input_ids, attention_mask, toxicity_label, emotion_label).
train_data = _as_long_dataset(train_inputs, train_masks,
                              train_toxicity_labels, train_emotion_labels)
train_sampler = RandomSampler(train_data)  # reshuffle the training rows every epoch
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

test_data = _as_long_dataset(test_inputs, test_masks,
                             test_toxicity_labels, test_emotion_labels)
test_sampler = SequentialSampler(test_data)  # fixed order for evaluation
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)


## Configure and instantiate the model

In [10]:
# NOTE(review): `MultiTaskModel` is not defined in any cell shown above this one;
# the cell that defines it has to be run before this one.

# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the model and move its parameters to the chosen device in one step.
model = MultiTaskModel().to(device)

# Last expression of the cell: shows the module structure as the cell output.
model


MultiTaskModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_af

In [11]:
# Number of passes over the training data. Kept under its own name so the loop
# variable `epoch` no longer overwrites the epoch count.
NUM_EPOCHS = 20

optimizer = optim.Adam(model.parameters(), lr=2e-5, eps=1e-8)
criterion = nn.CrossEntropyLoss()

# Make sure dropout is active while training. The evaluation cell calls
# `model.eval()`, so without this a re-run of the training cell would silently
# train in evaluation mode.
model.train()

# NOTE(review): the recorded run of this cell logged losses of roughly 10-11. With
# the 2 toxicity and 3 emotion classes present in the sample data, two cross-entropy
# terms would be expected to start near ln(2) + ln(3) ~ 1.8 -- check the output
# sizes of the model heads.
for epoch in range(NUM_EPOCHS):
    for step, batch in enumerate(train_dataloader):
        # Batch layout follows the TensorDataset built in the preprocessing cell:
        # (input ids, attention mask, toxicity labels, emotion labels).
        # Batch-local names keep the notebook-level `input_ids`, `toxicity_labels`
        # and `emotion_labels` from being overwritten by the last mini-batch.
        batch_input_ids, batch_attention_mask, batch_toxicity, batch_emotion = (
            tensor.to(device) for tensor in batch
        )

        # Clear gradients left over from the previous step before the forward pass.
        optimizer.zero_grad()

        toxicity_logits, emotion_logits, toxicity_probs, emotion_probs = model(
            batch_input_ids, batch_attention_mask
        )

        # Multi-task objective: unweighted sum of the two classification losses.
        toxicity_loss = criterion(toxicity_logits, batch_toxicity)
        emotion_loss = criterion(emotion_logits, batch_emotion)
        loss = toxicity_loss + emotion_loss

        loss.backward()
        optimizer.step()

        print(f"Epoch: {epoch}, Step: {step}, Loss: {loss.item()}")

Epoch: 0, Step: 0, Loss: 11.021903991699219
Epoch: 1, Step: 0, Loss: 11.389787673950195
Epoch: 2, Step: 0, Loss: 11.110596656799316
Epoch: 3, Step: 0, Loss: 10.880319595336914
Epoch: 4, Step: 0, Loss: 10.860169410705566
Epoch: 5, Step: 0, Loss: 10.729597091674805
Epoch: 6, Step: 0, Loss: 10.76536750793457
Epoch: 7, Step: 0, Loss: 10.663823127746582
Epoch: 8, Step: 0, Loss: 10.63259220123291
Epoch: 9, Step: 0, Loss: 10.589326858520508
Epoch: 10, Step: 0, Loss: 10.412817001342773
Epoch: 11, Step: 0, Loss: 10.261810302734375
Epoch: 12, Step: 0, Loss: 10.630132675170898
Epoch: 13, Step: 0, Loss: 10.528005599975586
Epoch: 14, Step: 0, Loss: 10.492459297180176
Epoch: 15, Step: 0, Loss: 10.387003898620605
Epoch: 16, Step: 0, Loss: 10.423505783081055
Epoch: 17, Step: 0, Loss: 10.223806381225586
Epoch: 18, Step: 0, Loss: 10.16076374053955
Epoch: 19, Step: 0, Loss: 10.309852600097656


# Evaluation

In [12]:
# Switch the model and all of its submodules (including the Dropout layers) to
# evaluation mode. The call returns the module itself, so as the last expression
# of the cell it also prints the model structure as the cell output.
model.eval()



MultiTaskModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_af