In [7]:
!pip install transformers



In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizer, BartForConditionalGeneration, BartTokenizer
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from transformers import BertModel, AdamW
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm

In [2]:
# Load the dataset
data = pd.read_excel('//content/5247-rows_3-Emotions_No-Type.xlsx')

In [3]:
# Extract input (Utterance) and target (Emotion)
X = data['Utterance'].values
y = data['Emotion'].values

In [4]:
# Label encode 'Emotion' with new values [-1, 0, 1]
label_encoder_emotion = LabelEncoder()
y = label_encoder_emotion.fit_transform(y)

In [5]:
# Label encode 'Dialogue_Act'
label_encoder_dialogue_act = LabelEncoder()
dialogue_act_encoded = label_encoder_dialogue_act.fit_transform(data['Dialogue_Act'])

In [6]:
# Train-test split
X_train, X_test, y_train, y_test, dialogue_act_train, dialogue_act_test = train_test_split(
    X, y, dialogue_act_encoded, test_size=0.2, random_state=42
)

In [8]:
# Initialize BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [9]:
# Tokenize the utterances
def tokenize_data(text_list):
    return tokenizer(
        text_list,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors='pt'
    )

In [10]:
train_encodings = tokenize_data(X_train.astype(str).tolist())
test_encodings = tokenize_data(X_test.astype(str).tolist())

In [11]:
# PyTorch Dataset Class
class EmotionDataset(Dataset):
    def __init__(self, encodings, dialogue_act, labels):
        self.encodings = encodings
        self.dialogue_act = dialogue_act
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['dialogue_act'] = torch.tensor(self.dialogue_act[idx], dtype=torch.long)
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        return len(self.labels)

In [12]:
# Initialize datasets and data loaders
train_dataset = EmotionDataset(train_encodings, dialogue_act_train, y_train)
test_dataset = EmotionDataset(test_encodings, dialogue_act_test, y_test)

In [13]:
# Custom BERT Model with Embeddings for Dialogue Act
class BertWithAdditionalFeatures(nn.Module):
    def __init__(self, bert_model, dialogue_act_vocab_size, embedding_dim, num_labels):
        super(BertWithAdditionalFeatures, self).__init__()
        self.bert = bert_model
        self.dialogue_act_embedding = nn.Embedding(dialogue_act_vocab_size, embedding_dim)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(bert_model.config.hidden_size + embedding_dim, num_labels)

    def forward(self, input_ids, attention_mask, dialogue_act):
        # Get BERT embeddings
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]

        # Get embedding for Dialogue Act
        dialogue_act_embedded = self.dialogue_act_embedding(dialogue_act)

        # Concatenate BERT output with Dialogue Act embeddings
        combined_output = torch.cat((pooled_output, dialogue_act_embedded), dim=1)

        # Pass through fully connected layer
        output = self.fc(self.dropout(combined_output))
        return output

In [14]:
# Training parameters
batch_size = 16
learning_rate = 5e-5
epochs = 3

In [15]:
# Initialize data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [16]:
# Initialize the BERT model and the custom model
bert_model = BertModel.from_pretrained('bert-base-uncased')
model = BertWithAdditionalFeatures(
    bert_model=bert_model,
    dialogue_act_vocab_size=len(label_encoder_dialogue_act.classes_),
    embedding_dim=16,
    num_labels=len(label_encoder_emotion.classes_)
)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [17]:
# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

BertWithAdditionalFeatures(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [18]:
from torch.optim import AdamW  # Import AdamW from torch.optim

# Initialize optimizer and loss function
optimizer = AdamW(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [19]:
# Training loop
model.train()
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    for batch in tqdm(train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}

        outputs = model(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask'],
            dialogue_act=batch['dialogue_act']
        )
        loss = criterion(outputs, batch['labels'])
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    print(f"Loss: {loss.item()}")

Epoch 1/3


100%|██████████| 263/263 [01:28<00:00,  2.96it/s]


Loss: 0.8956910967826843
Epoch 2/3


100%|██████████| 263/263 [01:29<00:00,  2.94it/s]


Loss: 0.05311797186732292
Epoch 3/3


100%|██████████| 263/263 [01:29<00:00,  2.94it/s]

Loss: 0.032317787408828735





In [20]:
# Evaluation
model.eval()
predictions, true_labels = [], []

with torch.no_grad():
    for batch in test_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask'],
            dialogue_act=batch['dialogue_act']
        )

        logits = outputs
        predictions.extend(torch.argmax(logits, dim=-1).cpu().numpy())
        true_labels.extend(batch['labels'].cpu().numpy())

In [21]:
# Calculate accuracy and classification report
accuracy = accuracy_score(true_labels, predictions)
print(f"Test Accuracy: {accuracy:.4f}")
target_names = [str(class_) for class_ in label_encoder_emotion.classes_]
report = classification_report(true_labels, predictions, target_names=target_names)
print(report)

Test Accuracy: 0.8533
              precision    recall  f1-score   support

          -1       0.64      0.73      0.68       212
           0       0.92      0.90      0.91       827
           1       0.00      0.00      0.00        11

    accuracy                           0.85      1050
   macro avg       0.52      0.54      0.53      1050
weighted avg       0.85      0.85      0.85      1050



In [23]:
# BART Summarization (for new samples longer than 15 words)
bart_model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
bart_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

model.safetensors:  14%|#4        | 231M/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [35]:
def summarize_text(utterance):
    if len(utterance.split()) > 30:
        inputs = bart_tokenizer(utterance, max_length=1024, return_tensors='pt', truncation=True)
        summary_ids = bart_model.generate(inputs['input_ids'], max_length=50, min_length=15, length_penalty=2.0, num_beams=4, early_stopping=True)
        return bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return utterance

In [72]:
# Example prediction on new sample with summarization
new_sample = {
    "Utterance": "I love you Bhakti",
    "Dialogue_Act": "op"
}

In [73]:
# Summarize the utterance if necessary
summarized_utterance = summarize_text(new_sample['Utterance'])
print(f"Summarized Utterance: {summarized_utterance}")

Summarized Utterance: I love you Bhakti


In [74]:
# Tokenize and encode Dialogue Act
new_utterance_encoding = tokenize_data([summarized_utterance])
encoded_dialogue_act = torch.tensor(label_encoder_dialogue_act.transform([new_sample['Dialogue_Act']]), dtype=torch.long)

In [75]:
# Ensure the model is in evaluation mode
model.eval()

BertWithAdditionalFeatures(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [76]:
with torch.no_grad():
    input_ids = new_utterance_encoding['input_ids'].to(device)
    attention_mask = new_utterance_encoding['attention_mask'].to(device)
    dialogue_act = encoded_dialogue_act.to(device)

    output = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        dialogue_act=dialogue_act
    )

    predicted_emotion_idx = torch.argmax(output, dim=1).cpu().numpy()[0]
    predicted_emotion = label_encoder_emotion.inverse_transform([predicted_emotion_idx])[0]

print(f"Predicted Emotion: {predicted_emotion}")

Predicted Emotion: 0
