# Install necessary packages

In [None]:
!pip install datasets
!pip install evaluate
!pip install transformers
!pip install sentencepiece
!pip install --upgrade pip
!pip install tqdm

# About this project
- This project was the final project for the Natural Language Processing course (CSCI 5832) at the University of Colorado Boulder.
- The main purpose of the final project was to solve a subset of one of [SemEval-2024 Tasks](https://semeval.github.io/SemEval2024/tasks.html).
- This project is in particular focusing on the subtask 1 of [Task 3: The Competition of Multimodal Emotion Cause Analysis in Conversations](https://nustm.github.io/SemEval-2024_ECAC/).
- The project team members are:
  1. Jooseok Lee
  2. Seungwook Lee
- Simplified from the original project

# Introduction
- In this project, we aimed to solve the problem of textual Emotion-Cause Pair Extraction (ECPE), which is the first sub-task of SemEval-2024 Task 3, using text classification and question answering frameworks.
- The main purpose of textual ECPE is to find all sets of emotion-cause pairs where each utterance (i.e., small subset of a conversation) is matched with a single or multiple textual cause span(s) along with its emotional category.


<div align="center">
    <img src="img/ECPE_overview.jpg" alt="Overview of ECPE" width="500">
</div>

# Approach
- While the original paper solved this problem using a single solution, in this project we utilized two separate natural language processing (NLP) frameworks to solve it: text classification and question answering.
- That is, we split the original problem into two separate sub-problems and solved them independently.
- In our approach, the text classification model is responsible for determining the emotional category of a given utterance.
- A single utterance is given to the classification model, which predicts the utterance's emotional category: one of six emotions (i.e., Anger, Disgust, Fear, Joy, Sadness and Surprise) or Neutral.
- We fine-tuned the publicly available text classification large language model (LLM) (i.e., BERT classification model) to solve this sub-problem.
- Then, we utilized a question answering model to find textual cause span(s) of a given utterance.
- In particular, we changed our data to Stanford Question Answering Dataset (SQuAD) format to fine-tune publicly available question answering LLM (i.e., DistilBERT question answering model).

<div align="center">
    <img src="img/Approach.jpg" alt="Overview of our approach" width="500">
</div>

# Data Load
Load the original json data

In [None]:
# Generalized code for handling file path.
# Defines IN_COLAB (whether the notebook runs on Google Colab) and data_path
# (directory holding the dataset files) for the cells below.
import os

try:
    import google.colab  # noqa: F401 -- imported only to detect the Colab runtime
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    import sys
    from google.colab import drive

    # Mount Google Drive so the project folder stored there becomes reachable.
    drive.mount('/content/drive', force_remount=True)
    # Let the `utils` imports in later cells resolve from the project folder on Drive.
    sys.path.append('/content/drive/My Drive/PersonalPage/ECPE-with-BERT')
    # Define the data path (a specific folder in Google Drive)
    data_path = "/content/drive/My Drive/PersonalPage/ECPE-with-BERT/data"
else:
    # Local run: prefer the project-relative data folder. "/data" points at the
    # filesystem root and is kept only as a fallback for setups that relied on it.
    data_path = "./data" if os.path.isdir("./data") else "/data"

In [None]:
import json
from utils.preprocess import process_json_file

# Location of the Subtask 1 training file inside the data directory
json_data = f"{data_path}/Subtask_1_train.json"

# Parse the file; the three results are iterated per conversation by later cells
# (presumably parallel lists, one entry per conversation -- see utils.preprocess).
conversation_ids, conversations, emotion_labels = process_json_file(json_data)

# Text Classification
- In this assignment, we focus on one aspect of text classification: sentiment analysis.
- Sentiment analysis involves categorizing the emotional aspect of a given sentence or paragraph, identifying various emotional states such as positive, negative, or neutral.
- This type of analysis plays a crucial role in understanding the emotions and attitudes of users from various text sources like customer feedback, online reviews, and social media posts.
- Sentiment analysis has established itself as an important tool in understanding human emotions and attitudes through text data and is applied in various fields, including improving customer service, analyzing product reviews, public opinion research, and market analysis.


## Data Preprocessing
- For sentiment analysis, it is essential to first classify the sentences and the emotions (labels) felt in those sentences.
- Our data structure consists of conversations.
- Each conversation includes several utterances and the emotions felt in those utterances.
- The first step involves extracting the 'conversation' item from the JSON formatted data.
- Subsequently, each utterance within the conversation and its corresponding emotional label are extracted.
- The extracted emotional labels, seven in total, are then converted into numbers for ease of model processing.
- Following this, the process of tokenization of the utterances is carried out.
- This step involves breaking down the text into smaller units known as tokens, which is crucial for transforming the text data into a format that can be processed by the model.
- For this task, we use the ALBERT model, hence, we employ the Hugging Face Transformer library for optimized tokenization.
- This library provides a tokenization method tailored to the ALBERT model, assisting the model in processing each utterance more accurately and effectively.


In [None]:
# Class index assigned to each emotion label (position in the tuple = index)
emotion_to_number = {
    name: index
    for index, name in enumerate(
        ("joy", "sadness", "disgust", "fear", "anger", "neutral", "surprise")
    )
}


def map_emotion_to_number(emotion):
    """Return the class index for *emotion*, or -1 when the label is unknown."""
    if emotion in emotion_to_number:
        return emotion_to_number[emotion]
    return -1


# Quick demonstration of the mapping
emotion = 'joy'
mapped_number = map_emotion_to_number(emotion)
print(f"The emotion '{emotion}' is mapped to number {mapped_number}")

In [None]:
# Flatten the per-conversation lists into one flat list of utterances and one
# flat list of labels, keeping their original order.
list_utterance = [line for dialogue in conversations for line in dialogue]
list_emotion = [label for labels in emotion_labels for label in labels]

# Both counts should match: one label per utterance
print(f"{len(list_emotion)} : {len(list_utterance)}")

In [None]:
from sklearn.model_selection import train_test_split


def _unzip(pairs):
    """Turn a sequence of (utterance, emotion) pairs into two parallel tuples."""
    utterances, emotions = zip(*pairs)
    return utterances, emotions


# Pair every utterance with its label so both are shuffled and split together
data = list(zip(list_utterance, list_emotion))

# Hold out 20% of the pairs for testing ...
train_val_data, test_data = train_test_split(data, random_state=42, test_size=0.2)

# ... then 25% of the remainder for validation (roughly 60/20/20 overall)
train_data, val_data = train_test_split(train_val_data, random_state=42, test_size=0.25)

# Separate the utterances from the labels again for each split
train_utterances, train_emotions = _unzip(train_data)
val_utterances, val_emotions = _unzip(val_data)
test_utterances, test_emotions = _unzip(test_data)

# Model Training
- In the process of model development, we adopted the fine-tuning approach using the pre-trained 'AlbertForSequenceClassification' model.
- This method involves adapting an existing model to suit specific datasets and requirements.
- A key change in the fine-tuning process was configuring the model to match the number of labels contained in our data. Since our dataset distinctly identifies seven emotional states, the model was set up to classify these seven categories.
- By utilizing the 'AlbertForSequenceClassification', we were able to leverage the advantages of the ALBERT model, known for its efficiency and effectiveness in language classification tasks.
- The ALBERT model employs the attention mechanism of the BERT model, which aids in understanding the nuanced use of words within sentences.
- ALBERT is particularly effective in environments with limited computing resources, as it can deliver performance similar to BERT.
- This is due to its smaller size, faster training speed, and lower memory usage.
- The fine-tuning process demonstrates the flexibility and adaptability of pre-trained models in natural language processing, proving to be highly efficient for specific applications like ours.


In [None]:
# Imports libraries
import torch
from transformers import AutoTokenizer
from typing import Dict, List
import random
from tqdm.autonotebook import tqdm
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import pickle
import os
from transformers import BertTokenizer
from transformers import BertModel, AdamW
from torch import nn

from utils.model import SentimentDataBert
from utils.model import SentimentClassifier

In [None]:
# Prepare dataset
# Wrap the training utterances and labels, then build the shuffled, batched
# loader that the training loop iterates over.
# NOTE(review): train_emotions holds the labels exactly as returned by
# process_json_file; the cells shown above never call map_emotion_to_number,
# so SentimentDataBert presumably encodes the labels itself -- confirm.
dataset = SentimentDataBert(train_utterances, train_emotions)
data_loader = dataset.get_data_loaders(batch_size=32, shuffle=True)

In [None]:
# Checkpoint saving function
def save_checkpoint(model, optimizer, epoch, filename="checkpoint.pth"):
    """Write the model weights, optimizer state and epoch number to *filename*.

    The file holds a dict with the keys "state_dict", "optimizer" and "epoch",
    serialized with torch.save.
    """
    torch.save(
        dict(
            state_dict=model.state_dict(),
            optimizer=optimizer.state_dict(),
            epoch=epoch,
        ),
        filename,
    )

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Model define
# The number of output classes is the number of distinct emotion labels (7).
# list_emotion holds one entry per utterance, so its length is the dataset
# size and must not be used here.
model = SentimentClassifier(n_classes=len(emotion_to_number)).to(device)

In [None]:
# Fine-tuning hyperparameters.
# 2e-5 lies in the range commonly recommended for fine-tuning BERT; the
# previous value of 0.01 is orders of magnitude larger and typically makes
# fine-tuning diverge.
# NOTE(review): this assumes SentimentClassifier updates the pre-trained
# encoder weights -- if the encoder is frozen, revisit the learning rate.
NUM_EPOCHS = 1
LEARNING_RATE = 2e-5

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, correct_bias=False)
loss_fn = nn.CrossEntropyLoss()

# Training Loop
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0.0

    for batch in tqdm(data_loader, desc=f"Epoch {epoch + 1}"):
        # Each batch is indexed positionally: token ids, attention mask, labels
        input_ids = batch[0].to(device)
        attention_mask = batch[1].to(device)
        labels = batch[2].to(device)

        # forward pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = loss_fn(outputs, labels)

        # Backprop: clear stale gradients, compute new ones, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # max(..., 1) avoids a ZeroDivisionError when the loader yields no batches
    print(f"Epoch {epoch+1}, Average Loss: {total_loss / max(len(data_loader), 1)}")

In [None]:
from utils.model import evaluate_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate on the validation split; shuffling is unnecessary for evaluation
val_dataset = SentimentDataBert(val_utterances, val_emotions)
val_loader = val_dataset.get_data_loaders(batch_size=32, shuffle=False)

# Use the device selected when the model was created ('cuda' or 'cpu') instead
# of a hard-coded 'cuda', which fails on machines without a GPU.
predictions, true_labels = evaluate_model(model, val_loader, device=device.type)

# Weighted averages weight each class by its number of true instances
accuracy = accuracy_score(true_labels, predictions)
precision = precision_score(true_labels, predictions, average='weighted')
recall = recall_score(true_labels, predictions, average='weighted')
f1 = f1_score(true_labels, predictions, average='weighted')

# Report the scores; previously they were computed but never displayed
print(
    f"Validation - accuracy: {accuracy:.4f}, precision: {precision:.4f}, "
    f"recall: {recall:.4f}, F1: {f1:.4f}"
)

## Prediction

In [None]:
# Evaluate on the held-out test split; shuffling is unnecessary for evaluation
test_dataset = SentimentDataBert(test_utterances, test_emotions)
test_loader = test_dataset.get_data_loaders(batch_size=32, shuffle=False)

# Use the device selected when the model was created ('cuda' or 'cpu') instead
# of a hard-coded 'cuda', which fails on machines without a GPU.
predictions, true_labels = evaluate_model(model, test_loader, device=device.type)

# Weighted averages weight each class by its number of true instances
accuracy = accuracy_score(true_labels, predictions)
precision = precision_score(true_labels, predictions, average='weighted')
recall = recall_score(true_labels, predictions, average='weighted')
f1 = f1_score(true_labels, predictions, average='weighted')

# Report the scores; previously they were computed but never displayed
print(
    f"Test - accuracy: {accuracy:.4f}, precision: {precision:.4f}, "
    f"recall: {recall:.4f}, F1: {f1:.4f}"
)