# Analyzing .csv files

In [3]:
import pandas as pd

In [None]:
train_sent_emo = pd.read_csv('/kaggle/input/meld-emotion-recognition/MELD.Raw/train/train_sent_emo.csv')
dev_sent_emo = pd.read_csv('/kaggle/input/meld-emotion-recognition/MELD.Raw/dev_sent_emo.csv')
test_sent_emo = pd.read_csv('/kaggle/input/meld-emotion-recognition/MELD.Raw/test_sent_emo.csv')

In [None]:
# train_sent_emo.head(5)

In [None]:
# dev_sent_emo.head(5)

In [None]:
# test_sent_emo.head(5)

---

# Functions to Save and Read Data in JSON Format Using Python

In [4]:
import json

In [5]:
def save_to_json(data, file_path):
    """
    Save data to a JSON file.
    
    Args:
    - data: Python object (e.g., dict or list) to save.
    - file_path: Path to the JSON file.
    """
    try:
        with open(file_path, "w") as json_file:
            json.dump(data, json_file, indent=4)  # Save with pretty formatting
        print(f"Data successfully saved to {file_path}")
    except Exception as e:
        print(f"Error saving data to JSON: {e}")

In [6]:
def read_from_json(file_path):
    """
    Read data from a JSON file.
    
    Args:
    - file_path: Path to the JSON file.
    
    Returns:
    - The Python object (e.g., dict or list) loaded from the JSON file.
    """
    try:
        with open(file_path, "r") as json_file:
            data = json.load(json_file)
        print(f"Data successfully loaded from {file_path}")
        return data
    except Exception as e:
        print(f"Error reading data from JSON: {e}")
        return None

---

# Structuring MELD Data for Emotion Recognition 

In [None]:
# MELD Data for Emotion Recognition (MELD_Data.json)

"""

MELD_data = {
    "data": [
        {
            "text": "also I was the point person on my company\u0092s transition from the KL-5 to GR-6 system.",
            "split": "train",
            "y": 0,
            "dialog": 0,
            "utterance": 0,
            "season": 8,
            "episode": 21,
            "num_words": 16,
            "dia_utt": "0_0",
            "speaker": "Chandler"
        },
        {
            "text": "You must\u0092ve had your hands full.",
            "split": "train",
            "y": 0,
            "dialog": 0,
            "utterance": 1,
            "season": 8,
            "episode": 21,
            "num_words": 6,
            "dia_utt": "0_1",
            "speaker": "The Interviewer"
        }, 

        .................................................. (so on)

        {
            "text": "Oh my God, he\u0092s lost it. He\u0092s totally lost it.",
            "split": "dev",
            "y": 3,
            "dialog": 0,
            "utterance": 0,
            "season": 4,
            "episode": 7,
            "num_words": 10,
            "dia_utt": "0_0",
            "speaker": "Phoebe"
        },
        {
            "text": "What?",
            "split": "dev",
            "y": 1,
            "dialog": 0,
            "utterance": 1,
            "season": 4,
            "episode": 7,
            "num_words": 1,
            "dia_utt": "0_1",
            "speaker": "Monica"
        },

        .................................................. (so on)

        {
            "text": "Why do all you\u0092re coffee mugs have numbers on the bottom?",
            "split": "test",
            "y": 1,
            "dialog": 0,
            "utterance": 0,
            "season": 3,
            "episode": 19,
            "num_words": 11,
            "dia_utt": "0_0",
            "speaker": "Mark"
        },
        {
            "text": "Oh. That\u0092s so Monica can keep track. That way if one on them is missing, she can be like, \u0091Where\u0092s number 27?!\u0092",
            "split": "test",
            "y": 6,
            "dialog": 0,
            "utterance": 1,
            "season": 3,
            "episode": 19,
            "num_words": 22,
            "dia_utt": "0_1",
            "speaker": "Rachel"
        },

        .................................................. (so on)

             ],
    
    "max_sentence_length": 30,
    "label_index": {
        "neutral": 0,
        "surprise": 1,
        "fear": 2,
        "sadness": 3,
        "joy": 4,
        "disgust": 5,
        "anger": 6
    }
    
"""

In [None]:
import pandas as pd

# Read the CSV files
train_df = pd.read_csv('/kaggle/input/meld-emotion-recognition/MELD.Raw/train/train_sent_emo.csv')
dev_df = pd.read_csv('/kaggle/input/meld-emotion-recognition/MELD.Raw/dev_sent_emo.csv')
test_df = pd.read_csv('/kaggle/input/meld-emotion-recognition/MELD.Raw/test_sent_emo.csv')

# Concatenate all data into a single DataFrame
df = pd.concat([train_df, dev_df, test_df])

# Define the label index for emotions
label_index = {
    "neutral": 0,
    "surprise": 1,
    "fear": 2,
    "sadness": 3,
    "joy": 4,
    "disgust": 5,
    "anger": 6
}

# Initialize the data structure
MELD_data = {
    "data": [],
    "max_sentence_length": 30,
    "label_index": label_index
}

# Helper function to find the split by comparing rows
def get_split(row):
    # Drop 'Sr No.' from the DataFrame and compare the rest of the columns
    row_without_srno = row.drop('Sr No.')
    
    # Check if the row exists in the train_df (excluding 'Sr No.')
    if (train_df.drop('Sr No.', axis=1) == row_without_srno).all(axis=1).any():
        return "train"
    
    # Check if the row exists in the dev_df (excluding 'Sr No.')
    elif (dev_df.drop('Sr No.', axis=1) == row_without_srno).all(axis=1).any():
        return "dev"
    
    # Check if the row exists in the test_df (excluding 'Sr No.')
    elif (test_df.drop('Sr No.', axis=1) == row_without_srno).all(axis=1).any():
        return "test"
    
    else:
        return None




# Process each row in the DataFrame and append to the data format
for index, row in df.iterrows():
    # Get the split using the helper function
    split = get_split(row)

    if split is None:
        print(f"Row mismatch: {row['Season']}, {row['Episode']}, {row['Dialogue_ID']}, {row['Utterance_ID']}")
        continue  # Skip rows that don't match any split
    
    entry = {
        "text": row["Utterance"],
        "split": split,
        "y": label_index.get(row["Emotion"].lower(), -1),  # Get emotion label
        "dialog": row["Dialogue_ID"],
        "utterance": row["Utterance_ID"],
        "season": row["Season"],
        "episode": row["Episode"],
        "num_words": len(row["Utterance"].split()),
        "dia_utt": f"{row['Dialogue_ID']}_{row['Utterance_ID']}",
        "speaker": row["Speaker"]
    }
    
    # Append the entry to the data
    MELD_data["data"].append(entry)

---

## Save MELD Data in JSON Format 

In [None]:
# Example Usage
if __name__ == "__main__":
    # Filepath
    MELD_data_file_path = "/kaggle/working/MELD_Data.json"

    # Save data to JSON
    save_to_json(MELD_data, MELD_data_file_path)

---

## Read MELD Data in JSON Format 

In [5]:
# Example Usage
if __name__ == "__main__":
    # Filepath
    MELD_data_file_path = "/kaggle/working/MELD_Data.json"

    # Read data from JSON
    MELD_data = read_from_json(MELD_data_file_path)
    print("Loaded Data:", MELD_data)

Error reading data from JSON: [Errno 2] No such file or directory: '/kaggle/working/MELD_Data.json'
Loaded Data: None


---

# Structuring Text Data for Emotion Recognition 

In [None]:
# Text Data for Emotion Recognition (MELD_Textual_Data.json)

"""

MELD_textual_data = {
    "train": [
        {
            "0_0": "also I was the point person on my company\u0092s transition from the KL-5 to GR-6 system.",
            "y": 0,
            "label": "neutral"
        },
        {
            "0_1": "You must\u0092ve had your hands full.",
            "y": 0,
            "label": "neutral"
        },
        {
            "0_2": "That I did. That I did.",
            "y": 0,
            "label": "neutral"
        },

        .................................................. (so on)

        ],
    
    "dev": [
        {
            "0_0": "Oh my God, he\u0092s lost it. He\u0092s totally lost it.",
            "y": 3,
            "label": "sadness"
        },
        {
            "0_1": "What?",
            "y": 1,
            "label": "surprise"
        },
        {
            "1_0": "Or! Or, we could go to the bank, close our accounts and cut them off at the source.",
            "y": 0,
            "label": "neutral"
        },

        .................................................. (so on)

        ],
    
    "test": [
        {
            "0_0": "Why do all you\u0092re coffee mugs have numbers on the bottom?",
            "y": 1,
            "label": "surprise"
        },
        {
            "0_1": "Oh. That\u0092s so Monica can keep track. That way if one on them is missing, she can be like, \u0091Where\u0092s number 27?!\u0092",
            "y": 6,
            "label": "anger"
        },
        {
            "0_2": "Y'know what?",
            "y": 0,
            "label": "neutral"
        },

        .................................................. (so on)

        ]
}

"""

In [None]:
import os

# Initialize the text_data dictionary
MELD_textual_data = {
    "train": [],
    "dev": [],
    "test": []
}


# Loop through the data_format and organize text accordingly
for split in ['train', 'dev', 'test']:
    for entry in MELD_data["data"]:
        if entry["split"] == split:  # Check if the split matches
            # Construct the key for text (e.g., "0_0")
            text_key = entry["dia_utt"]
            
            
            # Extract the emotion label and index from data_format
            emotion_label = list(MELD_data["label_index"].keys())[entry["y"]]
            # Create the dictionary for the text entry
            text_entry = {
                text_key: entry["text"],
                "y": entry["y"],
                "label": emotion_label
            }
            
            # Add the text entry to the appropriate split in text_data
            MELD_textual_data[split].append(text_entry)

---

## Save MELD Textual Data in JSON Format 

In [None]:
# Example Usage
if __name__ == "__main__":
    # Filepath
    MELD_textual_data_file_path = "/kaggle/working/MELD_Textual_Data.json"

    # Save data to JSON
    save_to_json(MELD_textual_data, MELD_textual_data_file_path)

---

## Read MELD Textual Data in JSON Format 

In [None]:
# Example Usage
if __name__ == "__main__":
    # Filepath
    MELD_textual_data_file_path = "/kaggle/working/MELD_Textual_Data.json"

    # Read data from JSON
    MELD_textual_data = read_from_json(MELD_textual_data_file_path)
    print("Loaded Data:", MELD_textual_data)

---

# Structuring Video Data for Emotion Recognition with Flexible Filename Matching

In [None]:
# Video Data for Emotion Recognition (MELD_Video_Data.json)

"""

MELD_video_data = {
    "train": [
        {
            "0_0": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt0.mp4",
            "y": 0,
            "label": "neutral"
        },
        {
            "0_1": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt1.mp4",
            "y": 0,
            "label": "neutral"
        },
        {
            "0_2": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt2.mp4",
            "y": 0,
            "label": "neutral"
        },

        .................................................. (so on)

        ],

    "dev": [
        {
            "0_0": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/dev/dev_splits_complete/dia0_utt0.mp4",
            "y": 3,
            "label": "sadness"
        },
        {
            "0_1": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/dev/dev_splits_complete/dia0_utt1.mp4",
            "y": 1,
            "label": "surprise"
        },
        {
            "1_0": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/dev/dev_splits_complete/dia1_utt0.mp4",
            "y": 0,
            "label": "neutral"
        },
        
        .................................................. (so on)
        
        ],

    "test": [
        {
            "0_0": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/test/output_repeated_splits_test/dia0_utt0.mp4",
            "y": 1,
            "label": "surprise"
        },
        {
            "0_1": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/test/output_repeated_splits_test/dia0_utt1.mp4",
            "y": 6,
            "label": "anger"
        },
        {
            "0_2": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/test/output_repeated_splits_test/dia0_utt2.mp4",
            "y": 0,
            "label": "neutral"
        },

        .................................................. (so on)

        ]
    }

"""

In [8]:
import os

# Initialize the video_data dictionary
MELD_video_data = {
    "train": [],
    "dev": [],
    "test": []
}

# Define the paths for video files
video_paths = {
    "train": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/",
    "dev": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/dev/dev_splits_complete/",
    "test": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/test/output_repeated_splits_test/"
}

# Loop through the data_format and organize videos accordingly
for split in ['train', 'dev', 'test']:
    for entry in MELD_data["data"]:
        if entry["split"] == split:  # Check if the split matches
            # Construct the key for video (e.g., "0_0")
            video_key = entry["dia_utt"]
            
            # Convert video_key from "0_0" to "dia0_utt0" format
            video_filename = f"dia{video_key.replace('_', '_utt')}.mp4"
            
            
            # Construct the path for the video
            video_path = os.path.join(video_paths[split], video_filename)
            
            # Extract the emotion label and index from data_format
            emotion_label = list(MELD_data["label_index"].keys())[entry["y"]]
            # Create the dictionary for the video entry
            video_entry = {
                video_key: video_path,
                "y": entry["y"],
                "label": emotion_label
            }
            
            # Add the video entry to the appropriate split in video_data
            MELD_video_data[split].append(video_entry)

---

## Save MELD Video Data in JSON Format 

In [9]:
# Example Usage
if __name__ == "__main__":
    # Filepath
    MELD_video_data_file_path = "/kaggle/working/MELD_Video_Data.json"

    # Save data to JSON
    save_to_json(MELD_video_data, MELD_video_data_file_path)

Data successfully saved to /kaggle/working/MELD_Video_Data.json


---

## Read MELD Video Data in JSON Format 

In [10]:
# Example Usage
if __name__ == "__main__":
    # Filepath
    MELD_video_data_file_path = "/kaggle/working/MELD_Video_Data.json"

    # Read data from JSON
    MELD_video_data = read_from_json(MELD_video_data_file_path)
    print("Loaded Data:", MELD_video_data)

Data successfully loaded from /kaggle/working/MELD_Video_Data.json
Loaded Data: {'train': [{'0_0': '/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt0.mp4', 'y': 0, 'label': 'neutral'}, {'0_1': '/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt1.mp4', 'y': 0, 'label': 'neutral'}, {'0_2': '/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt2.mp4', 'y': 0, 'label': 'neutral'}, {'0_3': '/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt3.mp4', 'y': 0, 'label': 'neutral'}, {'0_4': '/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt4.mp4', 'y': 1, 'label': 'surprise'}, {'0_5': '/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt5.mp4', 'y': 0, 'label': 'neutral'}, {'0_6': '/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/train/train_splits/dia0_utt6.mp4', 'y': 0, 'label': 'neutral'}