### Importing necessary libraries

In [1]:
import nltk
nltk.download('punkt')
nltk.download('vader_lexicon')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\su1qt\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\su1qt\AppData\Roaming\nltk_data...


True

In [2]:
import pandas as pd
import numpy as np
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import vaderSentiment
from transformers import pipeline


In [18]:
import os
# Define the paths
scripts_raw_path = r'C:\Users\su1qt\Significant Projects\Honors Research\data\scripts_raw'  # Adjusted path
dialogues_path = r'C:\Users\su1qt\Significant Projects\Honors Research\data\dialogues'      # Output directory

# Ensure the output directory exists
if not os.path.exists(dialogues_path):
    os.makedirs(dialogues_path)

# Define the path to the Mulan script
script_filename = 'mulan_script.txt'
script_path = os.path.join(scripts_raw_path, script_filename)

# Verify that the script file exists
if not os.path.isfile(script_path):
    print(f"Error: The file {script_path} does not exist.")
else:
    print(f"Processing script: {script_path}")



Processing script: C:\Users\su1qt\Significant Projects\Honors Research\data\scripts_raw\mulan_script.txt


In [24]:
import re
character_pattern = re.compile(r'^[A-Z][A-Z\s\-#\d]*:$')
dialogue_pattern = re.compile(r'^([A-Za-z][A-Za-z\s\-\d#]*)(?:\s*\[.*?\])?:\s*(.*)')
scene_pattern = re.compile(r'^\[.*\]$')


### Dialogue extraction function

In [25]:
def extract_dialogues(script_path):
    dialogues = []
    with open(script_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    
    current_dialogue = None
    
    for line_number, line in enumerate(lines, start=1):
        line = line.strip()
        
        # Skip empty lines
        if not line:
            continue
        
        # Skip scene descriptions that are in square brackets and do not have dialogue
        if line.startswith('[') and line.endswith(']'):
            continue
        
        # Try to match dialogue lines
        match = re.match(r'^([A-Za-z][A-Za-z\s\-\d#]*)(?:\s*\[.*?\])?:\s*(.*)', line)
        if match:
            character = match.group(1)
            dialogue = match.group(2)
            
            # Remove any stage directions within the dialogue
            dialogue = re.sub(r'\[.*?\]|\(.*?\)', '', dialogue).strip()
            
            if dialogue:
                character = normalize_character_name(character)
                dialogues.append({
                    'Character': character,
                    'Dialogue': dialogue
                })
                current_dialogue = None
            else:
                current_dialogue = {
                    'Character': character,
                    'Dialogue': ''
                }
        else:
            # If current_dialogue is not None, append this line to it
            if current_dialogue is not None:
                # Remove any stage directions within the line
                dialogue_line = re.sub(r'\[.*?\]|\(.*?\)', '', line).strip()
                if dialogue_line:
                    current_dialogue['Dialogue'] += ' ' + dialogue_line
            else:
                # Maybe it's a continuation of the previous dialogue
                if dialogues:
                    last_dialogue = dialogues[-1]
                    dialogue_line = re.sub(r'\[.*?\]|\(.*?\)', '', line).strip()
                    if dialogue_line:
                        last_dialogue['Dialogue'] += ' ' + dialogue_line
    
    # Debug: Print the number of dialogues extracted
    print(f"Extracted {len(dialogues)} dialogues from the script.")
    
    return dialogues


## Creating Dataframe

In [26]:
# Extract dialogues from the script
if os.path.isfile(script_path):
    mulan_dialogues = extract_dialogues(script_path)
    
    # Debug: Print the number of dialogues extracted
    print(f"Number of dialogues extracted: {len(mulan_dialogues)}")
    
    # Debug: Print the first few dialogues
    print("First few dialogues extracted:")
    for dialogue in mulan_dialogues[:5]:
        print(dialogue)
    
    # Add film title to each dialogue entry
    for dialogue in mulan_dialogues:
        dialogue['Film Title'] = 'Mulan'
    
    # Create a DataFrame
    df_mulan_dialogues = pd.DataFrame(mulan_dialogues)
    
    # Debug: Print DataFrame columns
    print("DataFrame columns:", df_mulan_dialogues.columns.tolist())
    
    # Rearrange columns if they exist
    expected_columns = ['Film Title', 'Character', 'Dialogue']
    existing_columns = df_mulan_dialogues.columns.tolist()
    columns_to_select = [col for col in expected_columns if col in existing_columns]
    
    if columns_to_select:
        df_mulan_dialogues = df_mulan_dialogues[columns_to_select]
    else:
        print("Expected columns are not in the DataFrame.")
        print("Available columns:", existing_columns)
    
    # Remove empty dialogues
    df_mulan_dialogues = df_mulan_dialogues[df_mulan_dialogues['Dialogue'] != '']
    
    # Reset index
    df_mulan_dialogues.reset_index(drop=True, inplace=True)
    
    # Save to CSV
    output_file = os.path.join(dialogues_path, 'mulan_dialogues.csv')
    df_mulan_dialogues.to_csv(output_file, index=False)
    
    print(f"Extracted dialogues saved to {output_file}")
else:
    print(f"Cannot find the script file at {script_path}. Please check the file path.")

Extracted 482 dialogues from the script.
Number of dialogues extracted: 482
First few dialogues extracted:
{'Character': 'Guard', 'Dialogue': "We're under attack!  Light the signal! [Guard runs to the tower and up the ladder as Hun Bald Man #1 and Hun Long Hair Man appear trying to stop him.  Hun Bald Man #1 breaks the ladder with his sword just as Guard reaches the top.  The guard picks up the torch to light the fire and sees Shan-Yu jump over the edge of the tower and looks at him across from the caldron.  The guard throws the torch into the caldron lighting a large fire.  Shan-Yu watches as each tower lights their caldrons one by one]"}
{'Character': 'Guard', 'Dialogue': "Now all of China knows you're here."}
{'Character': 'Shan-Yu', 'Dialogue': 'Perfect. [Cut to the palace.  The large doors to the central chamber open as General Li walks in flanked on his left and right by soldiers and approaches the Emperor. He bows, then looks up]'}
{'Character': 'General Li', 'Dialogue': 'Your M

##  Annotate Scripts with Character Metadata

In [27]:
# Load the dialogues DataFrame
dialogues_df = pd.read_csv('C:/Users/su1qt/Significant Projects/Honors Research/data/dialogues/mulan_dialogues.csv')

# Get a list of unique characters
unique_characters = dialogues_df['Character'].unique()

print("List of unique characters:")
for character in unique_characters:
    print(character)

List of unique characters:
Guard
Shan-Yu
General Li
Chi-Fu
Emperor
Mulan
Fa Zhou
Bath Lady
Fa Li
Grandmother Fa
Fa Li And Others
Chorus
Maidens And Mulan
Maiden #1
Maiden #2
Maiden #3
Maiden #4
Mulan And Maidens
Matchmaker
Townspeople
First Ancestor
Mushu
Ancestor
Fa Deng
Cri-Kee
Hun Long-Hair Guy
Scout #1
Scout #2
Archer Guy
Tattoo Soldier
Ling
Yao
Chien-Po
Shang
All Recruits
Recruit #2
Recruit #3
All Soldiers
Recruits
Hun Strong Man
Bald Hun Man #1
Long Hair Hun Man
Hun Archer
Cow
Hun Soldier
Parade Leader
Hun Bald Man #2
Hun Bald Man #1
Barry Cook
Man In Crowd #1
Man In Crowd #2
Little Brother


## Character MetaData Dictionary

In [37]:
character_metadata = {
    'Mulan': {
        'Gender': 'Female',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Protagonist',
        'Marginalized': True,  # POC, female protagonist
        'POC': True
    },
    'Shang': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC male character
        'POC': True
    },
    'Mushu': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Mythical Creature',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': False,  # Mythical creature, supporting
        'POC': False
    },
    'Shan-Yu': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Hun',
        'Culture': 'Hun',
        'Role': 'Antagonist',
        'Marginalized': False,  # Antagonist, from a different culture
        'POC': False
    },
    'Chi-Fu': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC, Chinese
        'POC': True
    },
    'Yao': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC
        'POC': True
    },
    'Ling': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC
        'POC': True
    },
    'Chien-Po': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC
        'POC': True
    },
    'Fa Zhou': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC
        'POC': True
    },
    'Fa Li': {
        'Gender': 'Female',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC
        'POC': True
    },
    'Grandmother Fa': {
        'Gender': 'Female',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC
        'POC': True
    },
    'Matchmaker': {
        'Gender': 'Female',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC
        'POC': True
    },
    'Emperor': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC
        'POC': True
    },
    'General Li': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Chinese',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': True,  # POC
        'POC': True
    },
    'First Ancestor': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Spiritual Entity',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': False,  # Mythical character
        'POC': False
    },
    'Cri-Kee': {
        'Gender': 'Male',
        'Race/Ethnicity': 'Cricket',
        'Culture': 'Chinese',
        'Role': 'Supporting',
        'Marginalized': False,  # Animal entity
        'POC': False
    }
}


## Merge metadata with diologues

In [50]:
# Refined list of characters
characters_to_keep = [
    'Mulan',
    'Shang',
    'Mushu',
    'Shan-Yu',
    'Chi-Fu',
    'Yao',
    'Ling',
    'Chien-Po',
    'Fa Zhou',
    'Fa Li',
    'Grandmother Fa',
    'Matchmaker',
    'Emperor',
    'General Li',
    'First Ancestor',
    'Cri-Kee'
]

# Filter the DataFrame
filtered_dialogues_df = dialogues_df[dialogues_df['Character'].isin(characters_to_keep)]
filtered_dialogues_df.head()

Unnamed: 0,Film Title,Character,Dialogue
2,Mulan,Shan-Yu,Perfect. [Cut to the palace. The large doors ...
3,Mulan,General Li,"Your Majesty, the Huns have crossed our Northe..."
4,Mulan,Chi-Fu,Impossible! No one can get through The Great W...
5,Mulan,General Li,Shun-Yu is leading them. We'll set up defense...
6,Mulan,Emperor,No! Send your troops to protect my people. C...


In [51]:
# Convert the metadata dictionary to a DataFrame
metadata_df = pd.DataFrame.from_dict(character_metadata, orient='index')

# Reset the index to make 'Character' a column
metadata_df.reset_index(inplace=True)
metadata_df.rename(columns={'index': 'Character'}, inplace=True)

# Preview the metadata DataFrame
print("Metadata DataFrame:")
metadata_df.head()

annotated_dialogues_df = pd.merge(filtered_dialogues_df, metadata_df, on='Character', how='left')

Metadata DataFrame:


In [52]:
# Find characters with missing metadata
missing_metadata = annotated_dialogues_df[annotated_dialogues_df['Gender'].isnull()]['Character'].unique()

if len(missing_metadata) > 0:
    print("Characters missing metadata:")
    for character in missing_metadata:
        print(character)
else:
    print("All characters have metadata.")


All characters have metadata.


In [53]:
# Rearrange columns for clarity
columns_order = [
    'Film Title', 'Character', 'Gender', 'Race/Ethnicity', 'Culture',
    'Role', 'Marginalized', 'POC', 'Dialogue'
]
annotated_dialogues_df = annotated_dialogues_df[columns_order]

# Preview the annotated dialogues DataFrame
print("Annotated Dialogues DataFrame:")
annotated_dialogues_df.head(10)

Annotated Dialogues DataFrame:


Unnamed: 0,Film Title,Character,Gender,Race/Ethnicity,Culture,Role,Marginalized,POC,Dialogue
0,Mulan,Shan-Yu,Male,Hun,Hun,Antagonist,False,False,Perfect. [Cut to the palace. The large doors ...
1,Mulan,General Li,Male,Chinese,Chinese,Supporting,True,True,"Your Majesty, the Huns have crossed our Northe..."
2,Mulan,Chi-Fu,Male,Chinese,Chinese,Supporting,True,True,Impossible! No one can get through The Great W...
3,Mulan,General Li,Male,Chinese,Chinese,Supporting,True,True,Shun-Yu is leading them. We'll set up defense...
4,Mulan,Emperor,Male,Chinese,Chinese,Supporting,True,True,No! Send your troops to protect my people. C...
5,Mulan,Chi-Fu,Male,Chinese,Chinese,Supporting,True,True,"Yes, your highness."
6,Mulan,Emperor,Male,Chinese,Chinese,Supporting,True,True,Deliver conscription notices throughout all th...
7,Mulan,General Li,Male,Chinese,Chinese,Supporting,True,True,"Forgive me your Majesty, but I believe my troo..."
8,Mulan,Emperor,Male,Chinese,Chinese,Supporting,True,True,"I wont take any chances, General. A single gr..."
9,Mulan,Mulan,Female,Chinese,Chinese,Protagonist,True,True,Quiet and demure...graceful...polite...[pickin...


In [54]:
output_file = os.path.join(dialogues_path, 'mulan_annotated_dialogues.csv')
annotated_dialogues_df.to_csv(output_file, index=False)

## Preprocessing the text data

In [55]:
# Define the path to the annotated dialogues CSV
dialogues_path = r'C:\Users\su1qt\Significant Projects\Honors Research\data\dialogues'
annotated_dialogues_file = os.path.join(dialogues_path, 'mulan_annotated_dialogues.csv')

# Check if the file exists
if not os.path.isfile(annotated_dialogues_file):
    raise FileNotFoundError(f"The file {annotated_dialogues_file} does not exist. Please check the path.")

# Load the annotated dialogues DataFrame
annotated_dialogues_df = pd.read_csv(annotated_dialogues_file)

# Display the first few rows
annotated_dialogues_df.head(5)

Unnamed: 0,Film Title,Character,Gender,Race/Ethnicity,Culture,Role,Marginalized,POC,Dialogue
0,Mulan,Shan-Yu,Male,Hun,Hun,Antagonist,False,False,Perfect. [Cut to the palace. The large doors ...
1,Mulan,General Li,Male,Chinese,Chinese,Supporting,True,True,"Your Majesty, the Huns have crossed our Northe..."
2,Mulan,Chi-Fu,Male,Chinese,Chinese,Supporting,True,True,Impossible! No one can get through The Great W...
3,Mulan,General Li,Male,Chinese,Chinese,Supporting,True,True,Shun-Yu is leading them. We'll set up defense...
4,Mulan,Emperor,Male,Chinese,Chinese,Supporting,True,True,No! Send your troops to protect my people. C...


## Cleaning the dialogue text

In [56]:
def clean_dialogue(text):
    """
    Clean the dialogue text by removing stage directions, special characters, and extra whitespace.
    
    Parameters:
        text (str): The original dialogue text.
    
    Returns:
        str: The cleaned dialogue text.
    """
    # Remove stage directions enclosed in brackets []
    text = re.sub(r'\[.*?\]', '', text)
    
    # Remove parentheses and their content (e.g., (laughs))
    text = re.sub(r'\(.*?\)', '', text)
    
    # Optionally, remove other non-dialogue annotations
    # text = re.sub(r'<.*?>', '', text)
    
    # Remove extra whitespace
    text = text.strip()
    
    return text

# Apply the cleaning function to the 'Dialogue' column
annotated_dialogues_df['Cleaned Dialogue'] = annotated_dialogues_df['Dialogue'].apply(clean_dialogue)

# Display the first few cleaned dialogues
annotated_dialogues_df[['Dialogue', 'Cleaned Dialogue']].head()


Unnamed: 0,Dialogue,Cleaned Dialogue
0,Perfect. [Cut to the palace. The large doors ...,Perfect.
1,"Your Majesty, the Huns have crossed our Northe...","Your Majesty, the Huns have crossed our Northe..."
2,Impossible! No one can get through The Great W...,Impossible! No one can get through The Great W...
3,Shun-Yu is leading them. We'll set up defense...,Shun-Yu is leading them. We'll set up defense...
4,No! Send your troops to protect my people. C...,No! Send your troops to protect my people. C...


In [58]:
!pip install contractions

Collecting contractions
  Downloading contractions-0.1.73-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting textsearch>=0.0.21 (from contractions)
  Downloading textsearch-0.0.24-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting anyascii (from textsearch>=0.0.21->contractions)
  Downloading anyascii-0.3.2-py3-none-any.whl.metadata (1.5 kB)
Collecting pyahocorasick (from textsearch>=0.0.21->contractions)
  Downloading pyahocorasick-2.1.0-cp312-cp312-win_amd64.whl.metadata (13 kB)
Downloading contractions-0.1.73-py2.py3-none-any.whl (8.7 kB)
Downloading textsearch-0.0.24-py2.py3-none-any.whl (7.6 kB)
Downloading anyascii-0.3.2-py3-none-any.whl (289 kB)
Downloading pyahocorasick-2.1.0-cp312-cp312-win_amd64.whl (39 kB)
Installing collected packages: pyahocorasick, anyascii, textsearch, contractions
Successfully installed anyascii-0.3.2 contractions-0.1.73 pyahocorasick-2.1.0 textsearch-0.0.24



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## Expanding Contractions

In [61]:
import contractions

def expand_contractions_func(text):
    """
    Expand contractions in the text to their full forms.
    
    Parameters:
        text (str): The cleaned dialogue text.
    
    Returns:
        str: The dialogue text with expanded contractions.
    """
    return contractions.fix(text)

# Apply the contraction expansion function
annotated_dialogues_df['Cleaned Dialogue'] = annotated_dialogues_df['Cleaned Dialogue'].apply(expand_contractions_func)

# Display the first few dialogues after expanding contractions
annotated_dialogues_df[['Dialogue', 'Cleaned Dialogue']].sample(5)


Unnamed: 0,Dialogue,Cleaned Dialogue
7,"Forgive me your Majesty, but I believe my troo...","Forgive me your Majesty, but I believe my troo..."
196,"Oh, that's my tough looking warrior. That's w...","Oh, that is my tough looking warrior. That is..."
194,Don't talk with your mouth full. Now let's se...,Do not talk with your mouth full. Now let us ...
267,...the General. [Shang takes the helmet and wa...,...the General.
89,Silence! We must send the most powerful of al...,Silence! We must send the most powerful of all


In [62]:
# Define the path to save the preprocessed dialogues
preprocessed_file = os.path.join(dialogues_path, 'mulan_preprocessed_dialogues.csv')

# Save the preprocessed DataFrame
annotated_dialogues_df.to_csv(preprocessed_file, index=False)

print(f"\nPreprocessed dialogues saved to {preprocessed_file}")



Preprocessed dialogues saved to C:\Users\su1qt\Significant Projects\Honors Research\data\dialogues\mulan_preprocessed_dialogues.csv


## Sentiment Analysis Using VADER

In [63]:

# Define the path to the preprocessed dialogues CSV
dialogues_path = r'C:\Users\su1qt\Significant Projects\Honors Research\data\dialogues'
preprocessed_dialogues_file = os.path.join(dialogues_path, 'mulan_preprocessed_dialogues.csv')

# Check if the file exists
if not os.path.isfile(preprocessed_dialogues_file):
    raise FileNotFoundError(f"The file {preprocessed_dialogues_file} does not exist. Please check the path.")

# Load the preprocessed dialogues DataFrame
preprocessed_dialogues_df = pd.read_csv(preprocessed_dialogues_file)

# Display the first few rows

preprocessed_dialogues_df.sample(5)


Unnamed: 0,Film Title,Character,Gender,Race/Ethnicity,Culture,Role,Marginalized,POC,Dialogue,Cleaned Dialogue
38,Mulan,Matchmaker,Female,Chinese,Chinese,Supporting,True,True,Speaking without permission.,Speaking without permission.
378,Mulan,Mushu,Male,Mythical Creature,Chinese,Supporting,False,False,You don't have a plan?!,You do not have a plan?!
240,Mulan,Mulan,Female,Chinese,Chinese,Protagonist,True,True,"Boy, that was close.","Boy, that was close."
247,Mulan,Mushu,Male,Mythical Creature,Chinese,Supporting,False,False,Oh no you don't. I've worked to hard to get M...,Oh no you do not. I have worked to hard to ge...
409,Mulan,Emperor,Male,Chinese,Chinese,Supporting,True,True,The flower that blooms in adversity is the mos...,The flower that blooms in adversity is the mos...


In [65]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download VADER lexicon if not already downloaded
nltk.download('vader_lexicon')

# Initialize VADER sentiment analyzer
sia = SentimentIntensityAnalyzer()


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\su1qt\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


## Calculate Sentiment Scores and Labels

In [66]:
def get_sentiment_scores(text):
    """
    Calculate sentiment scores for the given text using VADER.
    
    Parameters:
        text (str): The preprocessed dialogue text.
    
    Returns:
        dict: A dictionary containing 'neg', 'neu', 'pos', and 'compound' scores.
    """
    return sia.polarity_scores(text)

def categorize_sentiment(score):
    """
    Categorize sentiment based on the compound score.
    
    Parameters:
        score (float): The compound sentiment score.
    
    Returns:
        str: The sentiment category ('Positive 😊', 'Negative 😡', 'Neutral 😐').
    """
    if score >= 0.05:
        return 'Positive 😊'
    elif score <= -0.05:
        return 'Negative 😡'
    else:
        return 'Neutral 😐'


### Applying sentiment analysis to each dialogue

In [69]:
# Apply the sentiment scores function to the 'Cleaned Dialogue' column
preprocessed_dialogues_df['Sentiment Scores'] = preprocessed_dialogues_df['Cleaned Dialogue'].apply(get_sentiment_scores)

# Extract the compound score
preprocessed_dialogues_df['Compound Score'] = preprocessed_dialogues_df['Sentiment Scores'].apply(lambda x: x['compound'])

# Categorize sentiment based on the compound score
preprocessed_dialogues_df['Sentiment'] = preprocessed_dialogues_df['Compound Score'].apply(categorize_sentiment)

# Display the first few dialogues with sentiment scores and labels
preprocessed_dialogues_df[['Cleaned Dialogue', 'Compound Score', 'Sentiment']].sample(5)


Unnamed: 0,Cleaned Dialogue,Compound Score,Sentiment
215,Just because I look like a man does not mean I...,0.6124,Positive 😊
282,"All right, you might want to light that right ...",0.2225,Positive 😊
121,"Hey, dragon, dragon, not lizard. I do not do...",0.0,Neutral 😐
109,Whach' you mean loser? How 'bout I pop one of...,-0.7998,Negative 😡
81,Mushu!,0.0,Neutral 😐


In [70]:
# Define the path to save the sentiment-analyzed dialogues
sentiment_file = os.path.join(dialogues_path, 'mulan_sentiment_dialogues.csv')

# Save the DataFrame with sentiment analysis
preprocessed_dialogues_df.to_csv(sentiment_file, index=False)

print(f"\nDialogues with sentiment analysis saved to {sentiment_file}")



Dialogues with sentiment analysis saved to C:\Users\su1qt\Significant Projects\Honors Research\data\dialogues\mulan_sentiment_dialogues.csv
