In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [3]:
df = pd.read_csv('../data/movie_final_dataset.csv')
df.head()

Unnamed: 0,Name,Genres,Budget(USD)_Inflated,Domestic(USD)_Inflated,Domestic_Percentage,Foreign(USD)_Inflated,Foreign_Percentage,Worldwide(USD)_Inflated,Runtime(mins),Rating,...,period piece,animation,teen,film adaptation,musical,history,coming of age,sports,war,Worldwide_profit
0,10 cloverfield lane,"drama, thriller, horror, sci-fi",6076746.0,87606020.0,0.65401,46346130.0,0.34599,133952100.0,103,pg-13,...,0,0,0,0,0,0,0,0,0,22.0434
1,"10,000 bc","drama, thriller, action, adventure, romance fi...",144156300.0,130130900.0,0.351333,240260600.0,0.648667,370391400.0,109,pg-13,...,0,0,0,0,0,0,0,0,0,2.569373
2,12 rounds,"thriller, action, crime",27262750.0,16677570.0,0.708013,6877890.0,0.291987,23555460.0,108,pg-13,...,0,0,0,0,0,0,0,0,0,0.864016
3,12 strong,"drama, action, history, war",39790500.0,52091130.0,0.644274,28761330.0,0.355726,80852460.0,130,r,...,0,0,0,0,0,1,0,0,1,2.031954
4,12 years a slave,"drama, biography, history",25392600.0,71952470.0,0.301875,166399300.0,0.698125,238351700.0,134,r,...,0,0,0,0,0,1,0,0,0,9.38666


In [4]:
counts = df["emotion"].value_counts().reset_index()
counts.columns = ["emotion", "Counts"]
fig = px.bar(
    counts, 
    x="emotion", 
    y="Counts",
    title="Bar Plot of Counts of Emotions", 
    color="emotion"
)
fig.update_layout(showlegend=False)
fig.show()

In [5]:
fig.write_html("../_includes/emotions_bar.html")

In [21]:
from plotly.subplots import make_subplots

# Define a custom color map for emotions
color_map = {
    'anger': 'red',
    'sadness': 'blue',
    'joy': 'gold',
    'disgust': 'green',
    'neutral': 'brown',
    'surprise': 'hotpink',
    'fear': 'purple'
}

# Separate data for each group
domestic_counts = df[df['Foreign_higher'] == 0]['emotion'].value_counts().reset_index()
domestic_counts.columns = ['emotion', 'Counts']

foreign_counts = df[df['Foreign_higher'] == 1]['emotion'].value_counts().reset_index()
foreign_counts.columns = ['emotion', 'Counts']

# Create a subplot figure with two pie charts
fig2 = make_subplots(
    rows=1, cols=2, 
    specs=[[{'type': 'domain'}, {'type': 'domain'}]], 
    subplot_titles=[
        "Movies with Domestic % > 50%", 
        "Movies with Foreign % > 50%"
    ]
)

# Add pie chart for domestic movies
fig2.add_trace(
    px.pie(
        domestic_counts, 
        names='emotion', 
        values='Counts', 
        color='emotion',
        color_discrete_map=color_map
    ).data[0], 
    row=1, col=1
)

# Add pie chart for foreign movies
fig2.add_trace(
    px.pie(
        foreign_counts, 
        names='emotion', 
        values='Counts', 
        color='emotion',
        color_discrete_map=color_map
    ).data[0], 
    row=1, col=2
)

# Update layout to center title, adjust subplot titles, and show the legend
fig2.update_layout(
    title=dict(
        text="Emotion Distribution Comparison",
        x=0.5,  # Center align the title
        xanchor="center"
    ),
    showlegend=True,  # Enable the legend
    legend=dict(
        orientation="h",  # Horizontal legend
        x=0.5,
        xanchor="center",
        y=-0.1
    ),
    font=dict(
        size=12  # Adjust font size for subplot titles
    )
)

# Adjust subplot title font size
for annotation in fig2['layout']['annotations']:
    annotation['font'] = dict(size=15)  # Smaller font for subplot titles

# Show the combined figure
fig2.show()


In [22]:
fig2.write_html("../_includes/emotions_pie.html")

In [6]:
emotion_labels = ['neutral', 'fear', 'anger', 'sadness', 'disgust', 'joy', 'surprise']
df_corr = df[['Foreign_Percentage']]
for emotion in emotion_labels:
    df_corr[emotion] = df['emotion'].str.contains(emotion).astype(int)






A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [7]:
df_corr.head()

Unnamed: 0,Foreign_Percentage,neutral,fear,anger,sadness,disgust,joy,surprise
0,0.34599,0,1,0,0,0,0,0
1,0.648667,0,1,0,0,0,0,0
2,0.291987,0,0,1,0,0,0,0
3,0.355726,0,1,0,0,0,0,0
4,0.698125,0,0,0,1,0,0,0


In [8]:
corr = df_corr.corr()
fig = go.Figure(data= go.Heatmap(z=corr,
                                 x=corr.index.values,
                                 y=corr.columns.values,
                                 colorscale='earth',
                                 )
                )
fig.update_layout(title_text='<b>Correlation Matrix (cont. features)<b>',
                  title_x=0.5,
                  titlefont={'size': 24},
                  width=550, height=550,
                  xaxis_showgrid=False,
                  yaxis_showgrid=False,
                  yaxis_autorange='reversed', 
                  paper_bgcolor=None,
                  )
fig.show()

In [None]:
fig.write_html("../_includes/emotions_heatmap.html")