In [1]:
import ast
import os
import sys
from collections import Counter

import numpy as np
import pandas as pd
import plotly.colors as pc
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import plotly.subplots as sp
import sqlalchemy as sa
from sqlalchemy import create_engine

In [2]:
# Load the enriched movie dataset from a CSV located in the working directory.
# NOTE(review): the 'themes' and 'tone' columns arrive as strings that look like
# Python lists and are parsed in a later cell — confirm the file is from a trusted source.
movie_df = pd.read_csv("enriched_new_movies.csv")

In [3]:
# Display the loaded frame as the cell's output.
movie_df

Unnamed: 0,summary,themes,age_rating,tone,movie_id,movie_title
0,"A death row inmate, having already paid for hi...","['Justice and Punishment', 'Redemption and Jud...",R,"['Somber', 'Ominous']",866741,Box
1,A freelance translator and his sex partner's c...,"['Infidelity and Betrayal', 'Complex Relations...",R,"['Provocative', 'Dramatic']",1322121,Dangerous Cohabitation
2,"A crime boss, a cop, and a serial killer form ...","['Justice vs. Revenge', 'The Blurred Lines Bet...",R,"['Gritty', 'Violent']",581528,"The Gangster, the Cop, the Devil"
3,"A self-centered race car, Lightning McQueen, l...","['The importance of friendship and community',...",G,"['Heartwarming', 'Humorous']",920,Cars
4,"A grieving widower, seeking revenge on the man...","['Revenge and its consequences', 'The destruct...",R,"['Suspenseful', 'Erotic']",193551,All of a Sudden
...,...,...,...,...,...,...
59,A desperate car salesman's kidnapping plot spi...,"['The consequences of greed and desperation', ...",R,"['Darkly comedic', 'Ironic']",275,Fargo
60,"Decades after their first adventure, estranged...",['Family reconciliation and the importance of ...,PG,"['Humorous', 'Heartwarming']",459151,The Boss Baby: Family Business
61,"Amidst looming war, a young aspiring glasswork...",['The conflict between pacifism and militarism...,PG,"['Melancholic', 'Hopeful']",629078,The Glassworker
62,"A disgraced baseball player, caught in a web o...","['Redemption', 'Corruption and Betrayal', 'Sac...",R,"['Gritty', 'Violent']",280019,For the Emperor


In [4]:
def _parse_list_cell(cell):
    """Convert a stringified list read from the CSV into a real list.

    Values that are not strings (e.g. lists produced by an earlier run of this
    cell) are returned unchanged, so re-running the cell is safe.
    """
    # literal_eval accepts only Python literals, so a crafted CSV cell cannot
    # execute arbitrary code the way it could with eval().
    return ast.literal_eval(cell) if isinstance(cell, str) else cell


movie_df['themes'] = movie_df['themes'].apply(_parse_list_cell)
movie_df['tone'] = movie_df['tone'].apply(_parse_list_cell)

# Movies per age rating as a two-column frame: 'age_rating', 'count'.
rating_counts = (
    movie_df['age_rating']
    .value_counts()
    .rename_axis('age_rating')
    .reset_index(name='count')
)

In [5]:
def extract_themes(df=None):
    """Count how often each theme appears across all movies.

    Args:
        df: DataFrame whose 'themes' column holds a list of theme strings per
            row. Defaults to the notebook-level ``movie_df`` so existing
            zero-argument calls keep working.

    Returns:
        DataFrame with columns 'theme' and 'count', ordered by descending
        count. Themes with equal counts keep their first-appearance order.
    """
    if df is None:
        df = movie_df
    theme_counts = Counter(
        theme for themes_list in df['themes'] for theme in themes_list
    )
    counts_df = pd.DataFrame({'theme': list(theme_counts.keys()),
                              'count': list(theme_counts.values())})
    # A stable sort keeps tied themes in first-seen order, so slices such as
    # head(10) are reproducible.
    return counts_df.sort_values('count', ascending=False, kind='stable')

# Theme frequency table (columns 'theme' and 'count'), most frequent first.
themes_df = extract_themes()

In [6]:
def extract_tones(df=None):
    """Count how often each tone appears across all movies.

    Args:
        df: DataFrame whose 'tone' column holds a list of tone strings per
            row. Defaults to the notebook-level ``movie_df`` so existing
            zero-argument calls keep working.

    Returns:
        DataFrame with columns 'tone' and 'count', ordered by descending
        count. Tones with equal counts keep their first-appearance order.
    """
    if df is None:
        df = movie_df
    tone_counts = Counter(
        tone for tones_list in df['tone'] for tone in tones_list
    )
    counts_df = pd.DataFrame({'tone': list(tone_counts.keys()),
                              'count': list(tone_counts.values())})
    # A stable sort keeps tied tones in first-seen order, so slices such as
    # head(10) are reproducible.
    return counts_df.sort_values('count', ascending=False, kind='stable')

# Tone frequency table (columns 'tone' and 'count'), most frequent first.
tones_df = extract_tones()

In [7]:
# Bar charts for the three frequency tables (themes and tones limited to the top 10 rows)
fig1 = px.bar(
    rating_counts,
    x='age_rating',
    y='count',
    title='Distribution of Movies by Age Rating',
    labels={'count': 'Number of Movies', 'age_rating': 'Age Rating'},
)

top_themes = themes_df.head(10)
fig2 = px.bar(
    top_themes,
    x='theme',
    y='count',
    title='Top 10 Movie Themes',
    labels={'count': 'Frequency', 'theme': 'Theme'},
)

top_tones = tones_df.head(10)
fig3 = px.bar(
    top_tones,
    x='tone',
    y='count',
    title='Top 10 Movie Tones',
    labels={'count': 'Frequency', 'tone': 'Tone'},
)

# Render the charts in creation order
for figure in (fig1, fig2, fig3):
    figure.show()

In [8]:
# Per-movie list lengths for both annotation columns
for source_col, count_col in (('themes', 'theme_count'), ('tone', 'tone_count')):
    movie_df[count_col] = movie_df[source_col].apply(len)

# Mean number of themes and of tones per movie
movie_avg_themes = movie_df['theme_count'].mean()
movie_avg_tones = movie_df['tone_count'].mean()

# Count how often each (theme, tone) pair occurs within the same movie.
# Resulting shape: {theme: {tone: count}}.
theme_tone_mapping = {}
# Zipping the two columns avoids iterrows(), which builds a Series for every row.
for movie_themes, movie_tones in zip(movie_df['themes'], movie_df['tone']):
    for theme in movie_themes:
        tone_counts_for_theme = theme_tone_mapping.setdefault(theme, {})
        for tone in movie_tones:
            tone_counts_for_theme[tone] = tone_counts_for_theme.get(tone, 0) + 1

# Flatten the nested mapping into one record per (theme, tone) pair.
theme_tone_pairs = [
    {'theme': theme, 'tone': tone, 'count': count}
    for theme, tones in theme_tone_mapping.items()
    for tone, count in tones.items()
]
# Explicit columns keep the schema intact even when there are no pairs at all.
theme_tone_df = pd.DataFrame(theme_tone_pairs, columns=['theme', 'tone', 'count'])

# Write each aggregate table to its CSV file for use in the dashboard
dashboard_exports = (
    (rating_counts, 'rating_counts.csv'),
    (themes_df, 'themes_data.csv'),
    (tones_df, 'tones_data.csv'),
    (theme_tone_df, 'theme_tone_data.csv'),
)
for table, csv_name in dashboard_exports:
    table.to_csv(csv_name, index=False)

# Build a copy of the movie frame with comma-joined string versions of the list columns
processed_movie_df = movie_df.copy()
for list_column, joined_column in (('tone', 'tone_str'), ('themes', 'themes_str')):
    processed_movie_df[joined_column] = processed_movie_df[list_column].apply(', '.join)

# Write the processed frame next to the aggregate CSVs
processed_movie_df.to_csv('processed_movies.csv', index=False)

print("Data processing complete. Files saved for dashboard use.")

Data processing complete. Files saved for dashboard use.


In [None]:
# NOTE(review): disabled alternative export that drops the list columns before
# writing the CSV. The processed_movie_df table rendered further down has no
# 'themes'/'tone' columns, so this was presumably executed at some point —
# either re-enable it or delete it so a fresh Run All reproduces that output.
# processed_movie_df.drop(columns=['themes', 'tone'], inplace=True)
# processed_movie_df.to_csv('processed_movies.csv', index=False)

In [None]:
# processed_movie_df = pd.read_csv('processed_movies.csv')

In [12]:
# Display the processed frame as the cell's output.
processed_movie_df

Unnamed: 0,summary,age_rating,movie_id,movie_title,theme_count,tone_count,tone_str,themes_str
0,"A death row inmate, having already paid for hi...",R,866741,Box,3,2,"Somber, Ominous","Justice and Punishment, Redemption and Judgeme..."
1,A freelance translator and his sex partner's c...,R,1322121,Dangerous Cohabitation,3,2,"Provocative, Dramatic","Infidelity and Betrayal, Complex Relationships..."
2,"A crime boss, a cop, and a serial killer form ...",R,581528,"The Gangster, the Cop, the Devil",3,2,"Gritty, Violent","Justice vs. Revenge, The Blurred Lines Between..."
3,"A self-centered race car, Lightning McQueen, l...",G,920,Cars,3,2,"Heartwarming, Humorous","The importance of friendship and community, Th..."
4,"A grieving widower, seeking revenge on the man...",R,193551,All of a Sudden,3,2,"Suspenseful, Erotic","Revenge and its consequences, The destructive ..."
...,...,...,...,...,...,...,...,...
59,A desperate car salesman's kidnapping plot spi...,R,275,Fargo,3,2,"Darkly comedic, Ironic","The consequences of greed and desperation, The..."
60,"Decades after their first adventure, estranged...",PG,459151,The Boss Baby: Family Business,3,2,"Humorous, Heartwarming",Family reconciliation and the importance of si...
61,"Amidst looming war, a young aspiring glasswork...",PG,629078,The Glassworker,3,2,"Melancholic, Hopeful","The conflict between pacifism and militarism.,..."
62,"A disgraced baseball player, caught in a web o...",R,280019,For the Emperor,3,2,"Gritty, Violent","Redemption, Corruption and Betrayal, Sacrifice"
