In [10]:
import pandas as pd
import numpy as np
import ast

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings
warnings.filterwarnings('ignore')

In [11]:
# Load the preprocessed movie table and turn stringified Python literals
# (e.g. "['Comedy', 'Drama']") back into real Python objects.
DATA_FOLDER = "data/"

movies = pd.read_csv(DATA_FOLDER + "preprocessed_movies.csv")


def _parse_literal(value):
    """Evaluate a string cell as a Python literal; return non-strings unchanged."""
    return ast.literal_eval(value) if isinstance(value, str) else value


for col in movies.columns:
    try:
        # All-or-nothing per column: if any single cell is not a valid literal,
        # `apply` raises before the assignment and the column stays as loaded.
        movies[col] = movies[col].apply(_parse_literal)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # These are the exceptions ast.literal_eval is documented to raise on
        # malformed input; they mark a free-text column (titles, plots, ...).
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        pass
    # Positional access so the peek does not depend on the index containing label 0.
    print(col, type(movies[col].iloc[0]))

movies.head(3)

wikipedia_id <class 'numpy.float64'>
freebase_id <class 'str'>
title <class 'str'>
languages <class 'float'>
countries <class 'list'>
genres <class 'list'>
keywords <class 'float'>
release_date <class 'numpy.int64'>
runtime <class 'numpy.float64'>
plot_summary <class 'str'>
cold_war_side <class 'str'>
character_western_bloc_representation <class 'list'>
character_eastern_bloc_representation <class 'list'>
western_bloc_values <class 'list'>
eastern_bloc_values <class 'list'>
theme <class 'list'>


Unnamed: 0,wikipedia_id,freebase_id,title,languages,countries,genres,keywords,release_date,runtime,plot_summary,cold_war_side,character_western_bloc_representation,character_eastern_bloc_representation,western_bloc_values,eastern_bloc_values,theme
0,4213160.0,/m/0bq8q8,$,,[Russia],"[Comedy, Crime, Drama]",,1971,119.0,"Set in Hamburg, West Germany, several criminal...",Western,"[Joe Collins, American bank security consultan...","[Dawn Divine, hooker with a heart of gold, cun...",[None],"[Resourcefulness, cleverness, individualism, h...",[None]
1,,,"$1,000 on the Black","[German, Italian]","[Germany, Italy]",[Western],,1966,104.0,Johnny Liston has just been released from pris...,Eastern,[None],"[Sartana, villainous, oppressive, cruel, arche...","[Johnny Liston, justice, determination, resili...","[Justice, revenge, oppressed vs. oppressor, re...","[Terror, betrayal, familial conflict, crime, r..."
2,,,"$10,000 Blood Money",,[Russia],"[Western, Drama]",,1967,,Hired by a Mexican landowner to rescue his dau...,,[None],[None],[None],[None],"[crime, betrayal, revenge, bounty hunter, heis..."


In [12]:
# Number of rows (movies) in the loaded table.
print(movies.shape[0])

25621


In [13]:
# Hex colours used for each Cold War side in the figures.
EASTERN_COLOR, WESTERN_COLOR, NEUTRAL_COLOR = "#DD3C32", "#0F89E6", "#C2C7D6"

# Lookup from the `cold_war_side` label to its colour.
COLOR_DICT = dict(
    zip(
        ("Eastern", "Western", "None"),
        (EASTERN_COLOR, WESTERN_COLOR, NEUTRAL_COLOR),
    )
)


In [14]:
# Bar chart of how many movies were released each year, with the start and
# end of the Cold War marked by dashed red lines.
year_counts = movies.groupby("release_date").size()

yearly_bars = go.Bar(
    x=year_counts.index,
    y=year_counts.values,
    width=0.55,
    marker_color="midnightblue",
    hovertemplate="Year: %{x}<br>No. of movies: %{y}<extra></extra>",
)
fig = go.Figure(data=[yearly_bars])

fig.update_layout(
    title="Number of movies by year",
    xaxis_title="Year",
    yaxis_title="Number of movies",
)

# One tick per year present in the data.
fig.update_xaxes(
    tickangle=-45,
    tickvals=list(year_counts.index),
    tickfont={"size": 8},
)

# (year, label) pairs for the two Cold War boundary markers.
cold_war_milestones = [
    (1947, "Start of Cold War"),
    (1991, "End of Cold War"),
]

# Dashed vertical line at each milestone year.
for event_year, _ in cold_war_milestones:
    fig.add_vline(x=event_year, line_dash="dash", line_color="red")

# Rotated label next to each line, anchored at y=600 on the count axis.
for event_year, event_label in cold_war_milestones:
    fig.add_annotation(
        x=event_year,
        y=600,
        text=event_label,
        font={"color": "red", "size": 12},
        showarrow=False,
        yanchor="middle",
        xshift=10,
        yshift=10,
        textangle=-90,
    )

fig.show()

In [142]:
# Line chart: number of movies per Cold War side for every release year.
#
# Rows = release year, columns = side label. `fill_value=0` makes a year with
# no movie for a given side count as 0 instead of NaN (NaN would break the line).
# NOTE(review): groupby drops rows whose `cold_war_side` is missing, so neutral
# movies only get a line if they are stored as the string "None" - confirm.
movie_side_count = (
    movies.groupby(["release_date", "cold_war_side"])
    .size()
    .unstack(fill_value=0)
)

fig = go.Figure()

for side in movie_side_count.columns:
    # go.Scatter with mode="lines" replaces the deprecated legacy `go.Line`.
    fig.add_trace(go.Scatter(x=movie_side_count.index,
                             y=movie_side_count[side],
                             mode="lines",
                             name=side,
                             # Unknown labels fall back to the neutral colour instead of raising KeyError.
                             line=dict(width=2, color=COLOR_DICT.get(side, NEUTRAL_COLOR)),
                             hovertemplate="Year: %{x}<br>No. of movies: %{y}<extra></extra>"))

fig.update_layout(title="Evolution of the number of movies per side by year", xaxis_title="Year", yaxis_title="Number of movies")

# One tick per release year present in `movies`, computed here so the cell
# does not rely on a variable created by another cell.
release_years = sorted(movies["release_date"].dropna().unique())
fig.update_xaxes(tickangle=-45, tickvals=release_years, tickfont=dict(size=8))

# Mark the start and end of the Cold War.
fig.add_vline(x=1947, line_dash="dash", line_color="black", annotation_text="Start of Cold War")
fig.add_vline(x=1991, line_dash="dash", line_color="black", annotation_text="End of Cold War")

fig.show()

In [143]:
# Two pies side by side: share of movies per Cold War side, once with and once
# without the neutral ("None") movies.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'pie'}, {'type': 'pie'}]], subplot_titles=("Including neutral movies", "Excluding neutral movies"))

# Prepare data for the pie charts.
# Missing sides are labelled 'None' before counting: `value_counts()` drops NaN
# and `!= 'None'` never matches NaN, so without this both pies would be
# identical whenever neutral movies are loaded as NaN.
# NOTE(review): assumes a missing `cold_war_side` means "neutral" - confirm.
side_labels = movies['cold_war_side'].fillna('None')

global_movies_side_count = side_labels.value_counts().reset_index()
global_movies_side_count.columns = ['side', 'count']

cw_movies_side_count = side_labels[side_labels != 'None'].value_counts().reset_index()
cw_movies_side_count.columns = ['side', 'count']

# Define colors for the pie chart slices
colors = {'None': NEUTRAL_COLOR, 'Western': WESTERN_COLOR, 'Eastern': EASTERN_COLOR}

# Left pie (col 1) includes neutral movies, right pie (col 2) excludes them.
for pie_col, side_counts in enumerate((global_movies_side_count, cw_movies_side_count), start=1):
    fig.add_trace(go.Pie(labels=side_counts['side'],
                         values=side_counts['count'],
                         # Unknown labels fall back to the neutral colour instead of raising KeyError.
                         marker=dict(colors=[colors.get(side, NEUTRAL_COLOR) for side in side_counts['side']]),
                         hovertemplate='<b>%{label}</b><br>Count: %{value}<br><extra></extra>',
                         name='Side'), row=1, col=pie_col)

# Adjust the domain of the pie charts to add space between the charts and the titles
fig.update_traces(domain=dict(x=[0, 0.45], y=[0, 0.95]), row=1, col=1)
fig.update_traces(domain=dict(x=[0.55, 1], y=[0, 0.95]), row=1, col=2)

fig.update_layout(title_text="Movies Distribution based on Cold War Side", title_x=0.5, title_font_weight='bold')
fig.show()