In [26]:
import pandas as pd
import numpy as np 
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pycountry_convert as pc
from plotly.subplots import make_subplots
import plotly.graph_objects as go


In [27]:
# Load the cleaned movies table from its pickle file.
# NOTE: unpickling can execute arbitrary code -- only read files from a trusted source.
MOVIES_PICKLE_PATH = "./data/Processed/movies_cleaned.pkl"
movies = pd.read_pickle(MOVIES_PICKLE_PATH)

In [28]:
# Display the column index of the loaded table (the cell output lists the available fields).
movies.columns

Index(['actor_name', 'wikiID', 'freebaseID', 'movie_title',
       'movie_release_date', 'movie_bo_revenue', 'movie_runtime',
       'fbid_languages', 'fbid_countries', 'fbid_genres', 'year',
       'character_name', 'actor_date_of_birth', 'actor_gender',
       'actor_height_meters', 'actor_ethni_fbid', 'actor_age_at_movie_release',
       'fbid_char_actor_map', 'fbid_char', 'fbid_actor', 'summary', 'budget',
       'popularity', 'vote_average', 'imdbid', 'id', 'director',
       'director_gender', 'producer', 'producer_gender', 'writer',
       'writer_gender'],
      dtype='object')

In [29]:
# Narrow the table down to the three columns read by the cells below.
columns_of_interest = ["year", "fbid_countries", "actor_gender"]
movies = movies[columns_of_interest]

In [48]:
# How many of the most-represented countries are kept for the per-country plot.
N_TOP_COUNTRIES = 8

# Keep only the rows whose actor is female.
female_df = movies[movies['actor_gender'] == 'F']

# Flatten the list of countries: one row per (original row, country).
female_df = female_df.explode('fbid_countries')

# Row count per country; value_counts returns them in descending order of frequency.
total_count_by_country = female_df['fbid_countries'].value_counts()

# Labels of the N_TOP_COUNTRIES most frequent countries.
top_countries = total_count_by_country.head(N_TOP_COUNTRIES).index
# Legacy alias: the selection holds 8 countries, not 25. Kept in case another cell reads this name.
top_25_countries = top_countries

# Restrict the female rows to the selected countries.
filtered_df = female_df[female_df['fbid_countries'].isin(top_countries)]

# One row per (year, country) with the number of matching female-actor rows.
# NOTE(review): this counts rows of the table, not distinct movies -- confirm it matches the plot titles.
grouped_df = filtered_df.groupby(['year', 'fbid_countries']).size().reset_index(name='count')

def plot_movies(grouped_df):
    """Draw one line per country showing its count for each year.

    Parameters
    ----------
    grouped_df : pandas.DataFrame
        Table providing the columns 'year', 'fbid_countries' and 'count'.

    Returns
    -------
    The figure created by ``make_subplots()``, holding one ``go.Scatter`` trace
    per distinct country (in order of first appearance) and the chart titles.
    """
    figure = make_subplots()

    country_column = grouped_df['fbid_countries']
    for country_name in country_column.unique():
        # Rows belonging to the current country only.
        rows = grouped_df.loc[country_column == country_name]
        line = go.Scatter(x=rows['year'], y=rows['count'], name=country_name)
        figure.add_trace(line)

    layout_options = {
        "title": "Evolution of the Number of Movies with Female Actors by Country",
        "xaxis_title": "Year",
        "yaxis_title": "Number of Movies with Female Actors",
    }
    figure.update_layout(**layout_options)

    return figure

# Build the per-country figure from the aggregated counts and render it.
# `fig` is read again by the following cell, which writes it to disk.
fig = plot_movies(grouped_df)
fig.show()

In [49]:
# Save the current figure as an HTML file.
file_path = 'countries_movies.html'  # You can change the path and file name as needed
fig.write_html(file_path)

In [50]:
def country_to_continent(country_name):
    """Return the continent name for a country name, or "Unknown" if it cannot be resolved.

    The lookup chains three ``pycountry_convert`` calls: country name -> alpha-2
    code -> continent code -> continent name.

    Parameters
    ----------
    country_name
        Country name to resolve. Values the library cannot resolve yield "Unknown".

    Returns
    -------
    str
        The continent name, or the literal "Unknown" when any step of the lookup fails.
    """
    try:
        country_alpha2 = pc.country_name_to_country_alpha2(country_name)
        continent_code = pc.country_alpha2_to_continent_code(country_alpha2)
        return pc.convert_continent_code_to_continent_name(continent_code)
    except Exception:
        # Deliberate best effort: any lookup failure maps to "Unknown".
        # `Exception` (not a bare `except`) lets KeyboardInterrupt and SystemExit
        # propagate, so a long row-wise run can still be interrupted.
        return "Unknown"

# Flatten the list of countries and repeat the other columns for each country
movies = movies.explode('fbid_countries')

# Map countries to continents.
# Each distinct country is resolved once and the result is broadcast to the rows,
# instead of running the Python lookup on every row.
continent_by_country = {
    country: country_to_continent(country)
    for country in movies['fbid_countries'].dropna().unique()
}
# Rows with a missing country have no entry in the mapping and are labelled "Unknown".
movies['continent'] = movies['fbid_countries'].map(continent_by_country).fillna("Unknown")

# Filter out only female actors
female_df = movies[movies['actor_gender'] == 'F']

# One row per (year, continent) with the number of matching female-actor rows;
# rows whose continent could not be resolved are dropped.
grouped_df = female_df.groupby(['year', 'continent']).size().reset_index(name='count')
grouped_df = grouped_df.loc[grouped_df['continent'] != 'Unknown']



# Plotly line chart with one trace per continent
def plot_continents(grouped_df):
    """Draw one line per continent showing its count for each year.

    Parameters
    ----------
    grouped_df : pandas.DataFrame
        Table providing the columns 'year', 'continent' and 'count'.

    Returns
    -------
    The figure created by ``make_subplots()``, holding one ``go.Scatter`` trace
    per distinct continent (in order of first appearance) and the chart titles.
    """
    figure = make_subplots()

    for continent_name in grouped_df['continent'].unique():
        # Boolean mask selecting the rows of the current continent.
        is_current = grouped_df['continent'] == continent_name
        subset = grouped_df[is_current]
        figure.add_trace(
            go.Scatter(x=subset['year'], y=subset['count'], name=continent_name)
        )

    figure.update_layout(
        title="Evolution of the Number of Movies with Female Actors by Continent",
        xaxis_title="Year",
        yaxis_title="Number of Movies with Female Actors",
    )

    return figure

# Build the per-continent figure from the aggregated counts and render it.
# `fig` is read again by the following cell, which writes it to disk.
fig = plot_continents(grouped_df)
fig.show()

In [51]:
# Save the current figure as an HTML file.
file_path = 'continent_movie.html'  # You can change the path and file name as needed
fig.write_html(file_path)

In [45]:
# (latitude, longitude) anchor at which each continent's bubble is drawn.
continent_coordinates = {
    'Asia': (50, 100),
    'Europe': (54, 15),
    'Africa': (0, 20),
    'North America': (40, -100),
    'South America': (-15, -60),
    'Oceania': (-25, 135),
    'Antarctica': (-82, 20)
}

# Years in chronological order, so that slider step 0 is the year shown initially.
years = np.sort(grouped_df['year'].unique())

# Initialize the figure with the first year
first_year = grouped_df['year'].min()
initial_year_data = grouped_df[grouped_df['year'] == first_year]

# Shared marker scale so bubble areas stay comparable across years.
# Dividing by 3 lowers sizeref, which makes the bubbles larger.
sizeref_value = 2. * grouped_df['count'].max() / (40.**2) / 3


def _continent_bubbles(year_data):
    """Build the Scattergeo bubble trace for the rows of one year.

    `year_data` must provide the columns 'continent' and 'count'; every
    continent must be a key of `continent_coordinates`.
    """
    continents = list(year_data['continent'])
    return go.Scattergeo(
        # The tuples are stored as (lat, lon): index 1 is the longitude, index 0 the latitude.
        lon=[continent_coordinates[continent][1] for continent in continents],
        lat=[continent_coordinates[continent][0] for continent in continents],
        text=[f'{continent}: {count} female actors'
              for continent, count in zip(continents, year_data['count'])],  # hover text
        marker=dict(
            size=year_data['count'],
            sizemode='area',
            sizeref=sizeref_value,
            showscale=True
        )
    )


# The base trace needs coordinates too; without lon/lat nothing is drawn
# until the first animation frame is applied.
fig = go.Figure(data=[_continent_bubbles(initial_year_data)])

# Add one frame per year, named with the year's string form.
frames = [
    go.Frame(
        data=[_continent_bubbles(grouped_df[grouped_df['year'] == year])],
        name=str(year)
    )
    for year in years
]

fig.frames = frames

# Update layout and add slider
fig.update_layout(
    title_text='Number of Female Actors by Continent Over Time',
    showlegend=False,
    geo=dict(
        showland=True,
        landcolor="rgb(243, 243, 243)",
        countrycolor="rgb(204, 204, 204)",
    ),
    updatemenus=[{
        "buttons": [
            {
                # args[0] = None: animate through all frames.
                "args": [None, {"frame": {"duration": 500, "redraw": True}, "fromcurrent": True}],
                "label": "Play",
                "method": "animate"
            },
            {
                # args[0] = [None]: animate to "no frame", which stops the running animation.
                "args": [[None], {"frame": {"duration": 0, "redraw": True}, "mode": "immediate", "transition": {"duration": 0}}],
                "label": "Pause",
                "method": "animate"
            }
        ],
        "direction": "left",
        "pad": {"r": 10, "t": 87},
        "showactive": False,
        "type": "buttons",
        "x": 0.1,
        "xanchor": "right",
        "y": 0,
        "yanchor": "top"
    }],
    sliders=[{
        "active": 0,
        "yanchor": "top",
        "xanchor": "left",
        "currentvalue": {
            "font": {"size": 20},
            "prefix": "Year:",
            "visible": True,
            "xanchor": "right"
        },
        "transition": {"duration": 300, "easing": "cubic-in-out"},
        "pad": {"b": 10, "t": 50},
        "len": 0.9,
        "x": 0.1,
        "y": 0,
        # Each step targets a frame by name, so it uses the same str(year) as the frame itself.
        "steps": [{"args": [[str(year)], {"frame": {"duration": 300, "redraw": True}, "mode": "immediate", "transition": {"duration": 300}}],
                  "label": str(year), "method": "animate"} for year in years]
    }]
)

# Show the figure
fig.show()