In [1]:
import pandas as pd

# Invented first names (mean-difference results)
mean_diff = pd.read_csv("data/clean/influenced_names_means_diff.csv")

# Keep rows with a strictly positive influence score, and drop tokens that are
# common identification mistakes such as "the", "a" or "Mr".
has_positive_influence = mean_diff["Influence"].gt(0)
is_identification_mistake = mean_diff["Character Name"].isin(["the", "a", "Mr"])
influenced_meandiff = mean_diff.loc[has_positive_influence & ~is_identification_mistake]

print("Number of influenced names with mean diff: ", len(influenced_meandiff))
influenced_meandiff.head()



Number of influenced names with mean diff:  1585


Unnamed: 0,Wikipedia ID,Movie Name,Year,Character Name,Count,Full name,Normalized_name,Influence
0,31186339,the hunger games,2012,Katniss,4,Katniss,KATNISS,inf
1,22144721,iron man 2,2010,Stark,3,Howard Stark,STARK,inf
2,146947,spider-man,2002,Osborn,6,Harry Osborn,OSBORN,inf
4,443972,hook,1991,Banning,16,Peter Banning,BANNING,inf
5,537416,ace ventura: when nature calls,1995,Abbot,2,Grand Abbot,ABBOT,inf


In [3]:
# All influenced first names (Prophet results)
prophet = pd.read_csv("data/clean/influenced_names_prophet.csv")
influenced_prophet = prophet[prophet["Influenced"] > 0]
# removing common identification mistakes such as "the", "a" or "Mr"
influenced_prophet = influenced_prophet[~influenced_prophet["Character Name"].isin(["the", "a", "Mr"])]
# This count comes from the Prophet file, so the label names Prophet rather
# than the mean-diff method.
print("Number of influenced names with prophet: ", len(influenced_prophet))
influenced_prophet.head()

Number of influenced names with mean diff:  432


Unnamed: 0,Wikipedia ID,Movie Name,Year,Character Name,Count,Full name,Normalized_name,Mean Difference,Influenced
0,451866,mission: impossible ii,2000,Ethan,18,Ethan Hunt,ETHAN,15725.466667,1
1,633411,the avengers,1998,Emma,15,Emma Peel,EMMA,14985.966667,1
2,3727473,man on fire,1987,Samantha,4,"Samantha ""Sam"" Balletto",SAMANTHA,14453.5,1
3,347000,suspiria,1977,Sarah,15,Sarah,SARAH,14372.466667,1
5,320401,barton fink,1991,Taylor,3,Audrey Taylor,TAYLOR,13892.1,1


In [4]:
# Keep only rows whose influence score is a finite number.
# The score is parsed with pd.to_numeric rather than by inspecting its string
# form: a digit-only string test also rejects negative values ("-3.5") and
# scientific notation ("1e-05"), which are valid numbers. NaN, +/-inf and
# unparseable entries are still discarded (NaN and inf both fail `< inf`).
influence_values = pd.to_numeric(mean_diff["Influence"], errors="coerce")
has_finite_influence = influence_values.abs() < float("inf")
mean_diff = mean_diff[has_finite_influence]

# Upper-quartile cut-off computed over the finite scores only
threshold = mean_diff['Influence'].quantile(0.75)

# Names whose score lies strictly above the cut-off
significant_names = mean_diff[mean_diff['Influence'] > threshold]
print("Number of significant names:", len(significant_names))
significant_names.head()

Number of significant names: 371


Unnamed: 0,Wikipedia ID,Movie Name,Year,Character Name,Count,Full name,Normalized_name,Influence
151,451866,mission: impossible ii,2000,Ethan,18,Ethan Hunt,ETHAN,15725.466667
152,633411,the avengers,1998,Emma,15,Emma Peel,EMMA,14985.966667
153,3727473,man on fire,1987,Samantha,4,"Samantha ""Sam"" Balletto",SAMANTHA,14453.5
154,347000,suspiria,1977,Sarah,15,Sarah,SARAH,14372.466667
155,483274,point break,1991,Tyler,3,Tyler Endicott,TYLER,14176.666667


In [3]:
import dash
from dash import dcc, html, Input, Output, State
import plotly.graph_objects as go
import pandas as pd
import dash_bootstrap_components as dbc

# --- Data Preparation ---
# Load the table and turn the ', '-separated genre string into a list so that
# explode() yields one row per (record, genre) pair.
merged_df = pd.read_csv("data/clean/influenced_prophet_with_genres.csv")
merged_df['Genres'] = merged_df['Genres'].str.split(', ')
exploded_df = merged_df.explode('Genres')

# Sum of 'Mean Difference' per genre, largest first; keep the first 25 genres.
genre_totals = exploded_df.groupby('Genres')['Mean Difference'].sum().reset_index()
genre_totals_desc = genre_totals.sort_values(by='Mean Difference', ascending=False)
top_25_genres = genre_totals_desc.head(25)['Genres']

# Restrict the exploded rows to those 25 genres.
filtered_df = exploded_df[exploded_df['Genres'].isin(top_25_genres)]

# Sum of 'Mean Difference' per (genre, name), ordered by genre and then by
# descending total, so head(3) per genre keeps the three largest totals.
name_totals = (
    filtered_df.groupby(['Genres', 'Normalized_name'])['Mean Difference']
    .sum()
    .reset_index()
)
name_totals_ranked = name_totals.sort_values(
    ['Genres', 'Mean Difference'], ascending=[True, False]
)
top_names_by_genre = name_totals_ranked.groupby('Genres').head(3)

# Genres that actually appear in the final table, in order of appearance.
unique_genres = top_names_by_genre['Genres'].unique()

# --- App Initialization ---
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _genre_button(genre):
    """Return the button for one genre; its dict id carries the genre as 'index'."""
    return dbc.Button(
        genre,
        id={'type': 'genre-button', 'index': genre},
        className='custom-button',
        n_clicks=0,
    )


# Create a list of buttons, one per genre
buttons = [_genre_button(genre) for genre in unique_genres]

# Left column: heading plus the vertical stack of genre buttons
genre_sidebar = html.Div(
    [
        html.H3("Select a movie genre:"),
        html.Div(
            buttons,
            style={
                "display": "flex",
                "flexDirection": "column",
                "height": "calc(100% - 50px)",
                "justifyContent": "flex-start",
            },
        ),
    ],
    style={
        'width': '20%',
        'display': 'inline-block',
        'verticalAlign': 'top',
        'padding': '20px',
        'backgroundColor': '#f7f7f7',
        'height': '80vh',  # Matches the height of the graph container
    },
)

# Right column: the bar chart
graph_panel = html.Div(
    [dcc.Graph(id='genre-bar-plot', style={'height': '80vh'})],
    style={
        'width': '75%',
        'display': 'inline-block',
        'verticalAlign': 'top',
        'padding': '20px',
    },
)

# Store holding the currently selected genre (starts on the first genre)
selected_genre_store = dcc.Store(id='selected-genre', data=unique_genres[0])

app.layout = html.Div([genre_sidebar, graph_panel, selected_genre_store])


@app.callback(
    Output('genre-bar-plot', 'figure'),
    Input('selected-genre', 'data')
)
def update_figure(selected_genre):
    """Build the bar chart for the genre held in the 'selected-genre' store.

    Args:
        selected_genre: Genre value used to select rows of ``top_names_by_genre``.

    Returns:
        A Plotly figure with one bar per name, its height being that name's
        'Mean Difference' value.
    """
    genre_rows = top_names_by_genre.loc[top_names_by_genre['Genres'] == selected_genre]
    bars = go.Bar(
        x=genre_rows['Normalized_name'],
        y=genre_rows['Mean Difference'],
        name=selected_genre,
        marker_color='rgb(102,153,255)',
    )
    fig = go.Figure(data=[bars])
    fig.update_layout(
        title=f"Top 3 prénoms influencés par le genre '{selected_genre}'",
        xaxis_title="Names",
        yaxis_title="Mean of influence",
        template="plotly_white",
    )
    return fig


@app.callback(
    Output('selected-genre', 'data'),
    [Input({'type': 'genre-button', 'index': genre}, 'n_clicks') for genre in unique_genres],
    [State('selected-genre', 'data')]
)
def update_selected_genre(*args):
    """Return the genre of the button that triggered the callback.

    Args:
        *args: The ``n_clicks`` value of every genre button followed by the
            stored genre. They are not read; the triggering component is taken
            from the callback context instead.

    Returns:
        The 'index' field of the triggering button's id, or the first genre
        when nothing triggered the callback.
    """
    import json

    ctx = dash.callback_context
    if not ctx.triggered:
        return unique_genres[0]

    # prop_id has the form '<component id as JSON>.<property>'. The JSON part
    # embeds the genre name, which may itself contain a '.', so split on the
    # LAST dot only; splitting on the first one would truncate the JSON.
    prop_id = ctx.triggered[0]['prop_id']
    component_id = prop_id.rsplit('.', 1)[0]
    return json.loads(component_id)['index']


# Callback to update button classes (highlight active button)
@app.callback(
    [Output({'type': 'genre-button', 'index': genre}, 'className') for genre in unique_genres],
    Input('selected-genre', 'data')
)
def update_button_classes(selected_genre):
    """Return one className per genre button, in ``unique_genres`` order.

    The button whose genre equals ``selected_genre`` gets the extra 'active'
    class; every other button gets the plain class.
    """
    class_names = []
    for genre in unique_genres:
        if genre == selected_genre:
            class_names.append('custom-button active')
        else:
            class_names.append('custom-button')
    return class_names


if __name__ == '__main__':
    # Newer Dash releases replace `run_server` with `run`. Prefer `run` when
    # the installed version provides it and fall back to `run_server` otherwise,
    # so the cell starts the server on either API.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)

## Test with honeycomb cells

In [1]:
import dash
from dash import dcc, html, Input, Output, State
import plotly.graph_objects as go
import pandas as pd
import dash_bootstrap_components as dbc
import json

# Data Preparation
# Load the table and turn the ', '-separated genre string into a list so that
# explode() yields one row per (record, genre) pair.
merged_df = pd.read_csv("data/clean/influenced_prophet_with_genres.csv")
merged_df['Genres'] = merged_df['Genres'].str.split(', ')
exploded_df = merged_df.explode('Genres')

# Sum of 'Mean Difference' per genre, largest first; keep the first 25 genres.
genre_totals = exploded_df.groupby('Genres')['Mean Difference'].sum().reset_index()
genre_totals_desc = genre_totals.sort_values(by='Mean Difference', ascending=False)
top_25_genres = genre_totals_desc.head(25)['Genres']

# Restrict the exploded rows to those 25 genres.
filtered_df = exploded_df[exploded_df['Genres'].isin(top_25_genres)]

# Sum of 'Mean Difference' per (genre, name), ordered by genre and then by
# descending total, so head(3) per genre keeps the three largest totals.
name_totals = (
    filtered_df.groupby(['Genres', 'Normalized_name'])['Mean Difference']
    .sum()
    .reset_index()
)
name_totals_ranked = name_totals.sort_values(
    ['Genres', 'Mean Difference'], ascending=[True, False]
)
top_names_by_genre = name_totals_ranked.groupby('Genres').head(3)

# Genres that actually appear in the final table, in order of appearance.
unique_genres = top_names_by_genre['Genres'].unique()

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _genre_hex(genre):
    """Return the clickable 'hex' Div for one genre; its dict id carries the genre."""
    return html.Div(
        genre,
        id={'type': 'genre-hex', 'index': genre},
        className='hex',
        n_clicks=0,
    )


# Create the honeycomb hex buttons
hex_buttons = [_genre_hex(genre) for genre in unique_genres]

# Sidebar: title plus the container holding every hex
sidebar = html.Div(
    [
        html.Div("Select a movie genre:", className='title'),
        html.Div(hex_buttons, className='honeycomb-container'),
    ],
    className='sidebar',
)

# Main panel: the bar chart
graph_container = html.Div(
    [dcc.Graph(id='genre-bar-plot', style={'height': '80vh'})],
    className='graph-container',
)

# Hidden store for selected genre (starts on the first genre)
selected_genre_store = dcc.Store(id='selected-genre', data=unique_genres[0])

app.layout = html.Div([sidebar, graph_container, selected_genre_store])

# Update figure based on selected genre
@app.callback(
    Output('genre-bar-plot', 'figure'),
    Input('selected-genre', 'data')
)
def update_figure(selected_genre):
    """Build the bar chart for the genre held in the 'selected-genre' store.

    Args:
        selected_genre: Genre value used to select rows of ``top_names_by_genre``.

    Returns:
        A Plotly figure with one bar per name, its height being that name's
        'Mean Difference' value.
    """
    genre_rows = top_names_by_genre.loc[top_names_by_genre['Genres'] == selected_genre]
    bars = go.Bar(
        x=genre_rows['Normalized_name'],
        y=genre_rows['Mean Difference'],
        name=selected_genre,
        marker_color='rgb(102,153,255)',
    )
    fig = go.Figure(data=[bars])
    fig.update_layout(
        title=f"Top 3 prénoms influencés par le genre '{selected_genre}'",
        xaxis_title="Names",
        yaxis_title="Mean of influence",
        template="plotly_white",
    )
    return fig

# Update the selected genre when a hex is clicked
@app.callback(
    Output('selected-genre', 'data'),
    [Input({'type': 'genre-hex', 'index': genre}, 'n_clicks') for genre in unique_genres],
    [State('selected-genre', 'data')]
)
def update_selected_genre(*args):
    """Return the genre of the hex that triggered the callback.

    Args:
        *args: The ``n_clicks`` value of every genre hex followed by the
            stored genre. They are not read; the triggering component is taken
            from the callback context instead.

    Returns:
        The 'index' field of the triggering hex's id, or the first genre when
        nothing triggered the callback.
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        return unique_genres[0]

    # prop_id has the form '<component id as JSON>.<property>'. The JSON part
    # embeds the genre name, which may itself contain a '.', so split on the
    # LAST dot only; splitting on the first one would truncate the JSON.
    prop_id = ctx.triggered[0]['prop_id']
    component_id = prop_id.rsplit('.', 1)[0]
    return json.loads(component_id)['index']

# Highlight the active hex
@app.callback(
    [Output({'type': 'genre-hex', 'index': genre}, 'className') for genre in unique_genres],
    Input('selected-genre', 'data')
)
def update_hex_classes(selected_genre):
    """Return one className per genre hex, in ``unique_genres`` order.

    The hex whose genre equals ``selected_genre`` gets the extra 'active'
    class; every other hex gets the plain class.
    """
    class_names = []
    for genre in unique_genres:
        if genre == selected_genre:
            class_names.append('hex active')
        else:
            class_names.append('hex')
    return class_names

if __name__ == '__main__':
    # Newer Dash releases replace `run_server` with `run`. Prefer `run` when
    # the installed version provides it and fall back to `run_server` otherwise,
    # so the cell starts the server on either API.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)
