In [48]:
import pandas as pd

mean_diff = pd.read_csv("data/clean/influenced_names_means_diff.csv")
influenced_meandiff = mean_diff[mean_diff["Influence"] > 0]
# removing common identification mistakes such as "the", "a" or "Mr"
influenced_meandiff = influenced_meandiff[~influenced_meandiff["Character Name"].isin(["the", "a", "Mr"])]
print("Number of influenced names with mean diff: ", len(influenced_meandiff))
influenced_meandiff.sample()



Number of influenced names with mean diff:  1585


Unnamed: 0,Wikipedia ID,Movie Name,Year,Character Name,Count,Full name,Normalized_name,Influence
205,91133,the karate kid,1984,Daniel,23,Daniel LaRusso,DANIEL,4869.833333


In [28]:
prophet = pd.read_csv("data/clean/influenced_names_prophet.csv")
influenced_prophet = prophet[prophet["Influenced"] > 0]
# removing common identification mistakes such as "the", "a" or "Mr"
influenced_prophet = influenced_prophet[~influenced_prophet["Character Name"].isin(["the", "a", "Mr"])]
print("Number of influenced names with mean diff: ", len(influenced_prophet))
influenced_prophet.head()

Number of influenced names with mean diff:  432


Unnamed: 0,Wikipedia ID,Movie Name,Year,Character Name,Count,Full name,Normalized_name,Mean Difference,Influenced
0,451866,mission: impossible ii,2000,Ethan,18,Ethan Hunt,ETHAN,15725.466667,1
1,633411,the avengers,1998,Emma,15,Emma Peel,EMMA,14985.966667,1
2,3727473,man on fire,1987,Samantha,4,"Samantha ""Sam"" Balletto",SAMANTHA,14453.5,1
3,347000,suspiria,1977,Sarah,15,Sarah,SARAH,14372.466667,1
5,320401,barton fink,1991,Taylor,3,Audrey Taylor,TAYLOR,13892.1,1


In [54]:
#remove every non numeric value in mean diff
mean_diff = mean_diff[mean_diff["Influence"].apply(lambda x: str(x).replace(".", "").isdigit())]
threshold = mean_diff['Influence'].quantile(0.75)

significant_names = mean_diff[mean_diff['Influence'] > threshold]
print("Number of significant names:", len(significant_names))
significant_names.head()

Number of significant names: 371


Unnamed: 0,Wikipedia ID,Movie Name,Year,Character Name,Count,Full name,Normalized_name,Influence
151,451866,mission: impossible ii,2000,Ethan,18,Ethan Hunt,ETHAN,15725.466667
152,633411,the avengers,1998,Emma,15,Emma Peel,EMMA,14985.966667
153,3727473,man on fire,1987,Samantha,4,"Samantha ""Sam"" Balletto",SAMANTHA,14453.5
154,347000,suspiria,1977,Sarah,15,Sarah,SARAH,14372.466667
155,483274,point break,1991,Tyler,3,Tyler Endicott,TYLER,14176.666667


In [57]:
sort_significant = significant_names.sort_values(by='Year')

top_per_year = (
    sort_significant.groupby('Year')
    .apply(lambda x: x.nlargest(3, 'Influence'))
    .reset_index(drop=True)
)

top_per_year['Label'] = top_per_year['Character Name'] + " from " + top_per_year['Movie Name']


import plotly.express as px
import plotly.graph_objects as go

# Build a list of years for the slider
years = sorted(sort_significant['Year'].unique())

# Create a figure with a scatter plot for each year
fig = go.Figure()

for year in years:
    yearly_data = top_per_year[top_per_year['Year'] == year]
    
    fig.add_trace(go.Bar(
        x=yearly_data['Label'],  # Character names as x-axis
        y=yearly_data['Influence'],       # Influence as y-axis
        name=str(year),
        visible=(year == years[0])        # Show only the first year initially
    ))

# Add a slider to switch between years
steps = []
for i, year in enumerate(years):
    step = dict(
        method="update",
        args=[
            {"visible": [j == i for j in range(len(years))]},  # Show only the current year
            {"title": f"Top Names for {year} (with mean diff method)"}                # Update title dynamically
        ],
        label=str(year),
    )
    steps.append(step)

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Year: "},
    pad={"t": 50},
    steps=steps
)]

# Update layout for the slider and axis labels
fig.update_layout(
    sliders=sliders,
    title="Most Significant Names by Year",
    xaxis_title="Character Name and Movie",
    yaxis_title="Influence",
    showlegend=False  # Optional: Hide legend since the slider is the primary control
)

# Display the figure
fig.show()





