# Parallel questions study
In this notebook, we carry out the study of the parallel questions related to the influence of movies on baby names, therefore conduction a global analysis.

In [18]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from scipy import stats
import seaborn as sns
from statsmodels.stats.proportion import proportions_ztest

In [19]:
folder_processed_data_path = './data/processed_data/'

# Dataset containing month of release
movie_df = pd.read_csv(os.path.join(folder_processed_data_path, 'movie_df.csv'))
movie_df.set_index(['wiki_ID'], inplace=True)
display(movie_df)

# Dataset containing p_value
name_by_movie_df = pd.read_csv(os.path.join(folder_processed_data_path, 'name_by_movie_ordered_pvalue_10_5_df.csv'))
name_by_movie_df.set_index(['wiki_ID'], inplace=True)
display(name_by_movie_df)

# Dataset containing movie genre
movie_genres_df = pd.read_csv(os.path.join(folder_processed_data_path, 'movie_genres_df.csv'))
movie_genres_df.set_index(['wiki_ID'], inplace=True)
display(movie_genres_df)

Unnamed: 0_level_0,mov_name,year,month,revenue,numVotes,averageRating
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
975900,Ghosts of Mars,2001,8.0,14010832.0,56880,4.9
3196793,Getting Away with Murder: The JonBenét Ramsey ...,2000,2.0,,69,6.0
28463795,Brun bitter,1988,,,40,5.6
9363483,White Of The Eye,1987,,,2891,6.1
261236,A Woman in Flames,1983,,,623,5.9
...,...,...,...,...,...,...
35228177,Mermaids: The Body Found,2011,3.0,,1711,4.6
34980460,Knuckle,2011,1.0,,3192,6.8
9971909,Another Nice Mess,1972,9.0,,111,5.8
913762,The Super Dimension Fortress Macross II: Lover...,1992,5.0,,657,6.0


Unnamed: 0_level_0,char_words,gender,order,p_value,slope_change,t_stat
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3217,Gold,,6.0,,,
3217,Linda,F,7.0,0.676072,-0.000675,0.429187
3217,Henry,M,4.0,0.068422,-0.002435,2.019954
3217,Duke,M,4.0,0.582585,0.000108,-0.566260
3217,Warrior,M,9.0,,,
...,...,...,...,...,...,...
37478048,Ajay,M,9.0,0.436957,-0.000126,0.806658
37501922,Murphy,F,3.0,0.234444,0.000354,-1.257988
37501922,Hunter,M,1.0,0.000021,-0.035578,7.051709
37501922,John,M,1.0,0.052067,-0.012279,2.177768


Unnamed: 0_level_0,genre
wiki_ID,Unnamed: 1_level_1
330,Comedy-drama
330,Drama
3217,Action
3217,Comedy
3217,Time travel
...,...
37476824,Crime Comedy
37476824,Caper story
37476824,Crime Fiction
37478048,Comedy film


### Question 1: Month of release

In [20]:
# First, aggregate dataframe with p_value table with dataframe containing release month 
name_by_movie_aggregate_df = name_by_movie_df.merge(movie_df, how='left', left_on='wiki_ID', right_on='wiki_ID')
display(name_by_movie_aggregate_df)

Unnamed: 0_level_0,char_words,gender,order,p_value,slope_change,t_stat,mov_name,year,month,revenue,numVotes,averageRating
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3217,Gold,,6.0,,,,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Linda,F,7.0,0.676072,-0.000675,0.429187,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Henry,M,4.0,0.068422,-0.002435,2.019954,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Duke,M,4.0,0.582585,0.000108,-0.566260,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Warrior,M,9.0,,,,Army of Darkness,1992,10.0,21502796.0,191068,7.4
...,...,...,...,...,...,...,...,...,...,...,...,...
37478048,Ajay,M,9.0,0.436957,-0.000126,0.806658,Mr. Bechara,1996,,,395,5.4
37501922,Murphy,F,3.0,0.234444,0.000354,-1.257988,Terminal Bliss,1992,,,245,4.4
37501922,Hunter,M,1.0,0.000021,-0.035578,7.051709,Terminal Bliss,1992,,,245,4.4
37501922,John,M,1.0,0.052067,-0.012279,2.177768,Terminal Bliss,1992,,,245,4.4


In [21]:
summer = [6.0, 7.0, 8.0]
fall = [9.0,10.0,11.0]
winter = [12.0,1.0,2.0]
spring = [3.0,4.0,5.0]
summer_movies_df = name_by_movie_aggregate_df[name_by_movie_aggregate_df['month'].isin(summer)]
fall_movies_df = name_by_movie_aggregate_df[name_by_movie_aggregate_df['month'].isin(fall)]
winter_movies_df = name_by_movie_aggregate_df[name_by_movie_aggregate_df['month'].isin(winter)]
spring_movies_df = name_by_movie_aggregate_df[name_by_movie_aggregate_df['month'].isin(spring)]


display(summer_movies_df)
display(fall_movies_df)
display(winter_movies_df)
display(spring_movies_df)

Unnamed: 0_level_0,char_words,gender,order,p_value,slope_change,t_stat,mov_name,year,month,revenue,numVotes,averageRating
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3746,Deckard,M,0.0,,,,Blade Runner,1982,6.0,33139618.0,804384,8.1
3746,Eldon,M,8.0,0.653938,-0.000105,0.460773,Blade Runner,1982,6.0,33139618.0,804384,8.1
3746,Lewis,M,12.0,0.327638,-0.000698,1.024419,Blade Runner,1982,6.0,33139618.0,804384,8.1
3746,Bear,M,11.0,,,,Blade Runner,1982,6.0,33139618.0,804384,8.1
3746,Leon,M,7.0,0.469966,0.000525,-0.748319,Blade Runner,1982,6.0,33139618.0,804384,8.1
...,...,...,...,...,...,...,...,...,...,...,...,...
36699915,Luke,M,5.0,0.846639,0.001421,-0.198023,Percy Jackson & the Olympians: Sea of Monsters,2013,8.0,,123248,5.7
36699915,Underwood,M,1.0,,,,Percy Jackson & the Olympians: Sea of Monsters,2013,8.0,,123248,5.7
36699915,Chase,F,2.0,0.149005,0.011448,-1.551732,Percy Jackson & the Olympians: Sea of Monsters,2013,8.0,,123248,5.7
36699915,Circe,F,,,,,Percy Jackson & the Olympians: Sea of Monsters,2013,8.0,,123248,5.7


Unnamed: 0_level_0,char_words,gender,order,p_value,slope_change,t_stat,mov_name,year,month,revenue,numVotes,averageRating
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3217,Gold,,6.0,,,,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Linda,F,7.0,0.676072,-0.000675,0.429187,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Henry,M,4.0,0.068422,-0.002435,2.019954,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Duke,M,4.0,0.582585,0.000108,-0.566260,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Warrior,M,9.0,,,,Army of Darkness,1992,10.0,21502796.0,191068,7.4
...,...,...,...,...,...,...,...,...,...,...,...,...
37322106,Major,M,0.0,0.080101,-0.002548,1.927686,Jab Tak Hai Jaan,2012,11.0,,58012,6.7
37373877,Beth,F,5.0,0.425923,-0.000270,0.826799,Crazy Eights,2006,10.0,,3338,3.8
37373877,Patterson,F,5.0,,,,Crazy Eights,2006,10.0,,3338,3.8
37373877,Jennifer,F,0.0,0.687248,-0.003315,0.413414,Crazy Eights,2006,10.0,,3338,3.8


Unnamed: 0_level_0,char_words,gender,order,p_value,slope_change,t_stat,mov_name,year,month,revenue,numVotes,averageRating
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3837,Lamarr,M,3.0,0.765429,0.000046,-0.305851,Blazing Saddles,1974,2.0,119500000.0,147934,7.7
3837,Van,M,11.0,0.249160,-0.000456,1.216756,Blazing Saddles,1974,2.0,119500000.0,147934,7.7
3837,Bart,M,0.0,0.861123,0.000158,-0.179090,Blazing Saddles,1974,2.0,119500000.0,147934,7.7
3837,Lyle,M,6.0,0.879211,-0.000105,0.155542,Blazing Saddles,1974,2.0,119500000.0,147934,7.7
3837,Buddy,M,18.0,0.965905,0.000018,-0.043728,Blazing Saddles,1974,2.0,119500000.0,147934,7.7
...,...,...,...,...,...,...,...,...,...,...,...,...
36956792,Kid,M,18.0,,,,The Water Horse: Legend of the Deep,2007,12.0,103071443.0,42523,6.4
36956792,Charlie,M,5.0,0.000208,-0.006215,5.427450,The Water Horse: Legend of the Deep,2007,12.0,103071443.0,42523,6.4
36956792,Beach,M,18.0,,,,The Water Horse: Legend of the Deep,2007,12.0,103071443.0,42523,6.4
36956792,Walker,M,8.0,0.561595,-0.000575,0.598551,The Water Horse: Legend of the Deep,2007,12.0,103071443.0,42523,6.4


Unnamed: 0_level_0,char_words,gender,order,p_value,slope_change,t_stat,mov_name,year,month,revenue,numVotes,averageRating
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
4560,Morrison,M,19.0,,,,Braveheart,1995,5.0,211409945.0,1072580,8.3
4560,Edward,M,3.0,0.713319,-0.000845,0.377036,Braveheart,1995,5.0,211409945.0,1072580,8.3
4560,Campbell,M,5.0,0.113180,-0.000474,1.721204,Braveheart,1995,5.0,211409945.0,1072580,8.3
4560,Murron,F,1.0,,,,Braveheart,1995,5.0,211409945.0,1072580,8.3
4560,William,M,0.0,0.006095,-0.015277,3.384440,Braveheart,1995,5.0,211409945.0,1072580,8.3
...,...,...,...,...,...,...,...,...,...,...,...,...
36814246,Girl,F,4.0,,,,Eraserhead,1977,3.0,7000000.0,124128,7.3
36814246,Mary,F,1.0,0.017282,-0.041302,2.799750,Eraserhead,1977,3.0,7000000.0,124128,7.3
36814246,Beautiful,F,4.0,,,,Eraserhead,1977,3.0,7000000.0,124128,7.3
36814246,Hall,F,4.0,,,,Eraserhead,1977,3.0,7000000.0,124128,7.3


In [22]:
summer_movies_df.index.unique()

Index([    3746,     3947,     4231,     4726,     4727,     4728,     4729,
           4730,     8481,     9979,
       ...
       36306987, 36329343, 36354051, 36354224, 36422681, 36448415, 36478252,
       36566804, 36617100, 36699915],
      dtype='int64', name='wiki_ID', length=4096)

In [23]:
prop_summer = len(summer_movies_df[summer_movies_df['p_value']<0.1])/len(summer_movies_df['p_value'])
display(prop_summer)
prop_fall = len(fall_movies_df[fall_movies_df['p_value']<0.1])/len(fall_movies_df['p_value'])
display(prop_fall)
prop_winter = len(winter_movies_df[winter_movies_df['p_value']<0.1])/len(winter_movies_df['p_value'])
display(prop_winter)
prop_spring = len(spring_movies_df[spring_movies_df['p_value']<0.1])/len(spring_movies_df['p_value'])
display(prop_spring)

0.13600104190407972

0.14431685722080306

0.13875639832480224

0.13644032637256256

Faire test statistique pour dire que different ? Je dirai que oui et on le fait en dessous

In [24]:
from scipy.stats import chi2_contingency

# Organize the data into a contingency table
observed_data = [
    [len(summer_movies_df[summer_movies_df['p_value'] < 0.1]), len(summer_movies_df['p_value'])],
    [len(fall_movies_df[fall_movies_df['p_value'] < 0.1]), len(fall_movies_df['p_value'])],
    [len(winter_movies_df[winter_movies_df['p_value'] < 0.1]), len(winter_movies_df['p_value'])],
    [len(spring_movies_df[spring_movies_df['p_value'] < 0.1]), len(spring_movies_df['p_value'])]
]

# Perform the chi-squared test
chi2, p, _, _ = chi2_contingency(observed_data)

# Print the results
print("Chi-squared value:", chi2)
print("P-value:", p)

Chi-squared value: 10.399003008260278
P-value: 0.015461904797585156


H0 : The proportions are all equal 

We can reject the null hypothesis at the 5% significance level.

### Question 2: Movie Genre has an impact ?

In [25]:
# First, aggregate dataframe with p_value table with dataframe containing movie genre
# Outer merge required in order to obtain for each name of each film, all the possible genre it can be associated to 
movie_genre_aggregate_df = name_by_movie_df.merge(movie_genres_df, how='outer', left_on='wiki_ID', right_on='wiki_ID')
movie_genre_aggregate_df.head(25)

Unnamed: 0_level_0,char_words,gender,order,p_value,slope_change,t_stat,genre
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3217,Gold,,6.0,,,,Action
3217,Gold,,6.0,,,,Comedy
3217,Gold,,6.0,,,,Time travel
3217,Gold,,6.0,,,,Black comedy
3217,Gold,,6.0,,,,Zombie Film
3217,Gold,,6.0,,,,Horror Comedy
3217,Gold,,6.0,,,,Action/Adventure
3217,Gold,,6.0,,,,Costume drama
3217,Gold,,6.0,,,,Stop motion
3217,Gold,,6.0,,,,Horror


In [26]:
name_by_genre_significant_df = movie_genre_aggregate_df.groupby('genre').apply(lambda x: x[x['p_value'] < 0.1])
display(name_by_genre_significant_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,char_words,gender,order,p_value,slope_change,t_stat,genre
genre,wiki_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Absurdism,19701,Tim,M,1.0,0.001787,-0.010096,4.090696,Absurdism
Absurdism,46505,Victor,M,11.0,0.070347,-0.003045,2.003778,Absurdism
Absurdism,46505,Ted,M,0.0,0.046833,-0.001105,2.238396,Absurdism
Absurdism,46505,Johnny,M,10.0,0.039946,-0.002623,2.328911,Absurdism
Absurdism,46505,Roger,M,3.0,0.066845,-0.004609,2.033517,Absurdism
...,...,...,...,...,...,...,...,...
Zombie Film,31633054,Andy,M,0.0,0.077429,-0.001991,1.947633,Zombie Film
Zombie Film,33432215,Cassie,F,3.0,0.061421,-0.001099,2.082602,Zombie Film
Zombie Film,33432215,Sarah,F,7.0,0.000060,-0.020843,6.283305,Zombie Film
Zombie Film,33432215,Mack,M,4.0,0.023270,-0.001138,2.633292,Zombie Film


In [27]:
# Compute proportion of impacted names by genre
name_by_genre_prop_significant_df = movie_genre_aggregate_df.groupby('genre').apply(lambda x: pd.Series({
        'prop_signif_per_genre': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'prop_non_signi': (x['p_value'] > 0.1).sum()/len(x['p_value']),
        'prop_nan': (x['p_value'].isna()).sum()/len(x['p_value']),
        'avg_t_value_per_genre': x['t_stat'].mean(),
        'avg_slope_change' : x['slope_change'].mean()
    }))
display(name_by_genre_prop_significant_df)


Unnamed: 0_level_0,prop_signif_per_genre,prop_non_signi,prop_nan,avg_t_value_per_genre,avg_slope_change
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Absurdism,0.132432,0.570270,0.297297,0.500326,-0.000527
Acid western,0.187500,0.562500,0.250000,0.843112,-0.002524
Action,0.117510,0.469206,0.413283,0.457436,-0.000264
Action Comedy,0.124517,0.505792,0.369691,0.431969,-0.000097
Action Thrillers,0.128478,0.517004,0.354517,0.421422,-0.000294
...,...,...,...,...,...
World History,0.000000,0.000000,1.000000,,
World cinema,0.067656,0.379661,0.552683,0.184046,0.000080
Wuxia,0.032558,0.158140,0.809302,0.390231,-0.000327
Z movie,0.000000,0.000000,1.000000,,


#### Analysis looking at time effects

In [28]:
# Need to merge datasets containing "p_value" (name_by_movie_df), "movie_genre" (movie_genres_df), "release_date" (movie_df)
# => aggregate "name_by_movie_aggregate_df" with "movie_genres_df"
movie_genre_aggregate_with_years_df = movie_genre_aggregate_df.merge(movie_df, how='left', left_on='wiki_ID', right_on='wiki_ID')
display(movie_genre_aggregate_with_years_df)

Unnamed: 0_level_0,char_words,gender,order,p_value,slope_change,t_stat,genre,mov_name,year,month,revenue,numVotes,averageRating
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3217,Gold,,6.0,,,,Action,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Gold,,6.0,,,,Comedy,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Gold,,6.0,,,,Time travel,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Gold,,6.0,,,,Black comedy,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Gold,,6.0,,,,Zombie Film,Army of Darkness,1992,10.0,21502796.0,191068,7.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
37241569,,,,,,,Action,Cold War,2012,11.0,,5033,6.6
37476824,,,,,,,Comedy,I Love New Year,2011,,,876,3.4
37476824,,,,,,,Crime Comedy,I Love New Year,2011,,,876,3.4
37476824,,,,,,,Caper story,I Love New Year,2011,,,876,3.4


In [29]:
signi_names_per_genre_per_year_df = movie_genre_aggregate_with_years_df.groupby(['genre','year']).apply(lambda x: x[x['p_value'] < 0.1])
signi_names_per_genre_per_year_df.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,char_words,gender,order,p_value,slope_change,t_stat,genre,mov_name,year,month,revenue,numVotes,averageRating
genre,year,wiki_ID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Absurdism,1964,248601,George,M,2.0,0.02664,-0.011959,2.557438,Absurdism,A Hard Day's Night,1964,7.0,,47276,7.5
Absurdism,1964,248601,John,M,0.0,0.047794,0.073648,-2.22679,Absurdism,A Hard Day's Night,1964,7.0,,47276,7.5
Absurdism,1974,19701,Tim,M,1.0,0.001787,-0.010096,4.090696,Absurdism,Monty Python and the Holy Grail,1974,4.0,,560662,8.2
Absurdism,1978,75261,Robert,M,9.0,0.004233,-0.053751,3.591418,Absurdism,National Lampoon's Animal House,1978,7.0,141600000.0,127114,7.4
Absurdism,1978,75261,Dave,M,19.0,0.030485,-0.001459,2.481634,Absurdism,National Lampoon's Animal House,1978,7.0,141600000.0,127114,7.4
Absurdism,1978,75261,Barbara,F,15.0,0.004086,-0.018562,3.611592,Absurdism,National Lampoon's Animal House,1978,7.0,141600000.0,127114,7.4
Absurdism,1978,75261,Kent,M,8.0,0.022216,-0.002411,2.659263,Absurdism,National Lampoon's Animal House,1978,7.0,141600000.0,127114,7.4
Absurdism,1978,75261,Douglas,M,,0.088331,-0.010605,1.869884,Absurdism,National Lampoon's Animal House,1978,7.0,141600000.0,127114,7.4
Absurdism,1978,75261,Donald,M,6.0,0.011954,-0.010231,3.005882,Absurdism,National Lampoon's Animal House,1978,7.0,141600000.0,127114,7.4
Absurdism,1978,75261,Guy,M,39.0,0.078059,-0.00166,1.942873,Absurdism,National Lampoon's Animal House,1978,7.0,141600000.0,127114,7.4


In [30]:
# Compute proportion of impacted names by genre by year
name_by_genre_by_year_prop_significant_df = movie_genre_aggregate_with_years_df.groupby(['genre','year']).apply(lambda x: pd.Series({
        'prop_signif_per_genre': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_slope_change': x['slope_change'].mean()
    }))
display(name_by_genre_by_year_prop_significant_df)
name_by_genre_by_year_prop_significant_df.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,prop_signif_per_genre,avg_slope_change
genre,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Absurdism,1929,0.000000,
Absurdism,1930,0.000000,
Absurdism,1932,0.000000,0.000171
Absurdism,1938,0.000000,
Absurdism,1940,0.000000,
...,...,...,...
Zombie Film,2008,0.168675,-0.000864
Zombie Film,2009,0.093023,0.001817
Zombie Film,2010,0.172414,-0.001540
Zombie Film,2011,0.307692,-0.003649


Unnamed: 0_level_0,Unnamed: 1_level_0,prop_signif_per_genre,avg_slope_change
genre,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Absurdism,1929,0.0,
Absurdism,1930,0.0,
Absurdism,1932,0.0,0.000171
Absurdism,1938,0.0,
Absurdism,1940,0.0,
Absurdism,1941,0.0,
Absurdism,1950,0.0,-0.000357
Absurdism,1959,0.0,
Absurdism,1964,0.333333,0.014922
Absurdism,1965,0.0,0.00162


### Question 3: Attendence/popularity + ratings

In [31]:
# The dataframe "name_by_movie_aggregate_df" already contains the wanted caracteristics
display(name_by_movie_aggregate_df)
name_by_movie_aggregate_df['numVotes'].max()

#Proportion of the film that had an influence in data segmented by number of votes

prop_0_10k = len(name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] < 10000) & (name_by_movie_aggregate_df['p_value'] < 0.1)]) / len(name_by_movie_aggregate_df[name_by_movie_aggregate_df['numVotes'] < 10000])

print(f"Proportion of movies with numVotes < 10k and p_value < 0.1: {prop_0_10k :.3%}")

prop_10k_100k = len(name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 10000) & (name_by_movie_aggregate_df['numVotes'] < 100000) & (name_by_movie_aggregate_df['p_value'] < 0.1)]) / len(name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 10000) & (name_by_movie_aggregate_df['numVotes'] < 100000)])

print(f"Proportion of movies with numVotes in [10k-100k] and p_value < 0.1: {prop_10k_100k :.3%}")

prop_100k_1M = len(name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 100000) & (name_by_movie_aggregate_df['numVotes'] < 1000000) & (name_by_movie_aggregate_df['p_value'] < 0.1)]) / len(name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 100000) & (name_by_movie_aggregate_df['numVotes'] < 1000000)])

print(f"Proportion of movies with numVotes in [100k-1M] and p_value < 0.1: {prop_100k_1M :.3%}")

prop_greater_1M = len(name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 1000000) & (name_by_movie_aggregate_df['p_value'] < 0.1)]) / len(name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 1000000)])

print(f"Proportion of movies with numVotes > 1M and p_value < 0.1: {prop_greater_1M :.3%}")


len(name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 1000000) & (name_by_movie_aggregate_df['p_value'] < 0.1)]['numVotes'].unique())


Unnamed: 0_level_0,char_words,gender,order,p_value,slope_change,t_stat,mov_name,year,month,revenue,numVotes,averageRating
wiki_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3217,Gold,,6.0,,,,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Linda,F,7.0,0.676072,-0.000675,0.429187,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Henry,M,4.0,0.068422,-0.002435,2.019954,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Duke,M,4.0,0.582585,0.000108,-0.566260,Army of Darkness,1992,10.0,21502796.0,191068,7.4
3217,Warrior,M,9.0,,,,Army of Darkness,1992,10.0,21502796.0,191068,7.4
...,...,...,...,...,...,...,...,...,...,...,...,...
37478048,Ajay,M,9.0,0.436957,-0.000126,0.806658,Mr. Bechara,1996,,,395,5.4
37501922,Murphy,F,3.0,0.234444,0.000354,-1.257988,Terminal Bliss,1992,,,245,4.4
37501922,Hunter,M,1.0,0.000021,-0.035578,7.051709,Terminal Bliss,1992,,,245,4.4
37501922,John,M,1.0,0.052067,-0.012279,2.177768,Terminal Bliss,1992,,,245,4.4


Proportion of movies with numVotes < 10k and p_value < 0.1: 12.741%
Proportion of movies with numVotes in [10k-100k] and p_value < 0.1: 15.123%
Proportion of movies with numVotes in [100k-1M] and p_value < 0.1: 15.200%
Proportion of movies with numVotes > 1M and p_value < 0.1: 13.737%


50

Assumption: 
-Attendence is estimated by the number of votes
-A threshold of # of votes anove wich we start to study the influence of rating 

Ideas: 

-separate data according to number of votes & then separate data accordimng to rating 

-separate first according to votes and then in the segments of votes separates bad and good reviews

Question 4 : Faire la moyenne

In [16]:
#We segment the data frame according to the number of votes

votes_seg_0_10k = name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] < 10000) & (name_by_movie_aggregate_df['p_value'] < 0.1)]
votes_seg_10k_100k = name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 10000) & (name_by_movie_aggregate_df['numVotes'] < 100000) & (name_by_movie_aggregate_df['p_value'] < 0.1)]
votes_seg_100k_1M = name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 100000) & (name_by_movie_aggregate_df['numVotes'] < 1000000) & (name_by_movie_aggregate_df['p_value'] < 0.1)]
votes_seg_1M_inf = name_by_movie_aggregate_df[(name_by_movie_aggregate_df['numVotes'] > 1000000) & (name_by_movie_aggregate_df['p_value'] < 0.1)]

a = [votes_seg_0_10k['slope_change'].mean(), votes_seg_10k_100k['slope_change'].mean(), votes_seg_100k_1M['slope_change'].mean(), votes_seg_1M_inf['slope_change'].mean()]
index_names = ['0-10k', '10k-100k', '100k-1M', '1M-inf']
results = pd.DataFrame(a, index=index_names,columns = ['avg_slope_change'])
results.index.name = 'Seg_numVotes'
display(results)

Unnamed: 0_level_0,avg_slope_change
Seg_numVotes,Unnamed: 1_level_1
0-10k,-0.00129
10k-100k,-0.001823
100k-1M,-0.002344
1M-inf,-0.000799


In [44]:
name_by_movie_aggregate_df_significant = name_by_movie_aggregate_df[name_by_movie_aggregate_df['p_value'] < 0.1]

#We segment the data frame according to the number of votes

##Calculate the average cahnge of slopes for the different number of vote segments 

numVotes_bins = [0,10000,100000,1000000,np.inf]
segments_numVotes_label = ['0-10000','10000-100000','100000-1000000','1000000+']
name_by_movie_aggregate_df_significant['numVotes_segmented']  = pd.cut(name_by_movie_aggregate_df_significant['numVotes'],numVotes_bins,labels=segments_numVotes_label,right=True)

avg_slopes_numVotes = name_by_movie_aggregate_df_significant.groupby('numVotes_segmented')[['slope_change']].mean()
avg_slopes_numVotes.rename(columns={'slope_change': 'avg_slopes_change'},inplace=True)

display(avg_slopes_numVotes) #Average slope change per segment

avg_magnitude_slopes_change_numVotes = name_by_movie_aggregate_df_significant.groupby('numVotes_segmented')[['slope_change']].apply(lambda x: x.abs().mean())
avg_magnitude_slopes_change_numVotes.rename(columns={'slope_change': 'avg_magnitude_slopes_change'},inplace=True)
display(avg_magnitude_slopes_change_numVotes) #Average of the magnitude of the slope change



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  name_by_movie_aggregate_df_significant['numVotes_segmented']  = pd.cut(name_by_movie_aggregate_df_significant['numVotes'],numVotes_bins,labels=segments_numVotes_label,right=True)


Unnamed: 0_level_0,avg_slopes_change
numVotes_segmented,Unnamed: 1_level_1
0-10000,-0.00129
10000-100000,-0.001823
100000-1000000,-0.002344
1000000+,-0.000799


Unnamed: 0_level_0,avg_magnitude_slopes_change
numVotes_segmented,Unnamed: 1_level_1
0-10000,0.013986
10000-100000,0.010802
100000-1000000,0.009755
1000000+,0.011324


In [42]:
#We segment the data frame according to the rating
#Calculate the average change of slopes for the different rating segements

rating_quantiles = np.quantile(name_by_movie_aggregate_df_significant['averageRating'],[0.25,0.5,0.75])
#display(rating_quantiles)

# display((name_by_movie_aggregate_df_significant['averageRating']<= 5.5).sum()/len(name_by_movie_aggregate_df_significant))

rating_bins = [0,rating_quantiles[0],rating_quantiles[1],rating_quantiles[2],10]
segments_rating_label = ['0-5.5','5.5-6.3','6.3-6.9','6.9-10']
name_by_movie_aggregate_df_significant['rating_segmented']  = pd.cut(name_by_movie_aggregate_df_significant['averageRating'],rating_bins,labels=segments_rating_label,right=True)
avg_slopes_change_rating = name_by_movie_aggregate_df_significant.groupby('rating_segmented')[['slope_change']].mean()
avg_slopes_change_rating.rename(columns = {'slope_change': 'avg_slopes_change'},inplace=True)
display(avg_slopes_change_rating)

avg_magnitude_slopes_change_rating = name_by_movie_aggregate_df_significant.groupby('rating_segmented')[['slope_change']].apply(lambda x : x.abs().mean())
avg_magnitude_slopes_change_rating.rename(columns = {'slope_change':'avg_magnitude_slopes_change'})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  name_by_movie_aggregate_df_significant['rating_segmented']  = pd.cut(name_by_movie_aggregate_df_significant['averageRating'],rating_bins,labels=segments_rating_label,right=True)


Unnamed: 0_level_0,avg_slopes_change
rating_segmented,Unnamed: 1_level_1
0-5.5,-0.001735
5.5-6.3,-0.001851
6.3-6.9,-0.00136
6.9-10,-0.001422


Unnamed: 0_level_0,avg_magnitude_slopes_change
rating_segmented,Unnamed: 1_level_1
0-5.5,0.011755
5.5-6.3,0.012728
6.3-6.9,0.012693
6.9-10,0.012424


### Question 4: Character Importance in film

In [None]:
# The dataframe "name_by_movie_df" already contains the wanted caracteristics ("order")
#name_by_order_df = name_by_movie_aggregate_df.groupby("order").apply(lambda x: x[x['p_value'] < 0.1])
#display(name_by_order_df)
name_by_order_df = name_by_movie_df.groupby("order").apply(lambda x: x[x['p_value'] < 0.1])
display(name_by_order_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,char_words,gender,order,p_value,slope_change,t_stat
order,wiki_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,4560,William,M,0.0,0.006095,-0.015277,3.384440
0.0,5035,Eric,M,0.0,0.000031,-0.025032,6.755076
0.0,5729,Harold,M,0.0,0.045354,-0.001979,2.256695
0.0,19715,Gracie,F,0.0,0.013734,-0.008358,2.928240
0.0,22751,Julia,F,0.0,0.013218,0.023395,-2.949628
...,...,...,...,...,...,...,...
94.0,9834441,Lily,F,94.0,0.000714,0.023868,-4.642400
95.0,20777420,Thomas,M,95.0,0.001181,-0.010917,4.336954
98.0,370064,Anderson,F,98.0,0.001162,-0.003267,4.346455
98.0,25079197,Tyson,M,98.0,0.001493,0.003804,-4.197156


In [51]:
name_by_order_prop_df = name_by_movie_df.groupby("order").apply(lambda x: pd.Series({
        'prop_signif_per_order': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_slope_change': x['slope_change'].mean(),
        'total_number_signif_per_order': (x['p_value'] < 0.1).sum(),
    }))
display(name_by_order_prop_df)


Unnamed: 0_level_0,prop_signif_per_order,avg_slope_change,total_number_signif_per_order
order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,0.152382,-0.000224,3723.0
1.0,0.153846,0.000031,2910.0
2.0,0.150620,-0.000290,2187.0
3.0,0.148164,-0.000378,1767.0
4.0,0.144475,-0.000353,1454.0
...,...,...,...
151.0,0.000000,0.001574,0.0
152.0,0.000000,-0.000905,0.0
169.0,0.000000,-0.000067,0.0
300.0,0.000000,-0.000038,0.0


#### Does movie genre and caracter role are linked ?

In [52]:
# Does the order influence is impacted by movie genre ? Study of impact due to role importance per movie genre
name_by_order_by_genre_prop_df = movie_genre_aggregate_df.groupby(['order','genre']).apply(lambda x: pd.Series({
        'prop_signif_per_order_per_genre': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_slope_change': x['slope_change'].mean(),
        'total_number_signif_per_order_per_genre': (x['p_value'] < 0.1).sum(),
    }))
display(name_by_order_by_genre_prop_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,prop_signif_per_order_per_genre,avg_slope_change,total_number_signif_per_order_per_genre
order,genre,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,Absurdism,0.189873,0.003077,15.0
0.0,Acid western,0.500000,-0.000975,4.0
0.0,Action,0.150447,-0.000131,657.0
0.0,Action Comedy,0.113402,0.000727,11.0
0.0,Action Thrillers,0.159269,-0.000243,61.0
...,...,...,...,...
302.0,Biographical film,0.000000,,0.0
302.0,Biography,0.000000,,0.0
302.0,Drama,0.000000,,0.0
302.0,Period piece,0.000000,,0.0


### Does the order of a name influence differently according to gender ?
<span style="color:red"> *Prendre seulement les valeur ou p less 0.1 pour faire l'etude des slopes ? Si on les gardes ça va influencer nos moyenne avec des truc pas significantes *</span>

In [62]:
# Calculate the average magnitude of slope change on all the data
name_by_order_by_gender_prop_df = movie_genre_aggregate_df.groupby(['order','gender']).apply(lambda x: pd.Series({
        'prop_signif_per_order_per_genre': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_magnitude_slope_change': x['slope_change'].abs().mean(),
        'total_number_signif_per_order_per_genre': (x['p_value'] < 0.1).sum(),
    }))
display(name_by_order_by_gender_prop_df)

# Calculate the average magnitude of slope change on data having a slope change statistically significant

name_by_order_by_gender_prop_aggregate_significant = name_by_movie_aggregate_df_significant.groupby(['order','gender']).apply(lambda x: pd.Series({
        'prop_signif_per_order_per_genre': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_magnitude_slope_change': x['slope_change'].abs().mean(),
        'total_number_signif_per_order_per_genre': (x['p_value'] < 0.1).sum(),
    }))
display(name_by_order_by_gender_prop_aggregate_significant)

# Calculate the average of slope change on data having a slope change statistically significant

name_by_order_by_gender_prop_aggregate_sig = name_by_movie_aggregate_df_significant.groupby(['order','gender']).apply(lambda x: pd.Series({
        'prop_signif_per_order_per_genre': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_magnitude_slope_change': x['slope_change'].mean(),
        'total_number_signif_per_order_per_genre': (x['p_value'] < 0.1).sum(),
    }))
display(name_by_order_by_gender_prop_aggregate_sig)

Unnamed: 0_level_0,Unnamed: 1_level_0,prop_signif_per_order_per_genre,avg_magnitude_slope_change,total_number_signif_per_order_per_genre
order,gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,F,0.176309,0.005117,5336.0
0.0,M,0.147234,0.004940,10602.0
1.0,F,0.166325,0.004809,6646.0
1.0,M,0.146448,0.004309,5978.0
2.0,F,0.164852,0.004664,4420.0
...,...,...,...,...
151.0,M,0.000000,0.001574,0.0
152.0,F,0.000000,0.000905,0.0
169.0,M,0.000000,0.000067,0.0
300.0,M,0.000000,0.000038,0.0


Unnamed: 0_level_0,Unnamed: 1_level_0,prop_signif_per_order_per_genre,avg_magnitude_slope_change,total_number_signif_per_order_per_genre
order,gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,F,1.0,0.014784,1283.0
0.0,M,1.0,0.015421,2392.0
1.0,F,1.0,0.014767,1540.0
1.0,M,1.0,0.013695,1343.0
2.0,F,1.0,0.013123,1015.0
...,...,...,...,...
94.0,F,1.0,0.012731,2.0
95.0,M,1.0,0.010917,1.0
98.0,F,1.0,0.003267,1.0
98.0,M,1.0,0.003804,1.0


Unnamed: 0_level_0,Unnamed: 1_level_0,prop_signif_per_order_per_genre,avg_magnitude_slope_change,total_number_signif_per_order_per_genre
order,gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,F,1.0,-0.000833,1283.0
0.0,M,1.0,-0.002366,2392.0
1.0,F,1.0,0.000097,1540.0
1.0,M,1.0,-0.001762,1343.0
2.0,F,1.0,-0.001191,1015.0
...,...,...,...,...
94.0,F,1.0,0.011138,2.0
95.0,M,1.0,-0.010917,1.0
98.0,F,1.0,-0.003267,1.0
98.0,M,1.0,0.003804,1.0


### Question 5: Caracter gender in film

In [None]:
# The dataframe "name_by_movie" has everything we need
name_by_gender_df = name_by_movie_df.groupby('gender').apply(lambda x: x[x['p_value'] < 0.1])
display(name_by_gender_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,char_words,gender,order,p_value,slope_change,t_stat
gender,wiki_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F,3947,Barbara,F,8.0,0.063999,-0.003945,2.058789
F,4231,Cassandra,F,8.0,0.071099,0.017066,-1.997573
F,4231,Jennifer,F,6.0,0.031782,-0.086605,2.458174
F,4560,Isabelle,F,2.0,0.000007,-0.008570,7.995228
F,4726,Vicki,F,2.0,0.084777,-0.000762,1.894210
...,...,...,...,...,...,...,...
M,36956792,Gunner,M,13.0,0.001406,-0.002385,4.232822
M,36956792,Charlie,M,5.0,0.000208,-0.006215,5.427450
M,37322106,Major,M,0.0,0.080101,-0.002548,1.927686
M,37501922,Hunter,M,1.0,0.000021,-0.035578,7.051709


<span style="color:red"> *Même question, est ce que on fait notre analyse sur les film significant suelement ou pas *</span>

In [67]:
#Average slope change 
name_by_gender_prop_df = name_by_movie_df.groupby("gender").apply(lambda x: pd.Series({
        'prop_signif_per_gender': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_slope_change_per_gender': x['slope_change'].mean(),
        'total_number_signif_per_gender': (x['p_value'] < 0.1).sum(),
    }))
display(name_by_gender_prop_df)

#Average magnitude slope change taking significant only
name_by_gender_prop_df = name_by_movie_aggregate_df_significant.groupby("gender").apply(lambda x: pd.Series({
        'prop_signif_per_gender': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_magnitude_slope_change_per_gender': x['slope_change'].abs().mean(),
        'total_number_signif_per_gender': (x['p_value'] < 0.1).sum(),
    }))
display(name_by_gender_prop_df)

Unnamed: 0_level_0,prop_signif_per_gender,avg_slope_change_per_gender,total_number_signif_per_gender
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
F,0.156183,-0.000154,9793.0
M,0.12601,-0.000378,13199.0


Unnamed: 0_level_0,prop_signif_per_gender,avg_magnitude_slope_change_per_gender,total_number_signif_per_gender
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
F,1.0,0.012687,9793.0
M,1.0,0.012182,13199.0


#### Does caracter gender and movie genre are linked ?

In [68]:
# Does the gender influence is impacted by movie genre ? Study of impact due to role importance per movie genre
name_by_gender_by_genre_prop_df = movie_genre_aggregate_df.groupby(['gender','genre']).apply(lambda x: pd.Series({
        'prop_signif_per_gender_per_genre': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_slope_change_value_per_gender_per_genre': x['slope_change'].mean(),
        'total_number_signif_per_gender_per_genre': (x['p_value'] < 0.1).sum(),
    }))
display(name_by_gender_by_genre_prop_df)

#Calculate average magnitude of slope change 
name_by_gender_by_genre_prop_magnitude_df = movie_genre_aggregate_df.groupby(['gender','genre']).apply(lambda x: pd.Series({
        'prop_signif_per_gender_per_genre': (x['p_value'] < 0.1).sum()/len(x['p_value']),
        'avg_slope_change_magnitude_per_gender_per_genre': x['slope_change'].abs().mean(),
        'total_number_signif_per_gender_per_genre': (x['p_value'] < 0.1).sum(),
    }))
display(name_by_gender_by_genre_prop_magnitude_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,prop_signif_per_gender_per_genre,avg_slope_change_value_per_gender_per_genre,total_number_signif_per_gender_per_genre
gender,genre,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
F,Absurdism,0.156863,-0.000930,32.0
F,Acid western,0.166667,-0.001279,1.0
F,Action,0.157500,0.000176,1366.0
F,Action Comedy,0.178030,0.000645,47.0
F,Action Thrillers,0.181692,0.000482,131.0
...,...,...,...,...
M,Women in prison films,0.153846,-0.008910,2.0
M,Workplace Comedy,0.141667,-0.000440,51.0
M,World cinema,0.079252,-0.000068,691.0
M,Wuxia,0.060976,-0.000335,5.0


Unnamed: 0_level_0,Unnamed: 1_level_0,prop_signif_per_gender_per_genre,avg_slope_change_magnitude_per_gender_per_genre,total_number_signif_per_gender_per_genre
gender,genre,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
F,Absurdism,0.156863,0.003611,32.0
F,Acid western,0.166667,0.001279,1.0
F,Action,0.157500,0.004378,1366.0
F,Action Comedy,0.178030,0.004563,47.0
F,Action Thrillers,0.181692,0.005683,131.0
...,...,...,...,...
M,Women in prison films,0.153846,0.012976,2.0
M,Workplace Comedy,0.141667,0.002512,51.0
M,World cinema,0.079252,0.002190,691.0
M,Wuxia,0.060976,0.000871,5.0


In [94]:
display(name_by_gender_by_genre_prop_magnitude_df['avg_slope_change_magnitude_per_gender_per_genre']['F']['Absurdism'])

display(name_by_gender_by_genre_prop_magnitude_df['avg_slope_change_magnitude_per_gender_per_genre']['M']['Absurdism'])

display(name_by_gender_by_genre_prop_magnitude_df['avg_slope_change_magnitude_per_gender_per_genre']['F'].idxmax())
display(name_by_gender_by_genre_prop_magnitude_df['avg_slope_change_magnitude_per_gender_per_genre']['M'].idxmax())


0.0036109347677933056

0.003890474545940062

'Outlaw biker film'

'Animals'