In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import seaborn as sns
from scipy.stats import ttest_ind

# Load the speed-dating dataset from the parent directory; the file is decoded as ISO-8859-1.
df = pd.read_csv(filepath_or_buffer="../Speed_dating.csv", encoding="ISO-8859-1")

In [5]:
# Columns averaged and labelled "(Shared Interests)" by the comparison loop.
shar_columns = [
    "shar1_1",
    "shar1_2",
    "shar1_3",
    "shar7_2",
    "shar7_3",
]

# Column averaged and labelled "(Shared Racial Background)" by the comparison loop.
race_column = "imprace"


In [6]:
# One record per (gender label, attribute) pair, holding the mean score rounded to 2 decimals.
comparison_data = []

# enumerate() pairs the gender codes 0 and 1 with their display labels.
for gender, label in enumerate(['Femmes évaluant les hommes', 'Hommes évaluant les femmes']):
    df_gender = df.loc[df['gender'] == gender]

    # Shared-interest means: one record for every listed column that is present in the data.
    comparison_data.extend(
        {
            "Genre": label,
            "Attribut": f"{col} (Shared Interests)",
            "Moyenne": round(df_gender[col].dropna().mean(), 2),
        }
        for col in shar_columns
        if col in df_gender.columns
    )

    # Mean of the racial-background column, skipped when the column is absent.
    if race_column not in df_gender.columns:
        continue
    race_mean = df_gender[race_column].dropna().mean()
    comparison_data.append({
        "Genre": label,
        "Attribut": f"{race_column} (Shared Racial Background)",
        "Moyenne": round(race_mean, 2),
    })


In [7]:
import pandas as pd

# Assemble the collected records into one table with a row per (gender, attribute) pair.
# The column list is pinned so that an empty `comparison_data` still yields a frame with
# the columns the plotting cells look up.
comparison_df = pd.DataFrame(comparison_data, columns=["Genre", "Attribut", "Moyenne"])

# Leave the frame as the cell's last expression so Jupyter renders it as a rich table
# rather than the plain-text dump produced by print().
comparison_df


                         Genre                            Attribut  Moyenne
0   Femmes évaluant les hommes          shar1_1 (Shared Interests)    12.70
1   Femmes évaluant les hommes          shar1_2 (Shared Interests)    14.04
2   Femmes évaluant les hommes          shar1_3 (Shared Interests)    14.07
3   Femmes évaluant les hommes          shar7_2 (Shared Interests)    13.78
4   Femmes évaluant les hommes          shar7_3 (Shared Interests)    13.75
5   Femmes évaluant les hommes  imprace (Shared Racial Background)     4.11
6   Hommes évaluant les femmes          shar1_1 (Shared Interests)    11.00
7   Hommes évaluant les femmes          shar1_2 (Shared Interests)    11.54
8   Hommes évaluant les femmes          shar1_3 (Shared Interests)    11.01
9   Hommes évaluant les femmes          shar7_2 (Shared Interests)    10.39
10  Hommes évaluant les femmes          shar7_3 (Shared Interests)    10.30
11  Hommes évaluant les femmes  imprace (Shared Racial Background)     3.46


In [8]:
import plotly.express as px

# Grouped bar chart: one cluster per attribute on the x axis, one bar per gender label.
# update_layout() returns the figure it was called on, so the styling is chained directly.
fig = px.bar(
    data_frame=comparison_df,
    x="Attribut",
    y="Moyenne",
    color="Genre",
    barmode="group",
    labels={"Moyenne": "Score moyen"},
    title="Shared Interests vs. Shared Racial Background – Moyennes par genre",
).update_layout(template="plotly_white", xaxis_tickangle=45)
fig.show()


In [9]:
import plotly.express as px

# Collapse the individual "shar*" attributes into "Shared Interests"; every other
# attribute is treated as "Shared Racial Background".
visual_df = comparison_df.copy()

# Match on the column-name prefix rather than a substring anywhere in the label: the
# racial-background label contains the word "Shared", which a bare substring test only
# avoids because of letter case.
is_shared_interest = visual_df["Attribut"].str.startswith("shar")
visual_df["Attribut simplifié"] = is_shared_interest.map(
    {True: "Shared Interests", False: "Shared Racial Background"}
)

# Average per gender and simplified attribute. `comparison_df` holds per-column means
# that were already rounded, so this is an unweighted mean of those values, not a mean
# taken over individual respondents.
summary_df = visual_df.groupby(["Genre", "Attribut simplifié"], as_index=False)["Moyenne"].mean()

# Grouped bar chart: one cluster per gender label, one bar per simplified attribute.
fig = px.bar(
    summary_df,
    x="Genre",
    y="Moyenne",
    color="Attribut simplifié",
    barmode="group",
    title="Comparaison de l'importance : Shared Interests vs Racial Background",
    labels={"Moyenne": "Score moyen"},
    height=500
)
fig.update_layout(template="plotly_white")
fig.show()
