In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Lift pandas' display limits so frames are shown without row/column truncation.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Load the three semicolon-separated CSV tables used by the rest of the notebook.
df_base = pd.read_csv("datasets/dataset_clean.csv", sep=";")
df_partners = pd.read_csv("datasets/dataset_partners.csv", sep=";")
df_date = pd.read_csv("datasets/dataset_couple.csv", sep=";")

In [3]:
# Replace missing ages with the median of the Age column.
median_age = df_partners['Age'].median()
df_partners['Age'] = df_partners['Age'].fillna(median_age)

In [4]:
# Store Age as 64-bit integers (astype raises if missing values are still present).
age_as_int = df_partners['Age'].astype('int64')
df_partners['Age'] = age_as_int

In [5]:
# Box plot of Age, one box per Gender value.
fig = px.box(
    df_partners,
    x="Gender",
    y="Age",
    color='Gender',
    title='Age of participants',
    width=500,
)
fig.show()

In [6]:
# Pie chart of the values found in the "Goal for the evening" column.
fig = px.pie(
    df_partners,
    names="Goal for the evening",
    title='Goal of the evening',
    width=700,
)
fig.show()

In [7]:
# Count of rows per Match value, with the count printed on each bar.
fig = px.histogram(
    df_date,
    x='Match',
    color='Match',
    text_auto=True,
    title='Nombre de matches',
    width=500,
)
fig.show()

In [8]:
# Distribution of the "Satisfaction of people met" column, counts shown on bars.
fig = px.histogram(
    df_partners,
    x='Satisfaction of people met',
    text_auto=True,
    title='Satisfaction of people met',
    width=600,
)
fig.show()

In [9]:
def _yes_to_flag(values):
    """Encode a Yes/No column as integers: 1 for "Yes", 0 for anything else.

    A column that is already numeric is returned unchanged. Without this
    guard, re-running the cell would compare the previously encoded 0/1
    values with "Yes" and silently overwrite every flag with 0.
    """
    if pd.api.types.is_numeric_dtype(values):
        return values
    return values.eq("Yes").astype("int64")


# Convert the Yes/No answer columns to 1/0 so they can be filtered and correlated.
df_base["Match"] = _yes_to_flag(df_base["Match"])
df_date["Match"] = _yes_to_flag(df_date["Match"])
df_date['Decision partner1_P2'] = _yes_to_flag(df_date["Decision partner1_P2"])
df_date['Decision partner1_P1'] = _yes_to_flag(df_date["Decision partner1_P1"])

In [10]:
# Split the participants table by the value of the Gender column.
is_male = df_partners['Gender'] == "Male"
is_female = df_partners['Gender'] == "Female"
df_male_partners = df_partners[is_male]
df_female_partners = df_partners[is_female]

# "Looking for ..." columns whose average is compared between the two groups.
cols = [
    'Looking for attractive (before evening)',
    'Looking for sincere (before evening)',
    'Looking for intelligent (before evening)',
    'Looking for fun (before evening)',
    'Looking for ambitious (before evening)',
    'Looking for shared interests (before evening)',
]

# One [attribute, mean] row per column, computed separately for each group.
male_results = [[attribute, df_male_partners[attribute].mean()] for attribute in cols]
female_results = [[attribute, df_female_partners[attribute].mean()] for attribute in cols]

summary_columns = ['Attribute', 'Average']
df_what_are_males_looking_for = pd.DataFrame(male_results, columns=summary_columns)
df_what_are_females_looking_for = pd.DataFrame(female_results, columns=summary_columns)

In [11]:
# Show the male averages ordered from lowest to highest.
df_what_are_males_looking_for.sort_values(by='Average', ascending=True)

Unnamed: 0,Attribute,Average
4,Looking for ambitious (before evening),8.823956
5,Looking for shared interests (before evening),10.982271
1,Looking for sincere (before evening),16.3796
3,Looking for fun (before evening),17.600839
2,Looking for intelligent (before evening),19.387418
0,Looking for attractive (before evening),27.2488


In [12]:
# Show the female averages ordered from lowest to highest.
df_what_are_females_looking_for.sort_values(by='Average', ascending=True)

Unnamed: 0,Attribute,Average
5,Looking for shared interests (before evening),12.697836
4,Looking for ambitious (before evening),12.818476
3,Looking for fun (before evening),17.299108
0,Looking for attractive (before evening),18.020372
1,Looking for sincere (before evening),18.22223
2,Looking for intelligent (before evening),20.971004


In [13]:
# Distribution of the "How many called you" column, counts shown on bars.
fig = px.histogram(
    df_partners,
    x='How many called you',
    title="How many called after the event ?",
    text_auto=True,
)
fig.show()

In [14]:
# Per attractiveness-rating bin, the percentage split of 'Decision partner1' values.
fig = px.histogram(
    df_base,
    x='Rating of attractive (date)',
    color='Decision partner1',
    barnorm='percent',
    nbins=10,
    text_auto=True,
    title='Swipe right vs. rating of attractiveness of the date',
    width=800,
)
fig.show()

In [15]:
# Per importance-of-attractiveness bin, the percentage split of 'Decision partner1' values.
fig = px.histogram(
    df_base,
    x='Importance of attractiveness in decision (after event)',
    color='Decision partner1',
    barnorm='percent',
    nbins=9,
    text_auto=True,
    title='Importance of attractiveness in decision (after the event)',
    width=800,
)
fig.show()

In [16]:
# Per fun-rating bin, the percentage split of 'Decision partner1_P1' values.
# A title is set so the figure can be read on its own, like the other charts.
# NOTE(review): the x column carries the _P2 suffix while the colour column
# carries _P1 — confirm this pairing is intended.
fig = px.histogram(
    df_date,
    x='Rating of fun (date)_P2',
    color='Decision partner1_P1',
    barnorm='percent',
    nbins=20,
    title='Decision partner1_P1 vs. Rating of fun (date)_P2',
)
fig.show()

In [17]:
# Row counts per attractiveness-rating bin, coloured by 'Decision partner1_P1'.
# A title is set so the figure can be read on its own, like the other charts.
fig = px.histogram(
    df_date,
    x='Rating of attractive (date)_P1',
    color='Decision partner1_P1',
    nbins=30,
    title='Decision partner1_P1 vs. Rating of attractive (date)_P1',
)
fig.show()

In [18]:
# Distribution of 'Expected number of interested people', coloured by Gender.
fig = px.histogram(
    df_partners,
    x='Expected number of interested people',
    color='Gender',
    title='Expected number of interested people by gender',
    text_auto=True,
)
fig.show()

In [19]:
# Keep only the rows where Match equals 1, then count them per date number.
is_match = df_base['Match'] == 1
fig = px.histogram(
    df_base[is_match],
    x='Number of the date',
    title='Matches according to the number of the date of the partner',
)
fig.show()

In [20]:
from plotly.subplots import make_subplots

# Two side-by-side pies of the 'Partner2 Decision' values: one for rows whose
# date number is 1, one for rows whose date number is 20.
# NOTE(review): 20 is hard-coded as the "last" date — confirm it matches the data.
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type': 'domain'}, {'type': 'domain'}]],
    subplot_titles=['First date', 'Last date'],  # one title per subplot
)

for pie_col, date_number in enumerate((1, 20), start=1):
    on_this_date = df_base['Number of the date'] == date_number
    # Only labels are passed (no values); see the plotly Pie reference for how
    # duplicated labels are aggregated in that case.
    fig.add_trace(go.Pie(labels=df_base[on_this_date]['Partner2 Decision']), 1, pie_col)

fig.update_layout(
    width=800,
    title_text="Chances of getting a 'Yes' from the date in the first and last date",
    legend=dict(title=dict(text="Partner likes you")),
)
fig.show()

In [21]:
# Pairwise correlation matrix of df_base, restricted to its numeric columns.
data_corr = df_base.corr(
    numeric_only=True,
)

In [22]:
# Correlation of each column with Match, listed from highest to lowest.
match_corr = data_corr["Match"]
match_corr.sort_values(ascending=False)


Match                                                              1.000000
Do you like the P2 (date)                                          0.305853
How do you like P2                                                 0.305723
Rating of fun (date)                                               0.277700
Rating of fun of P2                                                0.277545
Rating of shared interests (date)                                  0.270840
Rating of shared interests of P2                                   0.270679
Rating of attractive (date)                                        0.260837
Rating of attractiveness of P2                                     0.260676
Probability of P2 saying yes (date)                                0.255574
Prob of P2 saying yes                                              0.255531
How many called you                                                0.202762
Rating of intelligence of P2                                       0.169532
Rating of in