# Projet Speed Dating (Tinder)

Cheminement du nettoyage apporté au dataset avant de poursuivre l'analyse.

## Import de modules externes

In [None]:
import requests
import pandas as pd

from speed_dating_document_context import (
    LabelDecoder,
    pref_1_1_cols,
    pref_4_1_cols,
    pref_2_1_cols,
    pref_1_2_cols
)

from config import (
    JEDHA_SPEED_DATING_DATA_EXPLANATIONS_URL,
    LOCAL_SPEED_DATING_DATA_EXPLANATIONS_PATH,
    JEDHA_SPEED_DATING_CSV_URL,
    LOCAL_SPEED_DATING_CSV_PATH,
)

# Notebook-wide pandas display configuration, applied option by option.
DISPLAY_OPTIONS = {
    'display.max_columns': None,    # show all columns
    'display.width': 1000,          # expand display width
    'display.max_colwidth': None,   # show full content in each cell
}

for option_name, option_value in DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

## Chargement des données

In [12]:
def _download_if_missing(url, local_path, timeout=30):
    """Download `url` into `local_path` unless that file already exists.

    Args:
        url: address of the remote resource.
        local_path: path object (must provide `.exists()`) of the local copy.
        timeout: seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: when the server answers with an error status.
            Nothing is written in that case, so a failed download is never
            mistaken for a valid cached file on the next run.
    """
    if local_path.exists():
        return

    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of caching an HTTP error page as if it were the data
    response.raise_for_status()

    with open(local_path, mode="wb") as file:
        file.write(response.content)


# Retrieve the explanation document and the CSV locally (reused on later runs)
_download_if_missing(JEDHA_SPEED_DATING_DATA_EXPLANATIONS_URL, LOCAL_SPEED_DATING_DATA_EXPLANATIONS_PATH)
_download_if_missing(JEDHA_SPEED_DATING_CSV_URL, LOCAL_SPEED_DATING_CSV_PATH)

# Loading DataFrame
speed_dating_df = pd.read_csv(LOCAL_SPEED_DATING_CSV_PATH, encoding="ISO-8859-1")

## Simple data cleaning

### Null PID

In [13]:
# Distinct waves containing at least one row without a partner id (pid)
null_pid_mask = speed_dating_df["pid"].isna()
waves_with_null_pid = speed_dating_df.loc[null_pid_mask, "wave"].unique()
print("Waves where a pid is null: ", waves_with_null_pid.tolist())


Waves where a pid is null:  [5]


In [14]:
# Work on wave 5 only
wave5_df = speed_dating_df.loc[speed_dating_df["wave"] == 5]

# Rows of wave 5 without a partner id, reduced to [iid, order] pairs
wave5_iids_with_null_pid = wave5_df.loc[wave5_df["pid"].isna()]
iid_order_list = wave5_iids_with_null_pid[["iid", "order"]].to_numpy().tolist()

peer_dates = 0

for iid, order in iid_order_list:
    # Look for a row where this participant is referenced as `pid` with the same `order`
    peer_row_mask = (wave5_df["pid"] == iid) & (wave5_df["order"] == order)
    has_peer_date = bool(peer_row_mask.any())
    if not has_peer_date:
        continue

    print(f"FOUND peer date for {iid} / {order}")
    peer_dates += 1

print(f"Check done! Found {peer_dates} peer dates.")

Check done! Found 0 peer dates.


No peers found, so we can simply remove those rows.
Maybe the expected participants decided not to reply to the follow-up polls.

In [15]:
# Discard the rows without a partner id and renumber the index from 0
speed_dating_df = speed_dating_df.dropna(subset=["pid"], ignore_index=True)

speed_dating_df

Unnamed: 0,iid,id,gender,idg,condtn,wave,round,position,positin1,order,partner,pid,match,int_corr,samerace,age_o,race_o,pf_o_att,pf_o_sin,pf_o_int,pf_o_fun,pf_o_amb,pf_o_sha,dec_o,attr_o,sinc_o,intel_o,fun_o,amb_o,shar_o,like_o,prob_o,met_o,age,field,field_cd,undergra,mn_sat,tuition,race,imprace,imprelig,from,zipcode,income,goal,date,go_out,career,career_c,sports,tvsports,exercise,dining,museums,art,hiking,gaming,clubbing,reading,tv,theater,movies,concerts,music,shopping,yoga,exphappy,expnum,attr1_1,sinc1_1,intel1_1,fun1_1,amb1_1,shar1_1,attr4_1,sinc4_1,intel4_1,fun4_1,amb4_1,shar4_1,attr2_1,sinc2_1,intel2_1,fun2_1,amb2_1,shar2_1,attr3_1,sinc3_1,fun3_1,intel3_1,amb3_1,attr5_1,sinc5_1,intel5_1,fun5_1,amb5_1,dec,attr,sinc,intel,fun,amb,shar,like,prob,met,match_es,attr1_s,sinc1_s,intel1_s,fun1_s,amb1_s,shar1_s,attr3_s,sinc3_s,intel3_s,fun3_s,amb3_s,satis_2,length,numdat_2,attr7_2,sinc7_2,intel7_2,fun7_2,amb7_2,shar7_2,attr1_2,sinc1_2,intel1_2,fun1_2,amb1_2,shar1_2,attr4_2,sinc4_2,intel4_2,fun4_2,amb4_2,shar4_2,attr2_2,sinc2_2,intel2_2,fun2_2,amb2_2,shar2_2,attr3_2,sinc3_2,intel3_2,fun3_2,amb3_2,attr5_2,sinc5_2,intel5_2,fun5_2,amb5_2,you_call,them_cal,date_3,numdat_3,num_in_3,attr1_3,sinc1_3,intel1_3,fun1_3,amb1_3,shar1_3,attr7_3,sinc7_3,intel7_3,fun7_3,amb7_3,shar7_3,attr4_3,sinc4_3,intel4_3,fun4_3,amb4_3,shar4_3,attr2_3,sinc2_3,intel2_3,fun2_3,amb2_3,shar2_3,attr3_3,sinc3_3,intel3_3,fun3_3,amb3_3,attr5_3,sinc5_3,intel5_3,fun5_3,amb5_3
0,1,1.0,0,1,1,1,10,7,,4,1,11.0,0,0.14,0,27.0,2.0,35.0,20.0,20.0,20.0,0.0,5.0,0,6.0,8.0,8.0,8.0,8.0,6.0,7.0,4.0,2.0,21.0,Law,1.0,,,,4.0,2.0,4.0,Chicago,60521,69487.00,2.0,7.0,1.0,lawyer,,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,6.0,9.0,7.0,7.0,6.0,5.0,7.0,6.0,2.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,
1,1,1.0,0,1,1,1,10,7,,3,2,12.0,0,0.54,0,22.0,2.0,60.0,0.0,0.0,40.0,0.0,0.0,0,7.0,8.0,10.0,7.0,7.0,5.0,8.0,4.0,2.0,21.0,Law,1.0,,,,4.0,2.0,4.0,Chicago,60521,69487.00,2.0,7.0,1.0,lawyer,,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,7.0,8.0,7.0,8.0,5.0,6.0,7.0,5.0,1.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,
2,1,1.0,0,1,1,1,10,7,,10,3,13.0,1,0.16,1,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,1,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,1.0,21.0,Law,1.0,,,,4.0,2.0,4.0,Chicago,60521,69487.00,2.0,7.0,1.0,lawyer,,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,5.0,8.0,9.0,8.0,5.0,7.0,7.0,,1.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,
3,1,1.0,0,1,1,1,10,7,,5,4,14.0,1,0.61,0,23.0,2.0,30.0,5.0,15.0,40.0,5.0,5.0,1,7.0,8.0,9.0,8.0,9.0,8.0,7.0,7.0,2.0,21.0,Law,1.0,,,,4.0,2.0,4.0,Chicago,60521,69487.00,2.0,7.0,1.0,lawyer,,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,7.0,6.0,8.0,7.0,6.0,8.0,7.0,6.0,2.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,
4,1,1.0,0,1,1,1,10,7,,7,5,15.0,1,0.21,0,24.0,3.0,30.0,10.0,20.0,10.0,10.0,20.0,1,8.0,7.0,9.0,6.0,9.0,7.0,8.0,6.0,2.0,21.0,Law,1.0,,,,4.0,2.0,4.0,Chicago,60521,69487.00,2.0,7.0,1.0,lawyer,,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,5.0,6.0,7.0,7.0,6.0,6.0,6.0,6.0,2.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8363,552,22.0,1,44,2,21,22,14,10.0,5,18,526.0,0,0.64,0,26.0,3.0,10.0,10.0,30.0,20.0,10.0,15.0,1,10.0,5.0,3.0,2.0,6.0,5.0,6.0,1.0,,25.0,Climate Dynamics,18.0,"Ecole Normale Suprieure, Paris",,,2.0,1.0,1.0,France,78110,,1.0,2.0,1.0,assistant master of the universe (otherwise it's too much work),15.0,8.0,2.0,5.0,10.0,10.0,10.0,7.0,1.0,9.0,8.0,3.0,7.0,9.0,10.0,10.0,7.0,3.0,10.0,,70.0,0.0,15.0,15.0,0.0,0.0,90.0,0.0,0.0,0.0,0.0,10.0,50.0,0.0,0.0,30.0,0.0,20.0,8.0,7.0,6.0,7.0,7.0,9.0,7.0,10.0,5.0,9.0,0,3.0,5.0,5.0,5.0,,,2.0,5.0,0.0,3.0,,,,,,,,,,,,5.0,1.0,2.0,70.0,0.0,15.0,10.0,0.0,5.0,70.00,0.00,15.00,10.00,0.00,5.00,80.0,0.0,5.0,5.0,0.0,10.0,50.0,5.0,10.0,20.0,5.0,10.0,9.0,3.0,7.0,6.0,9.0,9.0,3.0,9.0,4.0,7.0,2.0,0.0,0.0,,1.0,70.0,0.0,20.0,10.0,0.0,0.0,70.0,0.0,20.0,10.0,0.0,0.0,80.0,0.0,10.0,0.0,0.0,10.0,50.0,5.0,10.0,20.0,10.0,5.0,8.0,5.0,7.0,6.0,7.0,9.0,5.0,9.0,5.0,6.0
8364,552,22.0,1,44,2,21,22,13,10.0,4,19,527.0,0,0.71,0,24.0,6.0,50.0,20.0,10.0,5.0,10.0,5.0,0,6.0,3.0,7.0,3.0,7.0,2.0,2.0,2.0,2.0,25.0,Climate Dynamics,18.0,"Ecole Normale Suprieure, Paris",,,2.0,1.0,1.0,France,78110,,1.0,2.0,1.0,assistant master of the universe (otherwise it's too much work),15.0,8.0,2.0,5.0,10.0,10.0,10.0,7.0,1.0,9.0,8.0,3.0,7.0,9.0,10.0,10.0,7.0,3.0,10.0,,70.0,0.0,15.0,15.0,0.0,0.0,90.0,0.0,0.0,0.0,0.0,10.0,50.0,0.0,0.0,30.0,0.0,20.0,8.0,7.0,6.0,7.0,7.0,9.0,7.0,10.0,5.0,9.0,0,4.0,6.0,8.0,4.0,4.0,,4.0,4.0,0.0,3.0,,,,,,,,,,,,5.0,1.0,2.0,70.0,0.0,15.0,10.0,0.0,5.0,70.00,0.00,15.00,10.00,0.00,5.00,80.0,0.0,5.0,5.0,0.0,10.0,50.0,5.0,10.0,20.0,5.0,10.0,9.0,3.0,7.0,6.0,9.0,9.0,3.0,9.0,4.0,7.0,2.0,0.0,0.0,,1.0,70.0,0.0,20.0,10.0,0.0,0.0,70.0,0.0,20.0,10.0,0.0,0.0,80.0,0.0,10.0,0.0,0.0,10.0,50.0,5.0,10.0,20.0,10.0,5.0,8.0,5.0,7.0,6.0,7.0,9.0,5.0,9.0,5.0,6.0
8365,552,22.0,1,44,2,21,22,19,10.0,10,20,528.0,0,-0.46,0,29.0,3.0,40.0,10.0,30.0,10.0,10.0,,0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,25.0,Climate Dynamics,18.0,"Ecole Normale Suprieure, Paris",,,2.0,1.0,1.0,France,78110,,1.0,2.0,1.0,assistant master of the universe (otherwise it's too much work),15.0,8.0,2.0,5.0,10.0,10.0,10.0,7.0,1.0,9.0,8.0,3.0,7.0,9.0,10.0,10.0,7.0,3.0,10.0,,70.0,0.0,15.0,15.0,0.0,0.0,90.0,0.0,0.0,0.0,0.0,10.0,50.0,0.0,0.0,30.0,0.0,20.0,8.0,7.0,6.0,7.0,7.0,9.0,7.0,10.0,5.0,9.0,0,4.0,7.0,8.0,8.0,8.0,,6.0,5.0,0.0,3.0,,,,,,,,,,,,5.0,1.0,2.0,70.0,0.0,15.0,10.0,0.0,5.0,70.00,0.00,15.00,10.00,0.00,5.00,80.0,0.0,5.0,5.0,0.0,10.0,50.0,5.0,10.0,20.0,5.0,10.0,9.0,3.0,7.0,6.0,9.0,9.0,3.0,9.0,4.0,7.0,2.0,0.0,0.0,,1.0,70.0,0.0,20.0,10.0,0.0,0.0,70.0,0.0,20.0,10.0,0.0,0.0,80.0,0.0,10.0,0.0,0.0,10.0,50.0,5.0,10.0,20.0,10.0,5.0,8.0,5.0,7.0,6.0,7.0,9.0,5.0,9.0,5.0,6.0
8366,552,22.0,1,44,2,21,22,3,10.0,16,21,529.0,0,0.62,0,22.0,4.0,10.0,25.0,25.0,10.0,10.0,20.0,1,5.0,7.0,5.0,5.0,3.0,6.0,6.0,4.0,2.0,25.0,Climate Dynamics,18.0,"Ecole Normale Suprieure, Paris",,,2.0,1.0,1.0,France,78110,,1.0,2.0,1.0,assistant master of the universe (otherwise it's too much work),15.0,8.0,2.0,5.0,10.0,10.0,10.0,7.0,1.0,9.0,8.0,3.0,7.0,9.0,10.0,10.0,7.0,3.0,10.0,,70.0,0.0,15.0,15.0,0.0,0.0,90.0,0.0,0.0,0.0,0.0,10.0,50.0,0.0,0.0,30.0,0.0,20.0,8.0,7.0,6.0,7.0,7.0,9.0,7.0,10.0,5.0,9.0,0,4.0,6.0,5.0,4.0,,5.0,5.0,5.0,0.0,3.0,,,,,,,,,,,,5.0,1.0,2.0,70.0,0.0,15.0,10.0,0.0,5.0,70.00,0.00,15.00,10.00,0.00,5.00,80.0,0.0,5.0,5.0,0.0,10.0,50.0,5.0,10.0,20.0,5.0,10.0,9.0,3.0,7.0,6.0,9.0,9.0,3.0,9.0,4.0,7.0,2.0,0.0,0.0,,1.0,70.0,0.0,20.0,10.0,0.0,0.0,70.0,0.0,20.0,10.0,0.0,0.0,80.0,0.0,10.0,0.0,0.0,10.0,50.0,5.0,10.0,20.0,10.0,5.0,8.0,5.0,7.0,6.0,7.0,9.0,5.0,9.0,5.0,6.0


### Normalisation des notes

Pour les préférences dont l'échelle de notation varie, on va s'aligner sur celle utilisée par le plus grand nombre de vagues : une distribution de 100 points.
Pour les vagues 6 à 9, on normalisera les valeurs des groupes suivants :
- attr1_1, sinc1_1, intel1_1, fun1_1, amb1_1, shar1_1
- attr4_1, sinc4_1, intel4_1, fun4_1, amb4_1, shar4_1
- attr2_1, sinc2_1, intel2_1, fun2_1, amb2_1, shar2_1
- attr1_2, sinc1_2, intel1_2, fun1_2, amb1_2, shar1_2

In [16]:
# Force out-of-range values back into the 1-10 interval
# (could become a global sanitization applied to every column expecting a range)
abnormal_cols = ["attr_o", "fun_o", "gaming", "reading"]
display(speed_dating_df[abnormal_cols].max())   # maxima before clipping

clipped_values = speed_dating_df[abnormal_cols].clip(lower=1, upper=10)
speed_dating_df[abnormal_cols] = clipped_values

display(speed_dating_df[abnormal_cols].max())   # maxima after clipping
display(speed_dating_df.describe(include='all'))


attr_o     10.5
fun_o      11.0
gaming     14.0
reading    13.0
dtype: float64

attr_o     10.0
fun_o      10.0
gaming     10.0
reading    10.0
dtype: float64

Unnamed: 0,iid,id,gender,idg,condtn,wave,round,position,positin1,order,partner,pid,match,int_corr,samerace,age_o,race_o,pf_o_att,pf_o_sin,pf_o_int,pf_o_fun,pf_o_amb,pf_o_sha,dec_o,attr_o,sinc_o,intel_o,fun_o,amb_o,shar_o,like_o,prob_o,met_o,age,field,field_cd,undergra,mn_sat,tuition,race,imprace,imprelig,from,zipcode,income,goal,date,go_out,career,career_c,sports,tvsports,exercise,dining,museums,art,hiking,gaming,clubbing,reading,tv,theater,movies,concerts,music,shopping,yoga,exphappy,expnum,attr1_1,sinc1_1,intel1_1,fun1_1,amb1_1,shar1_1,attr4_1,sinc4_1,intel4_1,fun4_1,amb4_1,shar4_1,attr2_1,sinc2_1,intel2_1,fun2_1,amb2_1,shar2_1,attr3_1,sinc3_1,fun3_1,intel3_1,amb3_1,attr5_1,sinc5_1,intel5_1,fun5_1,amb5_1,dec,attr,sinc,intel,fun,amb,shar,like,prob,met,match_es,attr1_s,sinc1_s,intel1_s,fun1_s,amb1_s,shar1_s,attr3_s,sinc3_s,intel3_s,fun3_s,amb3_s,satis_2,length,numdat_2,attr7_2,sinc7_2,intel7_2,fun7_2,amb7_2,shar7_2,attr1_2,sinc1_2,intel1_2,fun1_2,amb1_2,shar1_2,attr4_2,sinc4_2,intel4_2,fun4_2,amb4_2,shar4_2,attr2_2,sinc2_2,intel2_2,fun2_2,amb2_2,shar2_2,attr3_2,sinc3_2,intel3_2,fun3_2,amb3_2,attr5_2,sinc5_2,intel5_2,fun5_2,amb5_2,you_call,them_cal,date_3,numdat_3,num_in_3,attr1_3,sinc1_3,intel1_3,fun1_3,amb1_3,shar1_3,attr7_3,sinc7_3,intel7_3,fun7_3,amb7_3,shar7_3,attr4_3,sinc4_3,intel4_3,fun4_3,amb4_3,shar4_3,attr2_3,sinc2_3,intel2_3,fun2_3,amb2_3,shar2_3,attr3_3,sinc3_3,intel3_3,fun3_3,amb3_3,attr5_3,sinc5_3,intel5_3,fun5_3,amb5_3
count,8368.0,8367.0,8368.0,8368.0,8368.0,8368.0,8368.0,8368.0,6532.0,8368.0,8368.0,8368.0,8368.0,8210.0,8368.0,8274.0,8305.0,8289.0,8289.0,8289.0,8280.0,8271.0,8249.0,8368.0,8166.0,8091.0,8072.0,8018.0,7656.0,7302.0,8128.0,8060.0,7993.0,8274.0,8305,8286.0,4914,3133.0,3583.0,8305.0,8289.0,8289.0,8289,7305.0,4273.0,8289.0,8271.0,8289.0,8279,8230.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8267.0,1790.0,8289.0,8289.0,8289.0,8280.0,8271.0,8249.0,6489.0,6489.0,6489.0,6489.0,6489.0,6467.0,8289.0,8289.0,8289.0,8289.0,8280.0,8280.0,8263.0,8263.0,8263.0,8263.0,8263.0,4906.0,4906.0,4906.0,4906.0,4906.0,8368.0,8166.0,8091.0,8072.0,8018.0,7656.0,7302.0,8128.0,8060.0,7993.0,7195.0,4096.0,4096.0,4096.0,4096.0,4096.0,4096.0,4000.0,4000.0,4000.0,4000.0,4000.0,7454.0,7454.0,7424.0,1984.0,1955.0,1984.0,1984.0,1955.0,1974.0,7436.0,7454.0,7454.0,7454.0,7454.0,7454.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,7454.0,7454.0,7454.0,7454.0,7454.0,4377.0,4377.0,4377.0,4377.0,4377.0,3969.0,3969.0,3969.0,1496.0,666.0,3969.0,3969.0,3969.0,3969.0,3969.0,3969.0,2016.0,2016.0,2016.0,2016.0,2016.0,2016.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2016.0,3969.0,3969.0,3969.0,3969.0,3969.0,2016.0,2016.0,2016.0,2016.0,2016.0
unique,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,259,,241,68.0,115.0,,,,269,409.0,261.0,,,,367,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
top,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Business,,UC Berkeley,1400.0,26908.0,,,,New York,0.0,55080.0,,,,Finance,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
freq,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,521,,107,403.0,241.0,,,,521,355.0,123.0,,,,202,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
mean,283.863767,8.964384,0.5,17.335325,1.829828,11.358509,16.880258,9.048757,9.295775,8.931764,8.965942,283.863767,0.164914,0.1963,0.396272,26.364999,2.756653,22.495347,17.396867,20.270759,17.459714,10.685375,11.84593,0.420053,6.19133,7.175256,7.369301,6.40222,6.778409,5.47487,6.134498,5.208251,1.960215,26.364999,,7.665339,,,,2.756653,3.785981,3.653879,,,,2.121848,5.007375,2.158523,,5.277886,6.424418,4.574255,6.2443,7.783448,6.987453,6.716371,5.737483,3.850042,5.745325,7.6616,5.304379,6.77814,7.920618,6.824587,7.850887,5.631077,4.338038,5.534051,5.560335,22.495347,17.396867,20.270759,17.459714,10.685375,11.84593,26.39436,11.071506,12.636308,15.566805,9.780089,11.014845,30.361422,13.264852,14.417394,18.427955,11.744583,11.855626,7.084352,8.295776,7.704345,8.404454,7.577877,6.941908,7.927232,8.284346,7.426213,7.617611,0.420053,6.19035,7.175256,7.369301,6.400474,6.778409,5.47487,6.134498,5.208251,0.947579,3.206991,20.791624,15.434255,17.243708,15.260869,11.144619,12.457925,7.21125,8.082,8.25775,7.6925,7.58925,5.711833,1.843708,2.337823,32.819556,13.529923,15.293851,18.868448,7.286957,12.156028,26.197823,15.872299,17.817152,17.656897,9.918161,12.762388,26.806234,11.929177,12.10303,15.16381,9.342511,11.320866,29.344369,13.89823,13.958265,17.967233,11.909735,12.887976,7.124497,7.931983,8.238664,7.601959,7.486048,6.827964,7.394106,7.838702,7.279415,7.332191,0.781053,0.982111,0.376921,1.230615,0.935435,24.364853,16.588065,19.416904,16.237488,10.902986,12.705062,31.330357,15.654266,16.679563,16.418155,7.823909,12.207837,25.610341,10.751267,11.524839,14.276783,9.207503,11.253802,24.970936,10.923285,11.952687,14.959108,9.526191,11.96627,7.239859,8.093474,8.389771,7.658856,7.391786,6.81002,7.615079,7.93254,7.155258,7.048611
std,158.584899,5.492409,0.50003,10.94329,0.375806,5.995461,4.354579,5.515476,5.650199,5.478099,5.493929,158.584899,0.371125,0.303522,0.489151,3.563648,1.230689,12.569802,7.044003,6.782895,6.085526,6.126544,6.362746,0.493597,1.947317,1.740575,1.550501,1.948535,1.79408,2.156163,1.841258,2.129354,0.245925,3.563648,,3.759695,,,,1.230689,2.846506,2.805641,,,,1.406983,1.444439,1.105428,,3.309885,2.619419,2.801521,2.419612,1.755105,2.0516,2.262518,2.570595,2.492039,2.502831,1.969231,2.52958,2.233341,1.700264,2.156155,1.792227,2.609472,2.717712,1.733627,4.75251,12.569802,7.044003,6.782895,6.085526,6.126544,6.362746,16.297045,6.659233,6.717476,7.328256,6.998428,6.06015,16.252894,6.964893,6.265235,6.577442,6.888958,6.169747,1.395527,1.407226,1.564041,1.076268,1.778718,1.498653,1.627054,1.283657,1.779129,1.773094,0.493597,1.950178,1.740575,1.550501,1.953816,1.79408,2.156163,1.841258,2.129354,0.989878,2.444377,12.968524,6.915322,6.59642,5.356969,5.514028,5.921789,1.41545,1.455741,1.179317,1.626839,1.793136,1.82161,0.975682,0.63134,17.15527,7.977482,7.292868,8.535963,6.125187,8.241906,14.371856,6.657147,6.535516,6.124525,5.675667,6.652739,16.402836,6.401556,5.990607,7.290107,5.856329,6.296155,14.551171,6.17169,5.398621,6.100307,6.313281,5.615691,1.371758,1.503184,1.18039,1.548276,1.74498,1.411096,1.588145,1.280936,1.647478,1.521854,1.612446,1.382652,0.484676,1.294557,0.75416,13.704025,7.473194,6.124299,5.163801,5.90215,6.55751,17.55154,9.336288,7.880088,7.231325,6.100502,8.615985,17.477134,5.740351,6.004222,6.927869,6.385852,6.516178,17.007669,6.226283,7.01065,7.935509,6.403117,7.012067,1.576162,1.609442,1.458259,1.744381,1.96169,1.507341,1.504551,1.340868,1.672787,1.717988
min,1.0,1.0,0.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,0.0,-0.83,0.0,18.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,18.0,,1.0,,,,1.0,0.0,1.0,,,,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,2.0,2.0,1.0,3.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,3.0,1.0,4.0,3.0,2.0,1.0,1.0,1.0,10.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,2.0,1.0,2.0,2.0,4.0,1.0,1.0
25%,154.0,4.0,0.0,8.0,2.0,7.0,14.0,4.0,4.0,4.0,4.0,154.0,0.0,-0.01,0.0,24.0,2.0,15.0,15.0,17.39,15.0,5.0,9.52,0.0,5.0,6.0,6.0,5.0,6.0,4.0,5.0,4.0,2.0,24.0,,5.0,,,,2.0,1.0,1.0,,,,1.0,4.0,1.0,,2.0,4.0,2.0,5.0,7.0,6.0,5.0,4.0,2.0,4.0,7.0,3.0,5.0,7.0,5.0,7.0,4.0,2.0,5.0,2.0,15.0,15.0,17.39,15.0,5.0,9.52,10.0,6.0,8.0,10.0,5.0,7.0,20.0,10.0,10.0,15.0,6.0,10.0,6.0,8.0,7.0,8.0,7.0,6.0,7.0,8.0,6.0,7.0,0.0,5.0,6.0,6.0,5.0,6.0,4.0,5.0,4.0,0.0,2.0,14.81,10.0,10.0,10.0,7.0,9.0,7.0,7.0,8.0,7.0,7.0,5.0,1.0,2.0,20.0,10.0,10.0,10.0,0.0,5.0,16.67,10.0,15.0,15.0,5.0,10.0,10.0,8.0,8.0,9.0,5.0,7.0,19.15,10.0,10.0,15.0,10.0,10.0,6.25,7.0,8.0,7.0,7.0,6.0,6.0,7.0,6.0,6.0,0.0,0.0,0.0,1.0,1.0,15.22,10.0,16.67,14.81,5.0,10.0,20.0,10.0,10.0,10.0,0.0,5.0,10.0,7.0,7.0,9.0,5.0,7.0,10.0,7.0,7.0,9.0,6.0,5.0,7.0,7.0,8.0,7.0,6.0,6.0,7.0,7.0,6.0,6.0
50%,281.0,8.0,0.5,16.0,2.0,11.0,18.0,8.0,9.0,8.0,8.0,281.0,0.0,0.21,0.0,26.0,2.0,20.0,18.37,20.0,18.0,10.0,10.64,0.0,6.0,7.0,7.0,7.0,7.0,6.0,6.0,5.0,2.0,26.0,,8.0,,,,2.0,3.0,3.0,,,,2.0,5.0,2.0,,6.0,7.0,4.0,6.0,8.0,7.0,7.0,6.0,3.0,6.0,8.0,6.0,7.0,8.0,7.0,8.0,6.0,4.0,6.0,4.0,20.0,18.37,20.0,18.0,10.0,10.64,25.0,10.0,10.0,15.0,10.0,10.0,25.0,15.0,15.0,20.0,10.0,10.0,7.0,8.0,8.0,8.0,8.0,7.0,8.0,8.0,8.0,8.0,0.0,6.0,7.0,7.0,7.0,7.0,6.0,6.0,5.0,0.0,3.0,17.65,15.79,18.42,15.91,10.0,12.5,7.0,8.0,8.0,8.0,8.0,6.0,1.0,2.0,30.0,10.0,15.0,20.0,5.0,10.0,20.0,16.67,19.05,18.37,10.0,13.0,25.0,10.0,10.0,15.0,10.0,10.0,25.0,15.0,15.0,18.52,10.0,13.95,7.0,8.0,8.0,8.0,8.0,7.0,8.0,8.0,7.0,7.0,0.0,1.0,0.0,1.0,1.0,20.0,16.67,20.0,16.33,10.0,14.29,25.0,15.0,18.0,17.0,10.0,10.0,20.0,10.0,10.0,12.0,9.0,10.0,20.0,10.0,10.0,15.0,10.0,10.0,7.0,8.0,8.0,8.0,8.0,7.0,8.0,8.0,7.0,7.0
75%,408.0,13.0,1.0,26.0,2.0,15.0,20.0,13.0,14.0,13.0,13.0,408.0,0.0,0.43,1.0,28.0,4.0,25.0,20.0,23.81,20.0,15.0,16.0,1.0,8.0,8.0,8.0,8.0,8.0,7.0,7.0,7.0,2.0,28.0,,10.0,,,,4.0,6.0,6.0,,,,2.0,6.0,3.0,,7.0,9.0,7.0,8.0,9.0,9.0,8.0,8.0,6.0,8.0,9.0,7.0,9.0,9.0,8.0,9.0,8.0,7.0,7.0,8.0,25.0,20.0,23.81,20.0,15.0,16.0,35.0,15.0,16.0,20.0,15.0,15.0,40.0,18.75,20.0,20.0,15.0,15.63,8.0,9.0,9.0,9.0,9.0,8.0,9.0,9.0,9.0,9.0,1.0,8.0,8.0,8.0,8.0,8.0,7.0,7.0,7.0,2.0,4.0,25.0,20.0,20.0,20.0,15.0,16.28,8.0,9.0,9.0,9.0,9.0,7.0,3.0,3.0,40.0,20.0,20.0,24.0,10.0,20.0,30.0,20.0,20.0,20.0,15.0,16.67,40.0,15.0,15.0,20.0,10.0,15.0,38.46,19.23,17.39,20.0,15.09,16.515,8.0,9.0,9.0,9.0,9.0,8.0,8.0,9.0,8.0,8.0,1.0,1.0,1.0,1.0,1.0,30.0,20.0,20.0,20.0,15.0,16.67,40.0,20.0,20.0,20.0,10.0,20.0,37.0,15.0,15.0,20.0,10.0,15.0,35.0,15.0,15.0,20.0,10.0,15.0,8.0,9.0,9.0,9.0,9.0,8.0,9.0,9.0,8.0,8.0


In [17]:
def nomalize_wave_preferences(row, column_groups=None):
    """Rescale each group of preference columns of `row` so that it sums to 100.

    Meant to be used with `DataFrame.apply(..., axis=1)`.

    Args:
        row: pandas Series holding one DataFrame row.
        column_groups: iterable of column-name lists, each list being one batch
            of preferences to rescale together. Defaults to the four groups
            pref_1_1_cols, pref_4_1_cols, pref_2_1_cols and pref_1_2_cols.

    Returns:
        A new Series with the rescaled values. The input row is left untouched.
        Groups whose total is not strictly positive (all zeros or all missing)
        are returned unchanged. Missing values stay missing and are ignored
        when computing the total.
    """
    if column_groups is None:
        column_groups = (pref_1_1_cols, pref_4_1_cols, pref_2_1_cols, pref_1_2_cols)

    # DataFrame.apply does not support functions mutating the object they
    # receive, so all the writes go to a copy.
    normalized_row = row.copy()

    for columns in column_groups:
        # A row taken from a mixed-type frame has object dtype: get real numbers
        # (None becomes NaN) before doing arithmetic.
        preferences = pd.to_numeric(normalized_row[columns])

        pref_total = preferences.sum()  # NaN values are skipped

        if pref_total > 0:
            # rescale pref_total -> 100
            normalized_row[columns] = (preferences / pref_total) * 100

    return normalized_row

# Rows belonging to waves 6 to 9 (both bounds included) are rescaled one by one
waves_6_9_mask = speed_dating_df['wave'].between(6, 9)
waves_6_9_normalized = speed_dating_df[waves_6_9_mask].apply(nomalize_wave_preferences, axis=1)
speed_dating_df[waves_6_9_mask] = waves_6_9_normalized

print("Normalized:")
display(speed_dating_df.describe(include='all'))

Normalized:


Unnamed: 0,iid,id,gender,idg,condtn,wave,round,position,positin1,order,partner,pid,match,int_corr,samerace,age_o,race_o,pf_o_att,pf_o_sin,pf_o_int,pf_o_fun,pf_o_amb,pf_o_sha,dec_o,attr_o,sinc_o,intel_o,fun_o,amb_o,shar_o,like_o,prob_o,met_o,age,field,field_cd,undergra,mn_sat,tuition,race,imprace,imprelig,from,zipcode,income,goal,date,go_out,career,career_c,sports,tvsports,exercise,dining,museums,art,hiking,gaming,clubbing,reading,tv,theater,movies,concerts,music,shopping,yoga,exphappy,expnum,attr1_1,sinc1_1,intel1_1,fun1_1,amb1_1,shar1_1,attr4_1,sinc4_1,intel4_1,fun4_1,amb4_1,shar4_1,attr2_1,sinc2_1,intel2_1,fun2_1,amb2_1,shar2_1,attr3_1,sinc3_1,fun3_1,intel3_1,amb3_1,attr5_1,sinc5_1,intel5_1,fun5_1,amb5_1,dec,attr,sinc,intel,fun,amb,shar,like,prob,met,match_es,attr1_s,sinc1_s,intel1_s,fun1_s,amb1_s,shar1_s,attr3_s,sinc3_s,intel3_s,fun3_s,amb3_s,satis_2,length,numdat_2,attr7_2,sinc7_2,intel7_2,fun7_2,amb7_2,shar7_2,attr1_2,sinc1_2,intel1_2,fun1_2,amb1_2,shar1_2,attr4_2,sinc4_2,intel4_2,fun4_2,amb4_2,shar4_2,attr2_2,sinc2_2,intel2_2,fun2_2,amb2_2,shar2_2,attr3_2,sinc3_2,intel3_2,fun3_2,amb3_2,attr5_2,sinc5_2,intel5_2,fun5_2,amb5_2,you_call,them_cal,date_3,numdat_3,num_in_3,attr1_3,sinc1_3,intel1_3,fun1_3,amb1_3,shar1_3,attr7_3,sinc7_3,intel7_3,fun7_3,amb7_3,shar7_3,attr4_3,sinc4_3,intel4_3,fun4_3,amb4_3,shar4_3,attr2_3,sinc2_3,intel2_3,fun2_3,amb2_3,shar2_3,attr3_3,sinc3_3,intel3_3,fun3_3,amb3_3,attr5_3,sinc5_3,intel5_3,fun5_3,amb5_3
count,8368.0,8367.0,8368.0,8368.0,8368.0,8368.0,8368.0,8368.0,6532.0,8368.0,8368.0,8368.0,8368.0,8210.0,8368.0,8274.0,8305.0,8289.0,8289.0,8289.0,8280.0,8271.0,8249.0,8368.0,8166.0,8091.0,8072.0,8018.0,7656.0,7302.0,8128.0,8060.0,7993.0,8274.0,8305,8286.0,4914,3133.0,3583.0,8305.0,8289.0,8289.0,8289,7305.0,4273.0,8289.0,8271.0,8289.0,8279,8230.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8289.0,8267.0,1790.0,8289.0,8289.0,8289.0,8280.0,8271.0,8249.0,6489.0,6489.0,6489.0,6489.0,6489.0,6467.0,8289.0,8289.0,8289.0,8289.0,8280.0,8280.0,8263.0,8263.0,8263.0,8263.0,8263.0,4906.0,4906.0,4906.0,4906.0,4906.0,8368.0,8166.0,8091.0,8072.0,8018.0,7656.0,7302.0,8128.0,8060.0,7993.0,7195.0,4096.0,4096.0,4096.0,4096.0,4096.0,4096.0,4000.0,4000.0,4000.0,4000.0,4000.0,7454.0,7454.0,7424.0,1984.0,1955.0,1984.0,1984.0,1955.0,1974.0,7436.0,7454.0,7454.0,7454.0,7454.0,7454.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,5775.0,7454.0,7454.0,7454.0,7454.0,7454.0,4377.0,4377.0,4377.0,4377.0,4377.0,3969.0,3969.0,3969.0,1496.0,666.0,3969.0,3969.0,3969.0,3969.0,3969.0,3969.0,2016.0,2016.0,2016.0,2016.0,2016.0,2016.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2959.0,2016.0,3969.0,3969.0,3969.0,3969.0,3969.0,2016.0,2016.0,2016.0,2016.0,2016.0
unique,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,259,,241,68.0,115.0,,,,269,409.0,261.0,,,,367,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
top,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Business,,UC Berkeley,1400.0,26908.0,,,,New York,0.0,55080.0,,,,Finance,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
freq,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,521,,107,403.0,241.0,,,,521,355.0,123.0,,,,202,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
mean,283.863767,8.964384,0.5,17.335325,1.829828,11.358509,16.880258,9.048757,9.295775,8.931764,8.965942,283.863767,0.164914,0.1963,0.396272,26.364999,2.756653,22.495347,17.396867,20.270759,17.459714,10.685375,11.84593,0.420053,6.19133,7.175256,7.369301,6.40222,6.778409,5.47487,6.134498,5.208251,1.960215,26.364999,,7.665339,,,,2.756653,3.785981,3.653879,,,,2.121848,5.007375,2.158523,,5.277886,6.424418,4.574255,6.2443,7.783448,6.987453,6.716371,5.737483,3.850042,5.745325,7.6616,5.304379,6.77814,7.920618,6.824587,7.850887,5.631077,4.338038,5.534051,5.560335,22.495286,17.396816,20.270701,17.459657,10.685318,11.845878,29.169494,13.21319,14.704961,18.077304,11.666339,13.095526,30.36128,13.264729,14.41728,18.427822,11.744474,11.855521,7.084352,8.295776,7.704345,8.404454,7.577877,6.941908,7.927232,8.284346,7.426213,7.617611,0.420053,6.19035,7.175256,7.369301,6.400474,6.778409,5.47487,6.134498,5.208251,0.947579,3.206991,20.791624,15.434255,17.243708,15.260869,11.144619,12.457925,7.21125,8.082,8.25775,7.6925,7.58925,5.711833,1.843708,2.337823,32.819556,13.529923,15.293851,18.868448,7.286957,12.156028,26.197743,15.872213,17.817057,17.656804,9.918071,12.762308,26.806234,11.929177,12.10303,15.16381,9.342511,11.320866,29.344369,13.89823,13.958265,17.967233,11.909735,12.887976,7.124497,7.931983,8.238664,7.601959,7.486048,6.827964,7.394106,7.838702,7.279415,7.332191,0.781053,0.982111,0.376921,1.230615,0.935435,24.364853,16.588065,19.416904,16.237488,10.902986,12.705062,31.330357,15.654266,16.679563,16.418155,7.823909,12.207837,25.610341,10.751267,11.524839,14.276783,9.207503,11.253802,24.970936,10.923285,11.952687,14.959108,9.526191,11.96627,7.239859,8.093474,8.389771,7.658856,7.391786,6.81002,7.615079,7.93254,7.155258,7.048611
std,158.584899,5.492409,0.50003,10.94329,0.375806,5.995461,4.354579,5.515476,5.650199,5.478099,5.493929,158.584899,0.371125,0.303522,0.489151,3.563648,1.230689,12.569802,7.044003,6.782895,6.085526,6.126544,6.362746,0.493597,1.947317,1.740575,1.550501,1.948535,1.79408,2.156163,1.841258,2.129354,0.245925,3.563648,,3.759695,,,,1.230689,2.846506,2.805641,,,,1.406983,1.444439,1.105428,,3.309885,2.619419,2.801521,2.419612,1.755105,2.0516,2.262518,2.570595,2.492039,2.502831,1.969231,2.52958,2.233341,1.700264,2.156155,1.792227,2.609472,2.717712,1.733627,4.75251,12.569823,7.044018,6.782919,6.085528,6.126475,6.36271,14.007675,6.556884,6.014907,6.15581,7.090767,5.92559,16.252975,6.964819,6.265198,6.577423,6.888882,6.169699,1.395527,1.407226,1.564041,1.076268,1.778718,1.498653,1.627054,1.283657,1.779129,1.773094,0.493597,1.950178,1.740575,1.550501,1.953816,1.79408,2.156163,1.841258,2.129354,0.989878,2.444377,12.968524,6.915322,6.59642,5.356969,5.514028,5.921789,1.41545,1.455741,1.179317,1.626839,1.793136,1.82161,0.975682,0.63134,17.15527,7.977482,7.292868,8.535963,6.125187,8.241906,14.371919,6.657137,6.535511,6.124522,5.675565,6.652701,16.402836,6.401556,5.990607,7.290107,5.856329,6.296155,14.551171,6.17169,5.398621,6.100307,6.313281,5.615691,1.371758,1.503184,1.18039,1.548276,1.74498,1.411096,1.588145,1.280936,1.647478,1.521854,1.612446,1.382652,0.484676,1.294557,0.75416,13.704025,7.473194,6.124299,5.163801,5.90215,6.55751,17.55154,9.336288,7.880088,7.231325,6.100502,8.615985,17.477134,5.740351,6.004222,6.927869,6.385852,6.516178,17.007669,6.226283,7.01065,7.935509,6.403117,7.012067,1.576162,1.609442,1.458259,1.744381,1.96169,1.507341,1.504551,1.340868,1.672787,1.717988
min,1.0,1.0,0.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,0.0,-0.83,0.0,18.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,18.0,,1.0,,,,1.0,0.0,1.0,,,,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,2.0,2.0,1.0,3.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,3.0,1.0,4.0,3.0,2.0,1.0,1.0,1.0,10.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,2.0,1.0,2.0,2.0,4.0,1.0,1.0
25%,154.0,4.0,0.0,8.0,2.0,7.0,14.0,4.0,4.0,4.0,4.0,154.0,0.0,-0.01,0.0,24.0,2.0,15.0,15.0,17.39,15.0,5.0,9.52,0.0,5.0,6.0,6.0,5.0,6.0,4.0,5.0,4.0,2.0,24.0,,5.0,,,,2.0,1.0,1.0,,,,1.0,4.0,1.0,,2.0,4.0,2.0,5.0,7.0,6.0,5.0,4.0,2.0,4.0,7.0,3.0,5.0,7.0,5.0,7.0,4.0,2.0,5.0,2.0,15.0,15.0,17.39,15.0,5.0,9.519048,20.0,10.0,10.0,15.0,5.555556,10.0,20.0,10.0,10.0,15.0,6.0,10.0,6.0,8.0,7.0,8.0,7.0,6.0,7.0,8.0,6.0,7.0,0.0,5.0,6.0,6.0,5.0,6.0,4.0,5.0,4.0,0.0,2.0,14.81,10.0,10.0,10.0,7.0,9.0,7.0,7.0,8.0,7.0,7.0,5.0,1.0,2.0,20.0,10.0,10.0,10.0,0.0,5.0,16.67,10.0,15.0,15.0,5.0,10.0,10.0,8.0,8.0,9.0,5.0,7.0,19.15,10.0,10.0,15.0,10.0,10.0,6.25,7.0,8.0,7.0,7.0,6.0,6.0,7.0,6.0,6.0,0.0,0.0,0.0,1.0,1.0,15.22,10.0,16.67,14.81,5.0,10.0,20.0,10.0,10.0,10.0,0.0,5.0,10.0,7.0,7.0,9.0,5.0,7.0,10.0,7.0,7.0,9.0,6.0,5.0,7.0,7.0,8.0,7.0,6.0,6.0,7.0,7.0,6.0,6.0
50%,281.0,8.0,0.5,16.0,2.0,11.0,18.0,8.0,9.0,8.0,8.0,281.0,0.0,0.21,0.0,26.0,2.0,20.0,18.37,20.0,18.0,10.0,10.64,0.0,6.0,7.0,7.0,7.0,7.0,6.0,6.0,5.0,2.0,26.0,,8.0,,,,2.0,3.0,3.0,,,,2.0,5.0,2.0,,6.0,7.0,4.0,6.0,8.0,7.0,7.0,6.0,3.0,6.0,8.0,6.0,7.0,8.0,7.0,8.0,6.0,4.0,6.0,4.0,20.0,18.366327,20.0,18.0,10.0,10.64,25.0,14.0,15.0,19.512195,10.0,14.0,25.0,15.0,15.0,20.0,10.0,10.0,7.0,8.0,8.0,8.0,8.0,7.0,8.0,8.0,8.0,8.0,0.0,6.0,7.0,7.0,7.0,7.0,6.0,6.0,5.0,0.0,3.0,17.65,15.79,18.42,15.91,10.0,12.5,7.0,8.0,8.0,8.0,8.0,6.0,1.0,2.0,30.0,10.0,15.0,20.0,5.0,10.0,20.0,16.666667,19.046191,18.366327,10.0,13.0,25.0,10.0,10.0,15.0,10.0,10.0,25.0,15.0,15.0,18.52,10.0,13.95,7.0,8.0,8.0,8.0,8.0,7.0,8.0,8.0,7.0,7.0,0.0,1.0,0.0,1.0,1.0,20.0,16.67,20.0,16.33,10.0,14.29,25.0,15.0,18.0,17.0,10.0,10.0,20.0,10.0,10.0,12.0,9.0,10.0,20.0,10.0,10.0,15.0,10.0,10.0,7.0,8.0,8.0,8.0,8.0,7.0,8.0,8.0,7.0,7.0
75%,408.0,13.0,1.0,26.0,2.0,15.0,20.0,13.0,14.0,13.0,13.0,408.0,0.0,0.43,1.0,28.0,4.0,25.0,20.0,23.81,20.0,15.0,16.0,1.0,8.0,8.0,8.0,8.0,8.0,7.0,7.0,7.0,2.0,28.0,,10.0,,,,4.0,6.0,6.0,,,,2.0,6.0,3.0,,7.0,9.0,7.0,8.0,9.0,9.0,8.0,8.0,6.0,8.0,9.0,7.0,9.0,9.0,8.0,9.0,8.0,7.0,7.0,8.0,25.0,20.0,23.81,20.0,15.0,16.0,35.0,17.5,18.918919,20.0,15.217391,16.981132,40.0,18.75,20.0,20.0,15.0,15.626875,8.0,9.0,9.0,9.0,9.0,8.0,9.0,9.0,9.0,9.0,1.0,8.0,8.0,8.0,8.0,8.0,7.0,7.0,7.0,2.0,4.0,25.0,20.0,20.0,20.0,15.0,16.28,8.0,9.0,9.0,9.0,9.0,7.0,3.0,3.0,40.0,20.0,20.0,24.0,10.0,20.0,30.0,20.0,20.0,20.0,15.0,16.671667,40.0,15.0,15.0,20.0,10.0,15.0,38.46,19.23,17.39,20.0,15.09,16.515,8.0,9.0,9.0,9.0,9.0,8.0,8.0,9.0,8.0,8.0,1.0,1.0,1.0,1.0,1.0,30.0,20.0,20.0,20.0,15.0,16.67,40.0,20.0,20.0,20.0,10.0,20.0,37.0,15.0,15.0,20.0,10.0,15.0,35.0,15.0,15.0,20.0,10.0,15.0,8.0,9.0,9.0,9.0,9.0,8.0,9.0,9.0,8.0,8.0


### Typage des données

In [18]:
# Identifier and categorical-code columns hold whole numbers, so they are cast to
# integers. pandas' nullable "Int64" dtype is used (rather than plain int) so that
# missing values, such as the null pid entries found in wave 5, survive the cast.
id_columns = ["iid", "id", "idg", "wave", "position", "positin1", "order", "partner", "pid"]
categorical_columns = ["gender", "condtn", "match", "dec", "dec_o", "samerace", "race", "race_o",
                       "field_cd", "goal", "go_out", "career_c", "date"]

# Single cast for both groups: they share the same target dtype.
integer_columns = id_columns + categorical_columns
speed_dating_df[integer_columns] = speed_dating_df[integer_columns].astype("Int64")

### Amélioration de la lisibilité du dataset
- Labels des variables catégorielles

In [19]:
# Decoding plan, one entry per categorical code column:
#   (code column, label column to create, decoder, code used for missing values)
# A fill code of None means the column is decoded as-is, without filling.
# For the other columns, missing codes are first replaced by the code of the
# "Other" category, so that every row ends up with a label.
label_specs = [
    ("gender", "gender_label", LabelDecoder.get_gender_label, None),
    ("race", "race_label", LabelDecoder.get_race_label, 6),               # Other = 6
    ("race_o", "race_label_o", LabelDecoder.get_race_label, 6),           # Other = 6
    ("goal", "goal_label", LabelDecoder.get_goal_label, 6),               # Other = 6
    ("field_cd", "field_cd_label", LabelDecoder.get_field_cd_label, 18),  # Other = 18
    ("career_c", "career_c_label", LabelDecoder.get_career_label, 15),    # Other = 15
]

for code_column, label_column, decoder, other_code in label_specs:
    if other_code is not None:
        # Column-level assignment instead of fillna(..., inplace=True).
        speed_dating_df[code_column] = speed_dating_df[code_column].fillna(other_code)
    speed_dating_df[label_column] = speed_dating_df[code_column].map(decoder)

In [20]:
# Visual check of the prepared frame: the decoded *_label columns
# (gender, race, race_o, goal, field_cd, career_c) appear as the last columns.
display(speed_dating_df)

Unnamed: 0,iid,id,gender,idg,condtn,wave,round,position,positin1,order,partner,pid,match,int_corr,samerace,age_o,race_o,pf_o_att,pf_o_sin,pf_o_int,pf_o_fun,pf_o_amb,pf_o_sha,dec_o,attr_o,sinc_o,intel_o,fun_o,amb_o,shar_o,like_o,prob_o,met_o,age,field,field_cd,undergra,mn_sat,tuition,race,imprace,imprelig,from,zipcode,income,goal,date,go_out,career,career_c,sports,tvsports,exercise,dining,museums,art,hiking,gaming,clubbing,reading,tv,theater,movies,concerts,music,shopping,yoga,exphappy,expnum,attr1_1,sinc1_1,intel1_1,fun1_1,amb1_1,shar1_1,attr4_1,sinc4_1,intel4_1,fun4_1,amb4_1,shar4_1,attr2_1,sinc2_1,intel2_1,fun2_1,amb2_1,shar2_1,attr3_1,sinc3_1,fun3_1,intel3_1,amb3_1,attr5_1,sinc5_1,intel5_1,fun5_1,amb5_1,dec,attr,sinc,intel,fun,amb,shar,like,prob,met,match_es,attr1_s,sinc1_s,intel1_s,fun1_s,amb1_s,shar1_s,attr3_s,sinc3_s,intel3_s,fun3_s,amb3_s,satis_2,length,numdat_2,attr7_2,sinc7_2,intel7_2,fun7_2,amb7_2,shar7_2,attr1_2,sinc1_2,intel1_2,fun1_2,amb1_2,shar1_2,attr4_2,sinc4_2,intel4_2,fun4_2,amb4_2,shar4_2,attr2_2,sinc2_2,intel2_2,fun2_2,amb2_2,shar2_2,attr3_2,sinc3_2,intel3_2,fun3_2,amb3_2,attr5_2,sinc5_2,intel5_2,fun5_2,amb5_2,you_call,them_cal,date_3,numdat_3,num_in_3,attr1_3,sinc1_3,intel1_3,fun1_3,amb1_3,shar1_3,attr7_3,sinc7_3,intel7_3,fun7_3,amb7_3,shar7_3,attr4_3,sinc4_3,intel4_3,fun4_3,amb4_3,shar4_3,attr2_3,sinc2_3,intel2_3,fun2_3,amb2_3,shar2_3,attr3_3,sinc3_3,intel3_3,fun3_3,amb3_3,attr5_3,sinc5_3,intel5_3,fun5_3,amb5_3,gender_label,race_label,race_label_o,goal_label,field_cd_label,career_c_label
0,1,1,0,1,1,1,10,7,,4,1,11,0,0.14,0,27.0,2,35.0,20.0,20.0,20.0,0.0,5.0,0,6.0,8.0,8.0,8.0,8.0,6.0,7.0,4.0,2.0,21.0,Law,1,,,,4,2.0,4.0,Chicago,60521,69487.00,2,7,1,lawyer,15,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,6.0,9.0,7.0,7.0,6.0,5.0,7.0,6.0,2.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,,Woman,Asian/Pacific Islander/Asian-American,European/Caucasian-American,To meet new people,Law,Other
1,1,1,0,1,1,1,10,7,,3,2,12,0,0.54,0,22.0,2,60.0,0.0,0.0,40.0,0.0,0.0,0,7.0,8.0,10.0,7.0,7.0,5.0,8.0,4.0,2.0,21.0,Law,1,,,,4,2.0,4.0,Chicago,60521,69487.00,2,7,1,lawyer,15,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,7.0,8.0,7.0,8.0,5.0,6.0,7.0,5.0,1.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,,Woman,Asian/Pacific Islander/Asian-American,European/Caucasian-American,To meet new people,Law,Other
2,1,1,0,1,1,1,10,7,,10,3,13,1,0.16,1,22.0,4,19.0,18.0,19.0,18.0,14.0,12.0,1,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,1.0,21.0,Law,1,,,,4,2.0,4.0,Chicago,60521,69487.00,2,7,1,lawyer,15,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,5.0,8.0,9.0,8.0,5.0,7.0,7.0,,1.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,,Woman,Asian/Pacific Islander/Asian-American,Asian/Pacific Islander/Asian-American,To meet new people,Law,Other
3,1,1,0,1,1,1,10,7,,5,4,14,1,0.61,0,23.0,2,30.0,5.0,15.0,40.0,5.0,5.0,1,7.0,8.0,9.0,8.0,9.0,8.0,7.0,7.0,2.0,21.0,Law,1,,,,4,2.0,4.0,Chicago,60521,69487.00,2,7,1,lawyer,15,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,7.0,6.0,8.0,7.0,6.0,8.0,7.0,6.0,2.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,,Woman,Asian/Pacific Islander/Asian-American,European/Caucasian-American,To meet new people,Law,Other
4,1,1,0,1,1,1,10,7,,7,5,15,1,0.21,0,24.0,3,30.0,10.0,20.0,10.0,10.0,20.0,1,8.0,7.0,9.0,6.0,9.0,7.0,8.0,6.0,2.0,21.0,Law,1,,,,4,2.0,4.0,Chicago,60521,69487.00,2,7,1,lawyer,15,9.0,2.0,8.0,9.0,1.0,1.0,5.0,1.0,5.0,6.0,9.0,1.0,10.0,10.0,9.0,8.0,1.0,3.0,2.0,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,35.0,20.0,15.0,20.0,5.0,5.0,6.0,8.0,8.0,8.0,7.0,,,,,,1,5.0,6.0,7.0,7.0,6.0,6.0,6.0,6.0,2.0,4.0,,,,,,,,,,,,6.0,2.0,1.0,,,,,,,19.44,16.67,13.89,22.22,11.11,16.67,,,,,,,,,,,,,6.0,7.0,8.0,7.0,6.0,,,,,,1.0,1.0,0.0,,,15.0,20.0,20.0,15.0,15.0,15.0,,,,,,,,,,,,,,,,,,,5.0,7.0,7.0,7.0,7.0,,,,,,Woman,Asian/Pacific Islander/Asian-American,Latino/Hispanic American,To meet new people,Law,Other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8363,552,22,1,44,2,21,22,14,10,5,18,526,0,0.64,0,26.0,3,10.0,10.0,30.0,20.0,10.0,15.0,1,10.0,5.0,3.0,2.0,6.0,5.0,6.0,1.0,,25.0,Climate Dynamics,18,"Ecole Normale Suprieure, Paris",,,2,1.0,1.0,France,78110,,1,2,1,assistant master of the universe (otherwise it's too much work),15,8.0,2.0,5.0,10.0,10.0,10.0,7.0,1.0,9.0,8.0,3.0,7.0,9.0,10.0,10.0,7.0,3.0,10.0,,70.0,0.0,15.0,15.0,0.0,0.0,90.0,0.0,0.0,0.0,0.0,10.0,50.0,0.0,0.0,30.0,0.0,20.0,8.0,7.0,6.0,7.0,7.0,9.0,7.0,10.0,5.0,9.0,0,3.0,5.0,5.0,5.0,,,2.0,5.0,0.0,3.0,,,,,,,,,,,,5.0,1.0,2.0,70.0,0.0,15.0,10.0,0.0,5.0,70.00,0.00,15.00,10.00,0.00,5.00,80.0,0.0,5.0,5.0,0.0,10.0,50.0,5.0,10.0,20.0,5.0,10.0,9.0,3.0,7.0,6.0,9.0,9.0,3.0,9.0,4.0,7.0,2.0,0.0,0.0,,1.0,70.0,0.0,20.0,10.0,0.0,0.0,70.0,0.0,20.0,10.0,0.0,0.0,80.0,0.0,10.0,0.0,0.0,10.0,50.0,5.0,10.0,20.0,10.0,5.0,8.0,5.0,7.0,6.0,7.0,9.0,5.0,9.0,5.0,6.0,Man,European/Caucasian-American,Latino/Hispanic American,Seemed like a fun night out,Other,Other
8364,552,22,1,44,2,21,22,13,10,4,19,527,0,0.71,0,24.0,6,50.0,20.0,10.0,5.0,10.0,5.0,0,6.0,3.0,7.0,3.0,7.0,2.0,2.0,2.0,2.0,25.0,Climate Dynamics,18,"Ecole Normale Suprieure, Paris",,,2,1.0,1.0,France,78110,,1,2,1,assistant master of the universe (otherwise it's too much work),15,8.0,2.0,5.0,10.0,10.0,10.0,7.0,1.0,9.0,8.0,3.0,7.0,9.0,10.0,10.0,7.0,3.0,10.0,,70.0,0.0,15.0,15.0,0.0,0.0,90.0,0.0,0.0,0.0,0.0,10.0,50.0,0.0,0.0,30.0,0.0,20.0,8.0,7.0,6.0,7.0,7.0,9.0,7.0,10.0,5.0,9.0,0,4.0,6.0,8.0,4.0,4.0,,4.0,4.0,0.0,3.0,,,,,,,,,,,,5.0,1.0,2.0,70.0,0.0,15.0,10.0,0.0,5.0,70.00,0.00,15.00,10.00,0.00,5.00,80.0,0.0,5.0,5.0,0.0,10.0,50.0,5.0,10.0,20.0,5.0,10.0,9.0,3.0,7.0,6.0,9.0,9.0,3.0,9.0,4.0,7.0,2.0,0.0,0.0,,1.0,70.0,0.0,20.0,10.0,0.0,0.0,70.0,0.0,20.0,10.0,0.0,0.0,80.0,0.0,10.0,0.0,0.0,10.0,50.0,5.0,10.0,20.0,10.0,5.0,8.0,5.0,7.0,6.0,7.0,9.0,5.0,9.0,5.0,6.0,Man,European/Caucasian-American,Other,Seemed like a fun night out,Other,Other
8365,552,22,1,44,2,21,22,19,10,10,20,528,0,-0.46,0,29.0,3,40.0,10.0,30.0,10.0,10.0,,0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,25.0,Climate Dynamics,18,"Ecole Normale Suprieure, Paris",,,2,1.0,1.0,France,78110,,1,2,1,assistant master of the universe (otherwise it's too much work),15,8.0,2.0,5.0,10.0,10.0,10.0,7.0,1.0,9.0,8.0,3.0,7.0,9.0,10.0,10.0,7.0,3.0,10.0,,70.0,0.0,15.0,15.0,0.0,0.0,90.0,0.0,0.0,0.0,0.0,10.0,50.0,0.0,0.0,30.0,0.0,20.0,8.0,7.0,6.0,7.0,7.0,9.0,7.0,10.0,5.0,9.0,0,4.0,7.0,8.0,8.0,8.0,,6.0,5.0,0.0,3.0,,,,,,,,,,,,5.0,1.0,2.0,70.0,0.0,15.0,10.0,0.0,5.0,70.00,0.00,15.00,10.00,0.00,5.00,80.0,0.0,5.0,5.0,0.0,10.0,50.0,5.0,10.0,20.0,5.0,10.0,9.0,3.0,7.0,6.0,9.0,9.0,3.0,9.0,4.0,7.0,2.0,0.0,0.0,,1.0,70.0,0.0,20.0,10.0,0.0,0.0,70.0,0.0,20.0,10.0,0.0,0.0,80.0,0.0,10.0,0.0,0.0,10.0,50.0,5.0,10.0,20.0,10.0,5.0,8.0,5.0,7.0,6.0,7.0,9.0,5.0,9.0,5.0,6.0,Man,European/Caucasian-American,Latino/Hispanic American,Seemed like a fun night out,Other,Other
8366,552,22,1,44,2,21,22,3,10,16,21,529,0,0.62,0,22.0,4,10.0,25.0,25.0,10.0,10.0,20.0,1,5.0,7.0,5.0,5.0,3.0,6.0,6.0,4.0,2.0,25.0,Climate Dynamics,18,"Ecole Normale Suprieure, Paris",,,2,1.0,1.0,France,78110,,1,2,1,assistant master of the universe (otherwise it's too much work),15,8.0,2.0,5.0,10.0,10.0,10.0,7.0,1.0,9.0,8.0,3.0,7.0,9.0,10.0,10.0,7.0,3.0,10.0,,70.0,0.0,15.0,15.0,0.0,0.0,90.0,0.0,0.0,0.0,0.0,10.0,50.0,0.0,0.0,30.0,0.0,20.0,8.0,7.0,6.0,7.0,7.0,9.0,7.0,10.0,5.0,9.0,0,4.0,6.0,5.0,4.0,,5.0,5.0,5.0,0.0,3.0,,,,,,,,,,,,5.0,1.0,2.0,70.0,0.0,15.0,10.0,0.0,5.0,70.00,0.00,15.00,10.00,0.00,5.00,80.0,0.0,5.0,5.0,0.0,10.0,50.0,5.0,10.0,20.0,5.0,10.0,9.0,3.0,7.0,6.0,9.0,9.0,3.0,9.0,4.0,7.0,2.0,0.0,0.0,,1.0,70.0,0.0,20.0,10.0,0.0,0.0,70.0,0.0,20.0,10.0,0.0,0.0,80.0,0.0,10.0,0.0,0.0,10.0,50.0,5.0,10.0,20.0,10.0,5.0,8.0,5.0,7.0,6.0,7.0,9.0,5.0,9.0,5.0,6.0,Man,European/Caucasian-American,Asian/Pacific Islander/Asian-American,Seemed like a fun night out,Other,Other


Pour la suite, le code de chargement et de préparation du dataset est mutualisé dans le fichier [data_loader.py](data_loader.py).