In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Location of the survey export, relative to the notebook's working directory.
file_path = 'datingdataset.csv'

# Load the CSV and convert every column to a numeric dtype in one pass.
# errors='coerce' turns any value that cannot be parsed as a number into NaN.
df = pd.read_csv(file_path).apply(pd.to_numeric, errors='coerce')

# Respondent id plus the per-wave section and partner-source columns.
columns_to_print_section = [
    'caseid_new',
    'w1_section',
    'w2_section',
    'w3_section',
    'w3_partner_source',
]

# Sanity check: report (rows, columns) for that subset of the frame.
print(df[columns_to_print_section].shape)

(3510, 5)


In [3]:
# Respondents who stayed in one and the same relationship across waves 1 -> 3.
# Respondents with a new partner in wave 2 or wave 3 are deliberately excluded.

# Wave 1 (2017): respondent was partnered.
con_w1_partnered = df['w1_section'].eq(1)

# Wave 2: still with that partner, either married (1) or partnered but not married (3).
con_w2_married_same = df['w2_section'].eq(1)
con_w2_part_same = df['w2_section'].eq(3)

# Wave 3: the same two statuses, married (1) or partnered but not married (3) ...
con_w3_married_same = df['w3_section'].eq(1)
con_w3_partnered_same = df['w3_section'].eq(3)
# ... and the wave-3 partner must be the one from wave 1
# (presumably code 1 of w3_partner_source; confirm against the codebook).
con_w3_same_part_fromw1 = df['w3_partner_source'].eq(1)

together_in_w2 = con_w2_married_same | con_w2_part_same
together_in_w3 = con_w3_married_same | con_w3_partnered_same

# All conditions must hold at once for a row to be kept.
com_con_same_rel_all_waves = (
    con_w1_partnered
    & together_in_w2
    & together_in_w3
    & con_w3_same_part_fromw1
)
same_rel_all_waves_df = df.loc[com_con_same_rel_all_waves]

In [4]:
# Build dict 3: one summary entry per respondent who kept the same relationship w1 -> w3.


def _same_rel_entry(record):
    """Return the respondent/partner fields kept for one survey record.

    `record` is a single row of `same_rel_all_waves_df` as a plain dict
    (column name -> value). The result holds the respondent and partner
    attributes copied through unchanged, plus the derived political distance.
    """
    return {
        # Respondent identity and demographics.
        'caseid_new': record['caseid_new'],  # respondent case id
        'w1_ppgender': record['w1_ppgender'],  # respondent gender
        'w1_same_sex_couple': record['w1_same_sex_couple'],  # 0 = no, 1 = yes
        'w1_ppage': record['w1_ppage'],  # respondent age
        'w1_q9': record['w1_q9'],  # partner's age in 2017
        # Race of respondent and partner.
        'w1_subject_race': record['w1_subject_race'],
        'w1_q6b': record['w1_q6b'],
        # Education level of respondent and partner.
        'w1_ppeduc': record['w1_ppeduc'],
        'w1_q10': record['w1_q10'],
        # Absolute gap between the two party-id answers
        # (presumably w1_q12 is the partner's party id; confirm against the codebook).
        'political_steps': np.abs(record['w1_partyid7'] - record['w1_q12']),
        "rel_duration": record['w3_relationship_duration_yrs'],
    }


df_dict_same_rel = same_rel_all_waves_df.to_dict(orient='records')

# Entries are keyed by their 0-based position in the filtered frame.
dict_same_rel = {
    position: _same_rel_entry(record)
    for position, record in enumerate(df_dict_same_rel)
}
# Kept for parity with the running counter: it ends up equal to the entry count.
rel_counter = len(dict_same_rel)

print(dict_same_rel)
print(len(dict_same_rel))



{0: {'caseid_new': 71609, 'w1_ppgender': 2, 'w1_same_sex_couple': 0.0, 'w1_ppage': 68, 'w1_q9': 71.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 10.0, 'political_steps': 0.0, 'rel_duration': 57.4166679382324}, 1: {'caseid_new': 106983, 'w1_ppgender': 1, 'w1_same_sex_couple': 0.0, 'w1_ppage': 39, 'w1_q9': 49.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 11, 'w1_q10': 10.0, 'political_steps': 0.0, 'rel_duration': 22.3333339691162}, 2: {'caseid_new': 164061, 'w1_ppgender': 1, 'w1_same_sex_couple': 0.0, 'w1_ppage': 59, 'w1_q9': 52.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 12.0, 'political_steps': 0.0, 'rel_duration': 28.25}, 3: {'caseid_new': 212249, 'w1_ppgender': 2, 'w1_same_sex_couple': 0.0, 'w1_ppage': 55, 'w1_q9': 55.0, 'w1_subject_race': 2.0, 'w1_q6b': 2.0, 'w1_ppeduc': 9, 'w1_q10': 10.0, 'political_steps': 0.0, 'rel_duration': 38.9166679382324}, 4: {'caseid_new': 214227, 'w1_ppgender': 2, 'w1_same_sex_couple': 0.0, 'w1_ppage

In [40]:
# Generalising which attributes take precedence.
# When one person is matched against another and two different attributes end up
# with the same score, this generic precedence ordering is what breaks the tie.
# Cross-checked against the stats shared on Discord: 408 + 323 + 231 = 962.

# Couples whose political distance is at most this many steps count as "very similar".
MAX_SIMILAR_POLITICAL_STEPS = 2

# Number of couples that are 0..MAX_SIMILAR_POLITICAL_STEPS steps apart politically.
political_ideology = 0

for each_record in dict_same_rel.values():
    political_steps = each_record['political_steps']
    # A missing party id on either side makes the distance NaN, and int(NaN)
    # raises ValueError. An unknown distance is not counted as similar.
    if pd.isna(political_steps):
        continue
    if 0 <= int(political_steps) <= MAX_SIMILAR_POLITICAL_STEPS:
        political_ideology += 1

print(political_ideology)

962


In [44]:
# Preference for a partner of the same ethnicity:
# count the couples in which respondent and partner carry the same race code.
# A NaN code never compares equal, so couples with a missing value are not counted.
same_ethnicity_number = sum(
    1
    for couple in dict_same_rel.values()
    if couple['w1_subject_race'] == couple['w1_q6b']
)

print(same_ethnicity_number)
        

947


In [42]:
# Same tally for education: couples in which the partner's education level is
# greater than or equal to the respondent's.
partner_ed_greater_than_or_equal = 0

for couple in dict_same_rel.values():
    respondent_level, partner_level = couple['w1_ppeduc'], couple['w1_q10']
    # Comparisons against NaN are False, so a missing level is skipped here too.
    if not partner_level >= respondent_level:
        continue
    partner_ed_greater_than_or_equal += 1

print(partner_ed_greater_than_or_equal)

736
