In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Specify the file path
file_path = 'datingdataset.csv'

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(file_path)

# Change string numeric values to actual numeric values
for column in df.columns:
    df[column] = pd.to_numeric(df[column], errors='coerce')

# Display the DataFrame
#print(df.columns)

# Specify the column names you want to print
columns_to_print_section = ['caseid_new', 'w1_section', 'w2_section', 'w3_section', 'w3_partner_source']

# Print the specified columns
print(df[columns_to_print_section].shape)

(3510, 5)


In [6]:
# all the people that were in one relationship throughout all w1->w3
con_w1_partnered = df['w1_section'] == 1 # those who were partenered in 2017
con_w2_married_same = df['w2_section'] == 1 # who are still married to the same partner in wave 2
con_w2_part_same = df['w2_section'] == 3 # or still partnered with that same partner but not married (but still together) in wave 2 
# (note that we are not looking at those with new partners in w2/w3)
con_w3_married_same = df['w3_section'] == 1 # who are still married to the same partner in wave 3
con_w3_partnered_same = df['w3_section'] == 3 # or still partnered with that same partner but not married (but still together) in wave 2 
con_w3_same_part_fromw1 = df['w3_partner_source'] == 1

com_con_same_rel_all_waves = con_w1_partnered & (con_w2_married_same | con_w2_part_same) & ((con_w3_married_same | con_w3_partnered_same) & con_w3_same_part_fromw1)
same_rel_all_waves_df = df.loc[com_con_same_rel_all_waves]
print(len(same_rel_all_waves_df))

1096


In [7]:
# now creating dict 3 for those who had same rel w1->w3
rel_counter = 0
dict_same_rel = {}

df_dict_same_rel = same_rel_all_waves_df.to_dict(orient='records')

for record in df_dict_same_rel:
    dict_same_rel[rel_counter] = {
        'caseid_new': record['caseid_new'], #resp case id
        'w1_ppgender': record['w1_ppgender'],#resp gender
        'w1_same_sex_couple': record['w1_same_sex_couple'], #same sex couple? 0 no, 1 yes,
        'w1_ppage': record['w1_ppage'], #age
        'w1_q9': record['w1_q9'], #partner's age in 2017

        'w1_subject_race': record['w1_subject_race'], #resp race
        'w1_q6b': record['w1_q6b'], #partner's race
                
        'w1_ppeduc': record['w1_ppeduc'], #resp educ level
        'w1_q10': record['w1_q10'], #partner educ level

        'w1_partyid7':record['w1_partyid7'],
        'w1_q12':record['w1_q12'],
        'political_steps': np.abs(record['w1_partyid7']-record['w1_q12']),
        "rel_duration": record['w3_relationship_duration_yrs']
            }
    rel_counter += 1

print(dict_same_rel)
#print(len(dict_same_rel))



{0: {'caseid_new': 71609, 'w1_ppgender': 2, 'w1_same_sex_couple': 0.0, 'w1_ppage': 68, 'w1_q9': 71.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 10.0, 'w1_partyid7': 3.0, 'w1_q12': 3.0, 'political_steps': 0.0, 'rel_duration': 57.4166679382324}, 1: {'caseid_new': 106983, 'w1_ppgender': 1, 'w1_same_sex_couple': 0.0, 'w1_ppage': 39, 'w1_q9': 49.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 11, 'w1_q10': 10.0, 'w1_partyid7': 7.0, 'w1_q12': 7.0, 'political_steps': 0.0, 'rel_duration': 22.3333339691162}, 2: {'caseid_new': 164061, 'w1_ppgender': 1, 'w1_same_sex_couple': 0.0, 'w1_ppage': 59, 'w1_q9': 52.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 12.0, 'w1_partyid7': 2.0, 'w1_q12': 2.0, 'political_steps': 0.0, 'rel_duration': 28.25}, 3: {'caseid_new': 212249, 'w1_ppgender': 2, 'w1_same_sex_couple': 0.0, 'w1_ppage': 55, 'w1_q9': 55.0, 'w1_subject_race': 2.0, 'w1_q6b': 2.0, 'w1_ppeduc': 9, 'w1_q10': 10.0, 'w1_partyid7': 1.0, 'w1_q12': 1.0,

In [44]:
# partner dict 
partner_info = {}

rec_counter = 1096

# 1096 couples
for record in df_dict_same_rel:
    partner_info [rec_counter] = {
        "r_caseid":record["caseid_new"],
        "r_gender": record["w1_ppgender"], # resp gender
        "p_gender": record["w1_q4"], # partner's gender
        
        "r_race": record["w1_ppethm"], # resp race
        "p_race": record["w1_q6b"], #partner's race
        
        "r_age": record["w1_ppage"], # resp age
        "p_age": record["w1_q9"], #partner age in 2017,
        
        "r_edu": record["w1_ppeduc"], # resp education
        "p_edu": record["w1_q10"], # partner education
        
        "r_politic": record["w1_partyid7"], # resp political affiliation
        "p_politic": record["w1_q12"] # partner political affiliation
        
    }
    rec_counter +=1

print(partner_info)

#print(rec_counter)

{1096: {'r_caseid': 71609, 'r_gender': 2, 'p_gender': 1.0, 'r_race': 1, 'p_race': 1.0, 'r_age': 68, 'p_age': 71.0, 'r_edu': 10, 'p_edu': 10.0, 'r_politic': 3.0, 'p_politic': 3.0}, 1097: {'r_caseid': 106983, 'r_gender': 1, 'p_gender': 2.0, 'r_race': 1, 'p_race': 1.0, 'r_age': 39, 'p_age': 49.0, 'r_edu': 11, 'p_edu': 10.0, 'r_politic': 7.0, 'p_politic': 7.0}, 1098: {'r_caseid': 164061, 'r_gender': 1, 'p_gender': 2.0, 'r_race': 1, 'p_race': 1.0, 'r_age': 59, 'p_age': 52.0, 'r_edu': 10, 'p_edu': 12.0, 'r_politic': 2.0, 'p_politic': 2.0}, 1099: {'r_caseid': 212249, 'r_gender': 2, 'p_gender': 1.0, 'r_race': 2, 'p_race': 2.0, 'r_age': 55, 'p_age': 55.0, 'r_edu': 9, 'p_edu': 10.0, 'r_politic': 1.0, 'p_politic': 1.0}, 1100: {'r_caseid': 214227, 'r_gender': 2, 'p_gender': 1.0, 'r_race': 1, 'p_race': 2.0, 'r_age': 73, 'p_age': 79.0, 'r_edu': 9, 'p_edu': 9.0, 'r_politic': 3.0, 'p_politic': 6.0}, 1101: {'r_caseid': 218351, 'r_gender': 1, 'p_gender': 2.0, 'r_race': 1, 'p_race': 1.0, 'r_age': 46, 'p_

In [10]:
# partner dict - same as above but just the key-names differ
partner_info_dict = {}

rec_counter = 1096

# 1096 couples
for record in df_dict_same_rel:
    partner_info_dict [rec_counter] = {
        "caseid_new":record['caseid_new'],
        "w1_ppgender": record["w1_ppgender"], # resp gender
        "w1_q4": record["w1_q4"], # partner's gender

        "w1_ppage": record["w1_ppage"], # resp age
        "w1_q9": record["w1_q9"], #partner age in 2017,
        
        'w1_subject_race': record['w1_subject_race'], # resp race
        "w1_q6b": record["w1_q6b"], #partner's race
        
        "w1_ppeduc": record["w1_ppeduc"], # resp education
        "w1_q10": record["w1_q10"], # partner education
        
        "w1_partyid7": record["w1_partyid7"], # resp political affiliation
        "w1_q12": record["w1_q12"] # partner political affiliation
        
    }
    rec_counter +=1

print(partner_info_dict)

#print(rec_counter)

{1096: {'caseid_new': 71609, 'w1_ppgender': 2, 'w1_q4': 1.0, 'w1_ppage': 68, 'w1_q9': 71.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 10.0, 'w1_partyid7': 3.0, 'w1_q12': 3.0}, 1097: {'caseid_new': 106983, 'w1_ppgender': 1, 'w1_q4': 2.0, 'w1_ppage': 39, 'w1_q9': 49.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 11, 'w1_q10': 10.0, 'w1_partyid7': 7.0, 'w1_q12': 7.0}, 1098: {'caseid_new': 164061, 'w1_ppgender': 1, 'w1_q4': 2.0, 'w1_ppage': 59, 'w1_q9': 52.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 12.0, 'w1_partyid7': 2.0, 'w1_q12': 2.0}, 1099: {'caseid_new': 212249, 'w1_ppgender': 2, 'w1_q4': 1.0, 'w1_ppage': 55, 'w1_q9': 55.0, 'w1_subject_race': 2.0, 'w1_q6b': 2.0, 'w1_ppeduc': 9, 'w1_q10': 10.0, 'w1_partyid7': 1.0, 'w1_q12': 1.0}, 1100: {'caseid_new': 214227, 'w1_ppgender': 2, 'w1_q4': 1.0, 'w1_ppage': 73, 'w1_q9': 79.0, 'w1_subject_race': 1.0, 'w1_q6b': 2.0, 'w1_ppeduc': 9, 'w1_q10': 9.0, 'w1_partyid7': 3.0, 'w1_q12': 6.0}, 110

In [11]:
#Generalizing what attributes take precedence 
# so when we match individual person to another person, if there is a same score for 2 different attributes, we consider this generic precedence algo 
#also checked from discord stat: 408+323+231

#if you are politically distant from your partner by 0-2, then add 1 to the counter (You are very similar)
political_ideology=0

for record in dict_same_rel:
    each_record = dict_same_rel[record]
    political_steps = each_record['political_steps']
    if (int(political_steps)>=0 and int(political_steps)<=2):
        political_ideology+=1

print(political_ideology)

962


In [15]:
#prefer same ethnicity 

same_ethnicity_number=0

for record in dict_same_rel:
    each_record = dict_same_rel[record]
    resp_eth=each_record['w1_subject_race']
    partner_eth=each_record['w1_q6b']
    if (resp_eth==partner_eth):
        same_ethnicity_number+=1

print(same_ethnicity_number)
        

947


In [None]:
#Age - 791 couples had an avg difference of 5

In [76]:
#doing the same for partner's education being greater than the person

partner_ed_greater_than_or_equal=0

for record in dict_same_rel:
    each_record = dict_same_rel[record]
    person_ed = each_record['w1_ppeduc']
    partner_ed = each_record['w1_q10']
    if (partner_ed>=person_ed):
        partner_ed_greater_than_or_equal+=1

print(partner_ed_greater_than_or_equal)

736


In [None]:
#########################################################################
###############  RANKING ALGO ##########################

In [17]:
#962 couples who lasted through all the waves have an avg age gap of 5 
#since it's a tie-breaker with political_ideology, we will look at relationship quality of couples who rated their partners excellent and good and see 
#what proportion of them prefer politics over age and vice-versa 
# depending on that, we will rank them 

#MOST IMPORTANT CELL - RANKED ALL ATTRIBUTES

married_adults1 = df['w1_partnership_status']==1
partnered_adults1 = df['w1_partnership_status']==2

married_adults_samep2 = df['w2_section']==1
partnered_adults_samep2 = df['w2_section']==3

married_adults_samep3 = df['w3_section']==1
partnered_adults_samep3 = df['w3_section']==3

relationship_quality_excellent = df['w1_q34']==1
relationship_quality_good = df['w1_q34']==2

caseids = ['caseid_new', 'w1_partyid7','w1_q12', 'w1_ppage', 'w1_q9', 'w1_subject_race','w1_q6b', 'w1_ppeduc', 'w1_q10' ]

cond = (married_adults1 | partnered_adults1) & (married_adults_samep2 | partnered_adults_samep2) & (married_adults_samep3 | partnered_adults_samep3) & (relationship_quality_excellent | relationship_quality_good)

relationship_quality_list = df.loc[cond,caseids]

relationship_quality_dict = relationship_quality_list.to_dict(orient='records')

political_ideology=0
for record in relationship_quality_dict:
    if (record['w1_partyid7']>=1 and record['w1_q12']>=1):
        political_steps = np.abs(record['w1_partyid7']-record['w1_q12'])
        if (int(political_steps)>=0 and int(political_steps)<=2):
            political_ideology+=1

age_diff_couples=0
for record in relationship_quality_dict:
    if (record['w1_ppage']>=1 and record['w1_q9']>=1):
        age_diff = np.abs(record['w1_ppage']-record['w1_q9'])
        if (age_diff>=0 and age_diff<=5):
            age_diff_couples+=1

same_ethnicity_number=0
for record in relationship_quality_dict:
    resp_eth=record['w1_subject_race']
    partner_eth=record['w1_q6b']
    if (resp_eth>=1 and partner_eth>=1):
        if (resp_eth==partner_eth):
            same_ethnicity_number+=1

partner_ed_greater_than_or_equal=0
for record in relationship_quality_dict:
    person_ed = record['w1_ppeduc']
    partner_ed = record['w1_q10']
    if (person_ed>=1 and partner_ed>=1):
        if (partner_ed>=person_ed):
            partner_ed_greater_than_or_equal+=1


print("Considering Relationship quality")
print("Number of people who have similar political ideology:",political_ideology)
print("Number of people whose ethnicity is similar:",same_ethnicity_number)
print("Number of people who have avg age difference of 5:",age_diff_couples)
print("Number of people whose partner's education is more than them:", partner_ed_greater_than_or_equal)



Considering Relationship quality
Number of people who have similar political ideology: 906
Number of people whose ethnicity is similar: 896
Number of people who have avg age difference of 5: 791
Number of people whose partner's education is more than them: 699


In [None]:
########################################################################################
##########  SCORING ALGORITHM ######################################################

In [42]:
#functions to calculate scores for individual attributes 

def cal_pol_steps(a,b):
    if (a>=1 and b>=1):
        pol_score= np.abs(np.abs(int(a)-int(b))-6)
        return round((pol_score/6)*4,3)
    else:
        return 0

def check_eth_same(a,b):
    same_eth=0
    if (a>=1 and b>=1):
        if (a==b):
            same_eth=1
            return round((same_eth/1)*3,3)
    return same_eth

def cal_age_diff(r_age,p_age):
    score_age=0
    if (r_age>=1 and p_age>=1):
        age_diff = np.abs(r_age-p_age)
        if (age_diff>=2 and age_diff<=5):
            score_age=1
            return round((score_age/1)*2,3)
    return score_age
    
def check_p_ed_greater(r_edu,p_edu):
    if (r_edu>=1 and p_edu>=1):    
        score_edu = np.abs(np.abs(r_edu-p_edu)-13)
        return round((score_edu/13)*1,3)
    else:
        return 0

In [45]:
#scoring existing resp-partner relationship
#individual scores for each attribute 

score_resp_partner={}

for record in partner_info:
    p_info=partner_info[record]

    #political steps difference -6 (6 is the max diff that exists in politics)
    score_political_steps=cal_pol_steps( int(p_info['r_politic']),int(p_info['p_politic']) )

    #if races are the same, assign a binary value of 1(true) or 0(false)
    score_race=check_eth_same(int(p_info['r_race']) , int(p_info['p_race']))

    #education difference - 13 (13 is the max difference that exists in the educational level)
    score_edu=check_p_ed_greater(int(p_info['r_edu']) , int(p_info['p_edu']))

    #age difference (if age gap within 5, binary value of 1. If not, 0)
    score_age=cal_age_diff(int(p_info['r_age']) , int(p_info['p_age']))

    total_score = score_political_steps+score_race+score_edu+score_age
    
    score_resp_partner[p_info['r_caseid']]={
        "p_id":record,
        "score_political_steps":score_political_steps,
        "score_race":score_race,
        "score_age":score_age,
        "score_edu":score_edu,
        "total_score":total_score

    }
print(score_resp_partner)


    


{71609: {'p_id': 1096, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0}, 106983: {'p_id': 1097, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.923, 'total_score': 7.923}, 164061: {'p_id': 1098, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.846, 'total_score': 7.846}, 212249: {'p_id': 1099, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.923, 'total_score': 7.923}, 214227: {'p_id': 1100, 'score_political_steps': 2.0, 'score_race': 0, 'score_age': 0, 'score_edu': 1.0, 'total_score': 3.0}, 218351: {'p_id': 1101, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.923, 'total_score': 7.923}, 220655: {'p_id': 1102, 'score_political_steps': 2.667, 'score_race': 0, 'score_age': 0, 'score_edu': 0.846, 'total_score': 3.513}, 291177: {'p_id': 1103, 'score_political_steps': 1.333, 'score_race': 3.0, 'score_age': 0, 

In [37]:
#total score for each resp-partner relationship (ORIGINAL GRAPH)
#score= political_steps+race+education

total_score_rp = {}

for record in score_resp_partner:
    p_info=score_resp_partner[record]
    total_score = p_info['score_political_steps']+p_info['score_race']+p_info['score_edu']+p_info['score_age']

    total_score_rp[record]={
        "p_id":p_info['p_id'],
        "total_score":total_score
    }

print(total_score_rp[106983])

{'p_id': 1097, 'total_score': 7.923}


In [35]:
score_existing_partners ={}  #where existing partner is the key #original graph

for record in score_resp_partner:
    partner_info=score_resp_partner[record]
    score_existing_partners[partner_info['p_id']]={
        'r_id':record,
        'score_political_steps':partner_info['score_political_steps'],
        'score_race':partner_info['score_race'],
        'score_age':partner_info['score_age'],
        'score_edu':partner_info['score_edu'],
        'total_score':partner_info['total_score']
    }

print(score_existing_partners)
#print(len(score_existing_partners))

{1096: {'r_id': 71609, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0}, 1097: {'r_id': 106983, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.923, 'total_score': 7.923}, 1098: {'r_id': 164061, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.846, 'total_score': 7.846}, 1099: {'r_id': 212249, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.923, 'total_score': 7.923}, 1100: {'r_id': 214227, 'score_political_steps': 2.0, 'score_race': 0, 'score_age': 0, 'score_edu': 1.0, 'total_score': 3.0}, 1101: {'r_id': 218351, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.923, 'total_score': 7.923}, 1102: {'r_id': 220655, 'score_political_steps': 2.667, 'score_race': 0, 'score_age': 0, 'score_edu': 0.846, 'total_score': 3.513}, 1103: {'r_id': 291177, 'score_political_steps': 1.333, 'score_race': 3.0, 'score_age': 0, 

In [None]:
##############################################################################################################
#                    MATCHING ALGORITHM 
##############################################################################################################

In [169]:
print(dict_same_rel)

{0: {'caseid_new': 71609, 'w1_ppgender': 2, 'w1_same_sex_couple': 0.0, 'w1_ppage': 68, 'w1_q9': 71.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 10.0, 'w1_partyid7': 3.0, 'w1_q12': 3.0, 'political_steps': 0.0, 'rel_duration': 57.4166679382324}, 1: {'caseid_new': 106983, 'w1_ppgender': 1, 'w1_same_sex_couple': 0.0, 'w1_ppage': 39, 'w1_q9': 49.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 11, 'w1_q10': 10.0, 'w1_partyid7': 7.0, 'w1_q12': 7.0, 'political_steps': 0.0, 'rel_duration': 22.3333339691162}, 2: {'caseid_new': 164061, 'w1_ppgender': 1, 'w1_same_sex_couple': 0.0, 'w1_ppage': 59, 'w1_q9': 52.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 12.0, 'w1_partyid7': 2.0, 'w1_q12': 2.0, 'political_steps': 0.0, 'rel_duration': 28.25}, 3: {'caseid_new': 212249, 'w1_ppgender': 2, 'w1_same_sex_couple': 0.0, 'w1_ppage': 55, 'w1_q9': 55.0, 'w1_subject_race': 2.0, 'w1_q6b': 2.0, 'w1_ppeduc': 9, 'w1_q10': 10.0, 'w1_partyid7': 1.0, 'w1_q12': 1.0,

In [170]:
print(partner_info_dict)

{1096: {'caseid_new': 71609, 'w1_ppgender': 2, 'w1_q4': 1.0, 'w1_ppage': 68, 'w1_q9': 71.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 10.0, 'w1_partyid7': 3.0, 'w1_q12': 3.0}, 1097: {'caseid_new': 106983, 'w1_ppgender': 1, 'w1_q4': 2.0, 'w1_ppage': 39, 'w1_q9': 49.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 11, 'w1_q10': 10.0, 'w1_partyid7': 7.0, 'w1_q12': 7.0}, 1098: {'caseid_new': 164061, 'w1_ppgender': 1, 'w1_q4': 2.0, 'w1_ppage': 59, 'w1_q9': 52.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 12.0, 'w1_partyid7': 2.0, 'w1_q12': 2.0}, 1099: {'caseid_new': 212249, 'w1_ppgender': 2, 'w1_q4': 1.0, 'w1_ppage': 55, 'w1_q9': 55.0, 'w1_subject_race': 2.0, 'w1_q6b': 2.0, 'w1_ppeduc': 9, 'w1_q10': 10.0, 'w1_partyid7': 1.0, 'w1_q12': 1.0}, 1100: {'caseid_new': 214227, 'w1_ppgender': 2, 'w1_q4': 1.0, 'w1_ppage': 73, 'w1_q9': 79.0, 'w1_subject_race': 1.0, 'w1_q6b': 2.0, 'w1_ppeduc': 9, 'w1_q10': 9.0, 'w1_partyid7': 3.0, 'w1_q12': 6.0}, 110

In [48]:
#pairing up respondents with other respondents - dict_same_rel

#political_steps = np.abs(record['w1_partyid7']-record['w1_q12'])
#age_diff = np.abs(record['w1_ppage']-record['w1_q9'])
#resp_eth=record['w1_subject_race']
#partner_eth=record['w1_q6b']
#person_ed = record['w1_ppeduc']
#partner_ed = record['w1_q10']

new_relationships_with_resp_dict={}

for r in dict_same_rel:
    record=dict_same_rel[r]
    r_id=record['caseid_new']
    r_politic = record['w1_partyid7']
    r_age=record['w1_ppage']
    r_eth=record['w1_subject_race']
    r_edu=record['w1_ppeduc']
    curr_partner_id=score_resp_partner[r_id]['p_id']
    curr_partner_score=score_resp_partner[r_id]['total_score']

    for newR in dict_same_rel:
        newRecord=dict_same_rel[newR]
        p_id=newRecord['caseid_new']
        p_politic= newRecord['w1_q12']
        p_age=newRecord['w1_q9']
        p_eth=newRecord['w1_q6b']
        p_edu=newRecord['w1_q10']

        if (r_id!=p_id):
            total_score_newP = cal_pol_steps(r_politic,p_politic)+cal_age_diff(r_age,p_age)+check_eth_same(r_eth,p_eth)+check_p_ed_greater(r_edu,p_edu)
            score_political_steps=cal_pol_steps(r_politic,p_politic)
            score_race=check_eth_same(r_eth,p_eth)
            score_age=cal_age_diff(r_age,p_age)
            score_edu=check_p_ed_greater(r_edu,p_edu)
            
            #if new score of the new record > curr_partner_score, then create a new relationship entry, or else keep the same partner
            if (total_score_newP > curr_partner_score):
                if r_id in new_relationships_with_resp_dict.keys():
                    new_relationships_with_resp_dict[r_id].append({'p_id':p_id, 
                                                                   "score_political_steps":score_political_steps, 
                                                                   "score_race":score_race,
                                                                    "score_age":score_age,
                                                                    "score_edu":score_edu,
                                                                   'new_score':total_score_newP})
                else:
                    new_relationships_with_resp_dict[r_id]=[{'p_id':p_id, 
                                                                   "score_political_steps":score_political_steps, 
                                                                   "score_race":score_race,
                                                                    "score_age":score_age,
                                                                    "score_edu":score_edu,
                                                                   'new_score':total_score_newP}]
            
print("Length of the new dict with new partners:",len(new_relationships_with_resp_dict))

print(list(new_relationships_with_resp_dict.items())[0])

#if did not get paired, keep the same partner as before 
items=set(score_resp_partner)-set(new_relationships_with_resp_dict)

print("The following respondents did not get paired (So, keep the same partners):",len(items))
for item in items:
    #print(item, ":", score_resp_partner[item])
    new_relationships_with_resp_dict[item]=[{'p_id':score_resp_partner[item]['p_id'], 'new_score':int(score_resp_partner[item]['total_score'])}]
print("New relationship dict should be 1096:",len(new_relationships_with_resp_dict))



Length of the new dict with new partners: 1031
(106983, [{'p_id': 876181, 'score_political_steps': 2.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 8.0}, {'p_id': 1230459, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 1.0, 'new_score': 8.0}, {'p_id': 1236545, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 9.256}, {'p_id': 1596895, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.769, 'new_score': 9.769}, {'p_id': 1612001, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.769, 'new_score': 9.102}, {'p_id': 1692763, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 1.0, 'new_score': 8.0}, {'p_id': 1718309, 'score_political_steps': 2.667, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.846, 'new_score': 8.513}, {'p_id': 1756667, 'score_political_steps': 3.333, 'score_race': 3.0, 'sc

In [49]:
#find the minimum score from the scores respondent have with other partners such that it is greater than the score that the 
#respondent has with the existing partner, (somewhat used matching market strategy)
min_scores_with_other_respondents = {}

# Iterate over the items in both dictionaries
for key, records in new_relationships_with_resp_dict.items():

    min_score=float('inf')
    min_p_id=None
    for record in records:
        if (record['new_score']<min_score):
            min_score=record['new_score']
            min_p_id=record['p_id']
            
    min_scores_with_other_respondents[key]={'p_id':min_p_id, 'score':min_score}

#print(min_scores)
print(len(set(min_scores_with_other_respondents)))
print(min_scores_with_other_respondents)

1096
{106983: {'p_id': 876181, 'score': 8.0}, 164061: {'p_id': 650237, 'score': 7.923}, 214227: {'p_id': 1888381, 'score': 3.282}, 218351: {'p_id': 703017, 'score': 8.0}, 220655: {'p_id': 646157, 'score': 3.59}, 291177: {'p_id': 71609, 'score': 5.333}, 369975: {'p_id': 1218979, 'score': 8.0}, 428211: {'p_id': 621641, 'score': 4.923}, 497203: {'p_id': 1230459, 'score': 4.667}, 516823: {'p_id': 1300409, 'score': 9.769}, 582849: {'p_id': 2808641, 'score': 8.205}, 587125: {'p_id': 1157187, 'score': 8.0}, 589881: {'p_id': 582849, 'score': 8.0}, 608697: {'p_id': 1621115, 'score': 10.0}, 621641: {'p_id': 1256141, 'score': 7.615}, 632253: {'p_id': 1943525, 'score': 4.333}, 634833: {'p_id': 727775, 'score': 9.923}, 637531: {'p_id': 653633, 'score': 4.282}, 643423: {'p_id': 1473855, 'score': 8.948}, 643505: {'p_id': 1831591, 'score': 4.948}, 646023: {'p_id': 1860835, 'score': 8.948}, 646157: {'p_id': 703017, 'score': 7.179}, 648669: {'p_id': 1394211, 'score': 10.0}, 650237: {'p_id': 1284217, 'sc

In [50]:
#pairing up respondents with other partners from the pool of partners - partner_info_dict

#political_steps = np.abs(record['w1_partyid7']-record['w1_q12'])
#age_diff = np.abs(record['w1_ppage']-record['w1_q9'])
#resp_eth=record['w1_subject_race']
#partner_eth=record['w1_q6b']
#person_ed = record['w1_ppeduc']
#partner_ed = record['w1_q10']

new_relationships_with_p_dict={}

for r in dict_same_rel:
    record=dict_same_rel[r]
    r_id=record['caseid_new']
    r_politic = record['w1_partyid7']
    r_age=record['w1_ppage']
    r_eth=record['w1_subject_race']
    r_edu=record['w1_ppeduc']
    curr_partner_id=score_resp_partner[r_id]['p_id']
    curr_partner_score=score_resp_partner[r_id]['total_score']

    for newR in partner_info_dict:
        newRecord=partner_info_dict[newR]
        newr_id=newRecord['caseid_new']
        p_politic= newRecord['w1_q12']
        p_age=newRecord['w1_q9']
        p_eth=newRecord['w1_q6b']
        p_edu=newRecord['w1_q10']
        
        if (r_id!=newr_id):
            total_score_newP = cal_pol_steps(r_politic,p_politic)+cal_age_diff(r_age,p_age)+check_eth_same(r_eth,p_eth)+check_p_ed_greater(r_edu,p_edu)
            score_political_steps=cal_pol_steps(r_politic,p_politic)
            score_race=check_eth_same(r_eth,p_eth)
            score_age=cal_age_diff(r_age,p_age)
            score_edu=check_p_ed_greater(r_edu,p_edu)
            
            if (total_score_newP > curr_partner_score):
                if r_id in new_relationships_with_p_dict.keys():
                    new_relationships_with_p_dict[r_id].append({'p_id':p_id, 
                                                                   "score_political_steps":score_political_steps, 
                                                                   "score_race":score_race,
                                                                    "score_age":score_age,
                                                                    "score_edu":score_edu,
                                                                   'new_score':total_score_newP})
                else:
                    new_relationships_with_p_dict[r_id]=[{'p_id':p_id, 
                                                           "score_political_steps":score_political_steps, 
                                                           "score_race":score_race,
                                                            "score_age":score_age,
                                                            "score_edu":score_edu,
                                                           'new_score':total_score_newP}]

print("Length of the new dict with new partners:",len(new_relationships_with_p_dict))
#if did not get paired, keep the same partner as before 
items=set(score_resp_partner)-set((new_relationships_with_p_dict))
print("The following respondents did not get paired:",len(items))

print(list(new_relationships_with_p_dict.items())[0])
#print(items)
for item in items:
    #print(item, " ", score_resp_partner[item])
    new_relationships_with_p_dict[item]=[{'p_id':score_resp_partner[item]['p_id'], 'new_score':int(score_resp_partner[item]['total_score'])}]
print("The length of the new dict should be 1096: ",len(new_relationships_with_p_dict))    
        


Length of the new dict with new partners: 1031
The following respondents did not get paired: 65
(106983, [{'p_id': 2964987, 'score_political_steps': 2.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 8.0}, {'p_id': 2964987, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 1.0, 'new_score': 8.0}, {'p_id': 2964987, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 9.256}, {'p_id': 2964987, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.769, 'new_score': 9.769}, {'p_id': 2964987, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.769, 'new_score': 9.102}, {'p_id': 2964987, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 1.0, 'new_score': 8.0}, {'p_id': 2964987, 'score_political_steps': 2.667, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.846, 'new_score': 8.513}, {'p_id': 2964987, 'sco

In [51]:

#finding min score from the score resp have with partners such that the new score > score with the existing partner
min_scores_with_partners = {}

# Iterate over the items in both dictionaries
for key, records in new_relationships_with_p_dict.items():

    min_score=float('inf')
    min_p_id=None
    for record in records:
        if (record['new_score']<min_score):
            min_score=record['new_score']
            min_p_id=record['p_id']
            
    min_scores_with_partners[key]={'p_id':min_p_id, 'score':min_score}

print(len(min_scores_with_partners))
print(min_scores_with_partners)

1096
{106983: {'p_id': 2964987, 'score': 8.0}, 164061: {'p_id': 2964987, 'score': 7.923}, 214227: {'p_id': 2964987, 'score': 3.282}, 218351: {'p_id': 2964987, 'score': 8.0}, 220655: {'p_id': 2964987, 'score': 3.59}, 291177: {'p_id': 2964987, 'score': 5.333}, 369975: {'p_id': 2964987, 'score': 8.0}, 428211: {'p_id': 2964987, 'score': 4.923}, 497203: {'p_id': 2964987, 'score': 4.667}, 516823: {'p_id': 2964987, 'score': 9.769}, 582849: {'p_id': 2964987, 'score': 8.205}, 587125: {'p_id': 2964987, 'score': 8.0}, 589881: {'p_id': 2964987, 'score': 8.0}, 608697: {'p_id': 2964987, 'score': 10.0}, 621641: {'p_id': 2964987, 'score': 7.615}, 632253: {'p_id': 2964987, 'score': 4.333}, 634833: {'p_id': 2964987, 'score': 9.923}, 637531: {'p_id': 2964987, 'score': 4.282}, 643423: {'p_id': 2964987, 'score': 8.948}, 643505: {'p_id': 2964987, 'score': 4.948}, 646023: {'p_id': 2964987, 'score': 8.948}, 646157: {'p_id': 2964987, 'score': 7.179}, 648669: {'p_id': 2964987, 'score': 10.0}, 650237: {'p_id': 2

In [53]:
#comparing partners from both the new relationships dict to see who is a better match.
#if score of one is greater than the other, keep that score (hence, a potential match)

final_matches={}

for key, record in min_scores_with_other_respondents.items():
    if (min_scores_with_partners[key]['score']<min_scores_with_other_respondents[key]['score']):
        final_matches[key]={'p_id':min_scores_with_partners[key]['p_id'], 'final_score':min_scores_with_partners[key]['score']}
    else:
        final_matches[key]={'p_id':min_scores_with_other_respondents[key]['p_id'], 'final_score':min_scores_with_other_respondents[key]['score']}        
        
print(len(final_matches))
print(final_matches)
    
    

1096
{106983: {'p_id': 876181, 'final_score': 8.0}, 164061: {'p_id': 650237, 'final_score': 7.923}, 214227: {'p_id': 1888381, 'final_score': 3.282}, 218351: {'p_id': 703017, 'final_score': 8.0}, 220655: {'p_id': 646157, 'final_score': 3.59}, 291177: {'p_id': 71609, 'final_score': 5.333}, 369975: {'p_id': 1218979, 'final_score': 8.0}, 428211: {'p_id': 621641, 'final_score': 4.923}, 497203: {'p_id': 1230459, 'final_score': 4.667}, 516823: {'p_id': 1300409, 'final_score': 9.769}, 582849: {'p_id': 2808641, 'final_score': 8.205}, 587125: {'p_id': 1157187, 'final_score': 8.0}, 589881: {'p_id': 582849, 'final_score': 8.0}, 608697: {'p_id': 1621115, 'final_score': 10.0}, 621641: {'p_id': 1256141, 'final_score': 7.615}, 632253: {'p_id': 1943525, 'final_score': 4.333}, 634833: {'p_id': 727775, 'final_score': 9.923}, 637531: {'p_id': 653633, 'final_score': 4.282}, 643423: {'p_id': 1473855, 'final_score': 8.948}, 643505: {'p_id': 1831591, 'final_score': 4.948}, 646023: {'p_id': 1860835, 'final_sco

In [156]:

#IGNORE THIS 
import random 

new_relationships={} 
visited_resp=[] 
matched_partners=[]

for currRecord in score_resp_partner: 
    info_old=score_resp_partner[currRecord] 
    curr_resp_id=currRecord 
    curr_partner_id=info_old['p_id'] 
    curr_score=info_old['total_score']

    visited_partners=set()
    
    #list of partner ids except for the curr_partner_id 
    keys_partners= [item for item in score_existing_partners.keys() if item!=curr_partner_id]
    
    partner_found=False
    while not partner_found:
        #choose a random partner to check if it can be paired
        random_partner_id=random.choice(keys_partners)
        new_partner=score_existing_partners[random_partner_id]
        score_new_partner=new_partner['total_score']

        #check if the visited_partners have reached the limit on the number of potential partners present
        if (len(visited_partners)!=1095):
            #if currRecord-new_partner don't already exist in new_relationships and new_partner_score>curr_resp_score, then 
            #add it to the new_relationships
            if ((currRecord not in visited_resp) and (random_partner_id not in visited_partners) and (random_partner_id not in matched_partners) and 
                (score_new_partner > curr_score)):
                
                new_relationships[currRecord]={
                    'p_id':random_partner_id,
                    'score_political_steps':new_partner['score_political_steps'],
                    'score_race':new_partner['score_race'],
                    'score_age':new_partner['score_age'],
                    'score_edu':new_partner['score_edu'],
                    'total_score':new_partner['total_score']
                }
                visited_resp.append(currRecord)
                matched_partners.append(random_partner_id)
                
                partner_found=True
                
            visited_partners.add(random_partner_id)
        else:
            break
        
#print(len(visited_resp)) 
print(len(new_relationships))
#print(new_relationships.keys())

787
