In [119]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Specify the file path
file_path = 'datingdataset.csv'

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(file_path)

# Change string numeric values to actual numeric values
for column in df.columns:
    df[column] = pd.to_numeric(df[column], errors='coerce')

# Display the DataFrame
#print(df.columns)

# Specify the column names you want to print
columns_to_print_section = ['caseid_new', 'w1_section', 'w2_section', 'w3_section', 'w3_partner_source']

# Print the specified columns
print(df[columns_to_print_section].shape)

(3510, 5)


In [120]:
# all the people that were in one relationship throughout all w1->w3
con_w1_partnered = df['w1_section'] == 1 # those who were partenered in 2017
con_w2_married_same = df['w2_section'] == 1 # who are still married to the same partner in wave 2
con_w2_part_same = df['w2_section'] == 3 # or still partnered with that same partner but not married (but still together) in wave 2 
# (note that we are not looking at those with new partners in w2/w3)
con_w3_married_same = df['w3_section'] == 1 # who are still married to the same partner in wave 3
con_w3_partnered_same = df['w3_section'] == 3 # or still partnered with that same partner but not married (but still together) in wave 2 
con_w3_same_part_fromw1 = df['w3_partner_source'] == 1

com_con_same_rel_all_waves = con_w1_partnered & (con_w2_married_same | con_w2_part_same) & ((con_w3_married_same | con_w3_partnered_same) & con_w3_same_part_fromw1)
same_rel_all_waves_df = df.loc[com_con_same_rel_all_waves]
print(len(same_rel_all_waves_df))

1096


In [121]:
# now creating dict 3 for those who had same rel w1->w3
rel_counter = 0
dict_same_rel = {}

df_dict_same_rel = same_rel_all_waves_df.to_dict(orient='records')

for record in df_dict_same_rel:
    dict_same_rel[rel_counter] = {
        'caseid_new': record['caseid_new'], #resp case id
        'w1_ppage': record['w1_ppage'], #age
        'w1_q9': record['w1_q9'], #partner's age in 2017

        'w1_subject_race': record['w1_subject_race'], #resp race
        'w1_q6b': record['w1_q6b'], #partner's race
                
        'w1_ppeduc': record['w1_ppeduc'], #resp educ level
        'w1_q10': record['w1_q10'], #partner educ level

        'w1_partyid7':record['w1_partyid7'],
        'w1_q12':record['w1_q12'],
        'political_steps': np.abs(record['w1_partyid7']-record['w1_q12']),
            }
    rel_counter += 1

#print(dict_same_rel)
#print(len(dict_same_rel))



In [122]:
# partner dict 
partner_info = {}

rec_counter = 1096

# 1096 couples
for record in df_dict_same_rel:
    partner_info [rec_counter] = {
        "r_caseid":record["caseid_new"],
        "r_gender": record["w1_ppgender"], # resp gender
        "p_gender": record["w1_q4"], # partner's gender
        
        "r_race": record["w1_ppethm"], # resp race
        "p_race": record["w1_q6b"], #partner's race
        
        "r_age": record["w1_ppage"], # resp age
        "p_age": record["w1_q9"], #partner age in 2017,
        
        "r_edu": record["w1_ppeduc"], # resp education
        "p_edu": record["w1_q10"], # partner education
        
        "r_politic": record["w1_partyid7"], # resp political affiliation
        "p_politic": record["w1_q12"] # partner political affiliation
        
    }
    rec_counter +=1

print(partner_info)

#print(rec_counter)

{1096: {'r_caseid': 71609, 'r_gender': 2, 'p_gender': 1.0, 'r_race': 1, 'p_race': 1.0, 'r_age': 68, 'p_age': 71.0, 'r_edu': 10, 'p_edu': 10.0, 'r_politic': 3.0, 'p_politic': 3.0}, 1097: {'r_caseid': 106983, 'r_gender': 1, 'p_gender': 2.0, 'r_race': 1, 'p_race': 1.0, 'r_age': 39, 'p_age': 49.0, 'r_edu': 11, 'p_edu': 10.0, 'r_politic': 7.0, 'p_politic': 7.0}, 1098: {'r_caseid': 164061, 'r_gender': 1, 'p_gender': 2.0, 'r_race': 1, 'p_race': 1.0, 'r_age': 59, 'p_age': 52.0, 'r_edu': 10, 'p_edu': 12.0, 'r_politic': 2.0, 'p_politic': 2.0}, 1099: {'r_caseid': 212249, 'r_gender': 2, 'p_gender': 1.0, 'r_race': 2, 'p_race': 2.0, 'r_age': 55, 'p_age': 55.0, 'r_edu': 9, 'p_edu': 10.0, 'r_politic': 1.0, 'p_politic': 1.0}, 1100: {'r_caseid': 214227, 'r_gender': 2, 'p_gender': 1.0, 'r_race': 1, 'p_race': 2.0, 'r_age': 73, 'p_age': 79.0, 'r_edu': 9, 'p_edu': 9.0, 'r_politic': 3.0, 'p_politic': 6.0}, 1101: {'r_caseid': 218351, 'r_gender': 1, 'p_gender': 2.0, 'r_race': 1, 'p_race': 1.0, 'r_age': 46, 'p_

In [123]:
# partner dict - same as above but just the key-names differ
partner_info_dict = {}

rec_counter = 1096

# 1096 couples
for record in df_dict_same_rel:
    partner_info_dict [rec_counter] = {
        "caseid_new":record['caseid_new'],
        "w1_ppgender": record["w1_ppgender"], # resp gender
        "w1_q4": record["w1_q4"], # partner's gender

        "w1_ppage": record["w1_ppage"], # resp age
        "w1_q9": record["w1_q9"], #partner age in 2017,
        
        'w1_subject_race': record['w1_subject_race'], # resp race
        "w1_q6b": record["w1_q6b"], #partner's race
        
        "w1_ppeduc": record["w1_ppeduc"], # resp education
        "w1_q10": record["w1_q10"], # partner education
        
        "w1_partyid7": record["w1_partyid7"], # resp political affiliation
        "w1_q12": record["w1_q12"], # partner political affiliation

        'political_steps': np.abs(record['w1_partyid7']-record['w1_q12'])

        
    }
    rec_counter +=1

print(partner_info_dict.get(1098))

#print(rec_counter)

{'caseid_new': 164061, 'w1_ppgender': 1, 'w1_q4': 2.0, 'w1_ppage': 59, 'w1_q9': 52.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 12.0, 'w1_partyid7': 2.0, 'w1_q12': 2.0, 'political_steps': 0.0}


In [124]:
########################################################################################
##########  SCORING ALGORITHM ######################################################

In [125]:
#functions to calculate scores for individual attributes 

def cal_pol_steps(a,b):
    if (a>=1 and b>=1):
        pol_score= np.abs(np.abs(int(a)-int(b))-6)
        return round((pol_score/6)*4,3)
    else:
        return 0

def check_eth_same(a,b):
    same_eth=0
    if (a>=1 and b>=1):
        if (a==b):
            same_eth=1
            return round((same_eth/1)*3,3)
    return same_eth

def cal_age_diff(r_age,p_age):
    score_age=0
    if (r_age>=1 and p_age>=1):
        age_diff = np.abs(r_age-p_age)
        if (age_diff>=2 and age_diff<=5):
            score_age=1
            return round((score_age/1)*2,3)
    return score_age
    
def check_p_ed_greater(r_edu,p_edu):
    if (r_edu>=1 and p_edu>=1):    
        score_edu = np.abs(np.abs(r_edu-p_edu)-13)
        return round((score_edu/13)*1,3)
    else:
        return 0

In [126]:
#check attribute value of each respondent and their partner -resp dict 
# Iterate over the dictionary
result=None
def return_attr_values_by_caseid_resp_dict(caseid):
    for key, value in dict_same_rel.items():
        # Check if the value of 'caseid_new' key is equal to 71609
        if value.get('caseid_new') == caseid:
            # If found, store the value in the result variable
            result = value
            # Break the loop since we found the desired value
            break
    
    # Print the result
    return (result)

print(return_attr_values_by_caseid_resp_dict(2014793))
print("\n")
print(return_attr_values_by_caseid_resp_dict(164061))

{'caseid_new': 2014793, 'w1_ppage': 56, 'w1_q9': 57.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 12, 'w1_q10': 11.0, 'w1_partyid7': 3.0, 'w1_q12': 3.0, 'political_steps': 0.0}


{'caseid_new': 164061, 'w1_ppage': 59, 'w1_q9': 52.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 12.0, 'w1_partyid7': 2.0, 'w1_q12': 2.0, 'political_steps': 0.0}


In [127]:
#check attribute value of each respondent and their partner - partner_info_dict
# Iterate over the dictionary

result=None
def return_attr_values_by_caseid_partner_info_dict(caseid):
    for key, value in partner_info_dict.items():
        # Check if the value of 'caseid_new' key is equal to 71609
        if key == caseid:
            # If found, store the value in the result variable
            result = value
            # Break the loop since we found the desired value
            break
    
    # Print the result
    return (result)

print(return_attr_values_by_caseid_partner_info_dict(1098))

{'caseid_new': 164061, 'w1_ppgender': 1, 'w1_q4': 2.0, 'w1_ppage': 59, 'w1_q9': 52.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 12.0, 'w1_partyid7': 2.0, 'w1_q12': 2.0, 'political_steps': 0.0}


In [128]:
#scoring existing resp-partner relationship
#individual scores for each attribute 

score_resp_partner={}

for record in partner_info:
    p_info=partner_info[record]

    #political steps difference -6 (6 is the max diff that exists in politics)
    score_political_steps=cal_pol_steps( int(p_info['r_politic']),int(p_info['p_politic']) )

    #if races are the same, assign a binary value of 1(true) or 0(false)
    score_race=check_eth_same(int(p_info['r_race']) , int(p_info['p_race']))

    #education difference - 13 (13 is the max difference that exists in the educational level)
    score_edu=check_p_ed_greater(int(p_info['r_edu']) , int(p_info['p_edu']))

    #age difference (if age gap within 5, binary value of 1. If not, 0)
    score_age=cal_age_diff(int(p_info['r_age']) , int(p_info['p_age']))

    total_score = score_political_steps+score_race+score_edu+score_age
    
    score_resp_partner[p_info['r_caseid']]={
        "p_id":record,
        "score_political_steps":score_political_steps,
        "score_race":score_race,
        "score_age":score_age,
        "score_edu":score_edu,
        "total_score":total_score

    }

#71609
#582849
#844189
print(list(score_resp_partner.items())[0])
print(list(score_resp_partner.items())[500])
print(list(score_resp_partner.items())[100])

(71609, {'p_id': 1096, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0})
(2014793, {'p_id': 1596, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.923, 'total_score': 7.923})
(1001637, {'p_id': 1196, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 9.333})


In [129]:
#total score for each resp-partner relationship (ORIGINAL GRAPH)
#score= political_steps+race+education

total_score_rp = {}

for record in score_resp_partner:
    p_info=score_resp_partner[record]
    total_score = p_info['score_political_steps']+p_info['score_race']+p_info['score_edu']+p_info['score_age']

    total_score_rp[record]={
        "p_id":p_info['p_id'],
        "total_score":total_score
    }

#print(total_score_rp)

In [130]:
score_existing_partners ={}  #where existing partner is the key #original graph

for record in score_resp_partner:
    partner_info=score_resp_partner[record]
    score_existing_partners[partner_info['p_id']]={
        'r_id':record,
        'score_political_steps':partner_info['score_political_steps'],
        'score_race':partner_info['score_race'],
        'score_age':partner_info['score_age'],
        'score_edu':partner_info['score_edu'],
        'total_score':partner_info['total_score']
    }

#print(len(score_existing_partners))

print(list(score_existing_partners.items())[0])
print(list(score_existing_partners.items())[500])
print(list(score_existing_partners.items())[1000])

(1096, {'r_id': 71609, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0})
(1596, {'r_id': 2014793, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 0, 'score_edu': 0.923, 'total_score': 7.923})
(2096, {'r_id': 2845201, 'score_political_steps': 2.667, 'score_race': 0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 5.667})


In [131]:
##############################################################################################################
#                    MATCHING ALGORITHM 
##############################################################################################################

In [132]:
print(list(dict_same_rel.items())[0])
print(list(dict_same_rel.items())[500])
print(list(dict_same_rel.items())[1000])

(0, {'caseid_new': 71609, 'w1_ppage': 68, 'w1_q9': 71.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 10.0, 'w1_partyid7': 3.0, 'w1_q12': 3.0, 'political_steps': 0.0})
(500, {'caseid_new': 2014793, 'w1_ppage': 56, 'w1_q9': 57.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 12, 'w1_q10': 11.0, 'w1_partyid7': 3.0, 'w1_q12': 3.0, 'political_steps': 0.0})
(1000, {'caseid_new': 2845201, 'w1_ppage': 36, 'w1_q9': 31.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 9, 'w1_q10': 9.0, 'w1_partyid7': 6.0, 'w1_q12': 4.0, 'political_steps': 2.0})


In [133]:
print(list(partner_info_dict.items())[0])
print(list(partner_info_dict.items())[500])
print(list(partner_info_dict.items())[1000])

(1096, {'caseid_new': 71609, 'w1_ppgender': 2, 'w1_q4': 1.0, 'w1_ppage': 68, 'w1_q9': 71.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 10, 'w1_q10': 10.0, 'w1_partyid7': 3.0, 'w1_q12': 3.0, 'political_steps': 0.0})
(1596, {'caseid_new': 2014793, 'w1_ppgender': 1, 'w1_q4': 2.0, 'w1_ppage': 56, 'w1_q9': 57.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 12, 'w1_q10': 11.0, 'w1_partyid7': 3.0, 'w1_q12': 3.0, 'political_steps': 0.0})
(2096, {'caseid_new': 2845201, 'w1_ppgender': 1, 'w1_q4': 2.0, 'w1_ppage': 36, 'w1_q9': 31.0, 'w1_subject_race': 1.0, 'w1_q6b': 1.0, 'w1_ppeduc': 9, 'w1_q10': 9.0, 'w1_partyid7': 6.0, 'w1_q12': 4.0, 'political_steps': 2.0})


In [134]:
#pairing up respondents with other respondents - dict_same_rel

#political_steps = np.abs(record['w1_partyid7']-record['w1_q12'])
#age_diff = np.abs(record['w1_ppage']-record['w1_q9'])
#resp_eth=record['w1_subject_race']
#partner_eth=record['w1_q6b']
#person_ed = record['w1_ppeduc']
#partner_ed = record['w1_q10']

new_relationships_with_resp_dict={}

for r in dict_same_rel:
    record=dict_same_rel[r]
    r_id=record['caseid_new']
    r_politic = record['w1_partyid7']
    r_age=record['w1_ppage']
    r_eth=record['w1_subject_race']
    r_edu=record['w1_ppeduc']
    curr_partner_id=score_resp_partner[r_id]['p_id']
    curr_partner_score=score_resp_partner[r_id]['total_score']

    for newR in dict_same_rel:
        newRecord=dict_same_rel[newR]
        p_id=newRecord['caseid_new']
        p_politic= newRecord['w1_partyid7']
        p_age=newRecord['w1_ppage']
        p_eth=newRecord['w1_subject_race']
        p_edu=newRecord['w1_ppeduc']

        if (r_id!=p_id):

            score_political_steps=cal_pol_steps(r_politic,p_politic)
            score_race=check_eth_same(r_eth,p_eth)
            score_age=cal_age_diff(r_age,p_age)
            score_edu=check_p_ed_greater(r_edu,p_edu)
            total_score_newP = cal_pol_steps(r_politic,p_politic)+cal_age_diff(r_age,p_age)+check_eth_same(r_eth,p_eth)+check_p_ed_greater(r_edu,p_edu)
            
            
            #if new score of the new record > curr_partner_score, then create a new relationship entry, or else keep the same partner
            if (total_score_newP > curr_partner_score):
                if r_id in new_relationships_with_resp_dict.keys():
                    new_relationships_with_resp_dict[r_id].append({'p_id':p_id, 
                                                                   "score_political_steps":score_political_steps, 
                                                                   "score_race":score_race,
                                                                    "score_age":score_age,
                                                                    "score_edu":score_edu,
                                                                   'new_score':total_score_newP})
                else:
                    new_relationships_with_resp_dict[r_id]=[{'p_id':p_id, 
                                                                   "score_political_steps":score_political_steps, 
                                                                   "score_race":score_race,
                                                                    "score_age":score_age,
                                                                    "score_edu":score_edu,
                                                                   'new_score':total_score_newP}]
            
print("Length of the new dict with new partners:",len(new_relationships_with_resp_dict))

print(new_relationships_with_resp_dict.get(2014793))

Length of the new dict with new partners: 1036
[{'p_id': 164061, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.846, 'new_score': 9.179}, {'p_id': 497203, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 9.256}, {'p_id': 589881, 'score_political_steps': 2.667, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 8.667}, {'p_id': 643505, 'score_political_steps': 2.667, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.769, 'new_score': 8.436}, {'p_id': 657869, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 9.256}, {'p_id': 726431, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 9.256}, {'p_id': 728327, 'score_political_steps': 2.667, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 8.59}, {'p_id': 731629, 'score_political_steps': 3.333, 'score_race': 3.0

In [135]:
#if did not get paired, keep the same partner as before 
items=set(score_resp_partner)-set(new_relationships_with_resp_dict)
print("The following respondents did not get paired (So, keep the same partners):",len(items))

not_paired_resp_with_resp={}

for item in items:
    #print(item, ":", score_resp_partner[item])
    #new_relationships_with_resp_dict[item]=[{'p_id':score_resp_partner[item]['p_id'], 'new_score':int(score_resp_partner[item]['total_score'])}]
    not_paired_resp_with_resp[item]=[{'p_id':score_resp_partner[item]['p_id'], 'new_score':int(score_resp_partner[item]['total_score'])}]

The following respondents did not get paired (So, keep the same partners): 60


In [136]:
#pairing up respondents with other partners from the pool of partners - partner_info_dict

#political_steps = np.abs(record['w1_partyid7']-record['w1_q12'])
#age_diff = np.abs(record['w1_ppage']-record['w1_q9'])
#resp_eth=record['w1_subject_race']
#partner_eth=record['w1_q6b']
#person_ed = record['w1_ppeduc']
#partner_ed = record['w1_q10']

new_relationships_with_p_dict={}

for r in dict_same_rel:
    record=dict_same_rel[r]
    r_id=record['caseid_new']
    r_politic = record['w1_partyid7']
    r_age=record['w1_ppage']
    r_eth=record['w1_subject_race']
    r_edu=record['w1_ppeduc']
    curr_partner_id=score_resp_partner[r_id]['p_id']
    curr_partner_score=score_resp_partner[r_id]['total_score']

    for newR in partner_info_dict:
        newRecord=partner_info_dict[newR]
        p_id=newR
        newr_id=newRecord['caseid_new']
        p_politic= newRecord['w1_q12']
        p_age=newRecord['w1_q9']
        p_eth=newRecord['w1_q6b']
        p_edu=newRecord['w1_q10']
        
        if (r_id!=newr_id):
            total_score_newP = cal_pol_steps(r_politic,p_politic)+cal_age_diff(r_age,p_age)+check_eth_same(r_eth,p_eth)+check_p_ed_greater(r_edu,p_edu)
            score_political_steps=cal_pol_steps(r_politic,p_politic)
            score_race=check_eth_same(r_eth,p_eth)
            score_age=cal_age_diff(r_age,p_age)
            score_edu=check_p_ed_greater(r_edu,p_edu)
            
            if (total_score_newP > curr_partner_score):
                if r_id in new_relationships_with_p_dict.keys():
                    new_relationships_with_p_dict[r_id].append({'p_id':p_id, 
                                                                   "score_political_steps":score_political_steps, 
                                                                   "score_race":score_race,
                                                                    "score_age":score_age,
                                                                    "score_edu":score_edu,
                                                                   'new_score':total_score_newP})
                else:
                    new_relationships_with_p_dict[r_id]=[{'p_id':p_id, 
                                                           "score_political_steps":score_political_steps, 
                                                           "score_race":score_race,
                                                            "score_age":score_age,
                                                            "score_edu":score_edu,
                                                           'new_score':total_score_newP}]

print("Length of the new dict with new partners:",len(new_relationships_with_p_dict))

print(new_relationships_with_p_dict.get(2014793))

Length of the new dict with new partners: 1031
[{'p_id': 1098, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 9.333}, {'p_id': 1109, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, {'p_id': 1110, 'score_political_steps': 2.667, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 8.59}, {'p_id': 1117, 'score_political_steps': 2.667, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.846, 'new_score': 8.513}, {'p_id': 1121, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.769, 'new_score': 9.102}, {'p_id': 1122, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 9.333}, {'p_id': 1126, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 9.333}, {'p_id': 1127, 'score_political_steps': 3.333, 'score_race': 3.0, 'score_age': 2.0, 'scor

In [137]:
#if did not get paired, keep the same partner as before 
items=set(score_resp_partner)-set(new_relationships_with_p_dict)
print("The following respondents did not get paired:",len(items))

not_paired_resp_with_partners={}

#print(list(new_relationships_with_p_dict.items())[0])
#print(items)
for item in items:
    #print(item, " ", score_resp_partner[item])
    #new_relationships_with_p_dict[item]=[{'p_id':score_resp_partner[item]['p_id'], 'new_score':int(score_resp_partner[item]['total_score'])}]
    not_paired_resp_with_partners[item]=[{'p_id':score_resp_partner[item]['p_id'], 'new_score':int(score_resp_partner[item]['total_score'])}]

The following respondents did not get paired: 65


In [138]:
#find the minimum score from the scores respondent have with other partners such that it is greater than the score that the 
#respondent has with the existing partner, (somewhat used matching market strategy)
min_scores_with_other_respondents = {}

# Iterate over the items in both dictionaries
for key, records in new_relationships_with_resp_dict.items():

    min_score=float('inf')
    min_p_id=None
    for record in records:
        if (record['new_score']<min_score):
            min_score=record['new_score']
            min_p_id=record['p_id']
            
    min_scores_with_other_respondents[key]={'p_id':min_p_id, 'score':min_score}

#print(min_scores)
print(len(set(min_scores_with_other_respondents)))
print(min_scores_with_other_respondents)

1036
{106983: {'p_id': 782789, 'score': 8.0}, 164061: {'p_id': 218351, 'score': 7.923}, 212249: {'p_id': 814285, 'score': 8.513}, 214227: {'p_id': 1548291, 'score': 3.282}, 218351: {'p_id': 497203, 'score': 8.0}, 220655: {'p_id': 846827, 'score': 3.59}, 291177: {'p_id': 71609, 'score': 5.333}, 369975: {'p_id': 844865, 'score': 8.0}, 428211: {'p_id': 497203, 'score': 4.923}, 497203: {'p_id': 106983, 'score': 4.667}, 516823: {'p_id': 1760991, 'score': 9.692}, 582849: {'p_id': 2338241, 'score': 8.359}, 587125: {'p_id': 2431217, 'score': 7.975}, 589881: {'p_id': 582849, 'score': 8.0}, 608697: {'p_id': 1283019, 'score': 10.0}, 621641: {'p_id': 1251293, 'score': 7.615}, 632253: {'p_id': 859445, 'score': 4.333}, 634833: {'p_id': 2142051, 'score': 9.846}, 637531: {'p_id': 2232929, 'score': 4.282}, 643423: {'p_id': 2417997, 'score': 9.025}, 643505: {'p_id': 1070983, 'score': 4.948}, 646023: {'p_id': 1385831, 'score': 9.025}, 646157: {'p_id': 218351, 'score': 7.179}, 648669: {'p_id': 703017, 'sc

In [139]:

#finding min score from the score resp have with partners such that the new score > score with the existing partner
min_scores_with_partners = {}

# Iterate over the items in both dictionaries
for key, records in new_relationships_with_p_dict.items():

    min_score=float('inf')
    min_p_id=None
    for record in records:
        if (record['new_score']<min_score):
            min_score=record['new_score']
            min_p_id=record['p_id']
            
    min_scores_with_partners[key]={'p_id':min_p_id, 'score':min_score}

print(len(min_scores_with_partners))
print(min_scores_with_partners)

1031
{106983: {'p_id': 1176, 'score': 8.0}, 164061: {'p_id': 1121, 'score': 7.923}, 214227: {'p_id': 1505, 'score': 3.282}, 218351: {'p_id': 1130, 'score': 8.0}, 220655: {'p_id': 1119, 'score': 3.59}, 291177: {'p_id': 1096, 'score': 5.333}, 369975: {'p_id': 1248, 'score': 8.0}, 428211: {'p_id': 1112, 'score': 4.923}, 497203: {'p_id': 1256, 'score': 4.667}, 516823: {'p_id': 1282, 'score': 9.769}, 582849: {'p_id': 2069, 'score': 8.205}, 587125: {'p_id': 1229, 'score': 8.0}, 589881: {'p_id': 1108, 'score': 8.0}, 608697: {'p_id': 1372, 'score': 10.0}, 621641: {'p_id': 1269, 'score': 7.615}, 632253: {'p_id': 1552, 'score': 4.333}, 634833: {'p_id': 1138, 'score': 9.923}, 637531: {'p_id': 1124, 'score': 4.282}, 643423: {'p_id': 1336, 'score': 8.948}, 643505: {'p_id': 1474, 'score': 4.948}, 646023: {'p_id': 1493, 'score': 8.948}, 646157: {'p_id': 1130, 'score': 7.179}, 648669: {'p_id': 1312, 'score': 10.0}, 650237: {'p_id': 1280, 'score': 4.692}, 651259: {'p_id': 1474, 'score': 7.385}, 651349:

In [140]:
'''
print(len(new_relationships_with_resp_dict))
print(len(not_paired_resp_with_resp))
print(len(new_relationships_with_p_dict))
print(len(not_paired_resp_with_partners))
print("\n")

print(len(set(score_resp_partner)-set(new_relationships_with_resp_dict)))
print(len(set(score_resp_partner)-set(new_relationships_with_p_dict)))
print(set(new_relationships_with_resp_dict)-set(new_relationships_with_p_dict))
print(set(new_relationships_with_p_dict)-set(new_relationships_with_resp_dict))

print("\n")

print(len(set(not_paired_resp_with_resp)-set(not_paired_resp_with_partners)))
print(len(set(not_paired_resp_with_partners)-set(not_paired_resp_with_resp)))
'''

all_potential_relationships = new_relationships_with_resp_dict.copy()
all_potential_relationships.update(new_relationships_with_p_dict)
print("Resp got rematched to potential partners",len(all_potential_relationships))


not_paired = not_paired_resp_with_resp.copy()
not_paired.update(not_paired_resp_with_partners)
#print("Not paired resp",len((not_paired)))

print("Not paired in matching",len(set(not_paired)-set(all_potential_relationships)))



Resp got rematched to potential partners 1040
Not paired in matching 56


In [141]:
people_with_partners= set()

In [142]:
#people who did not get rematched, will be matched with existing partner
people_not_paired=(set(not_paired)-set(all_potential_relationships))
print("People who did not get rematched, match them with their existing partner:",len(list(people_not_paired)))

final_matches_those_not_paired_after_matching={}

for notPaired in people_not_paired:
    if notPaired in score_resp_partner.keys():
        original_partner=score_resp_partner.get(notPaired)
        final_matches_those_not_paired_after_matching[notPaired]=original_partner
        people_with_partners.add(notPaired)
        people_with_partners.add(original_partner['p_id'])
        

print("Current number of partners matched:",len(people_with_partners)) #(56*2 (resp and partner))

#print(final_matches_those_not_paired_after_matching)



People who did not get rematched, match them with their existing partner: 56
Current number of partners matched: 112


In [143]:
final_matches_those_with_onematch_already={}

for key,values in all_potential_relationships.items():
    if key not in people_with_partners:
        if (len(values)==1):
            if values[0]['p_id'] not in people_with_partners:
                final_matches_those_with_onematch_already[key]=values[0]
                people_with_partners.add(key)
                people_with_partners.add(values[0]['p_id'])


print(len(final_matches_those_with_onematch_already))
print("Current number of partners matched:",len(people_with_partners))

print(final_matches_those_with_onematch_already)



11
Current number of partners matched: 134
{648669: {'p_id': 1312, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 1141793: {'p_id': 2681949, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 9.923}, 1226941: {'p_id': 2738737, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 1318305: {'p_id': 964713, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 1785549: {'p_id': 1721, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 9.923}, 1854475: {'p_id': 1639, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 1989553: {'p_id': 1842, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 2044441: {'p_id': 2171997, 'score_politi

In [144]:
final_match_others = {}


# Iterate over the items in both dictionaries
for key, records in all_potential_relationships.items():
    if key not in people_with_partners:
        min_score=float('inf')
        min_p_id=None
        for record in records:
            if record['p_id'] not in people_with_partners:
                if (record['new_score']<min_score):
                    min_score=record['new_score']
                    min_p_id=record['p_id']

        if min_p_id is not None:
            final_match_others[key]={'p_id':min_p_id, 'score':min_score}
            people_with_partners.add(key)
            people_with_partners.add(min_p_id)
        



print("Respondents who got rematched",len(final_match_others))



Respondents who got rematched 987


In [145]:
print(len(final_matches_those_not_paired_after_matching))
print(final_matches_those_not_paired_after_matching) #got matched with existing partner

56
{1738505: {'p_id': 1415, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0}, 1902987: {'p_id': 1516, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0}, 1823887: {'p_id': 1468, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0}, 2065295: {'p_id': 1622, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0}, 1791377: {'p_id': 1451, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.692, 'total_score': 9.692}, 1815571: {'p_id': 1466, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0}, 1425299: {'p_id': 1321, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'total_score': 10.0}, 2458389: {'p_id': 1862, 'score_political_steps': 4.0, 'score_race': 3.0, 'sc

In [146]:
print(len(final_matches_those_with_onematch_already))
print(final_matches_those_with_onematch_already) #take out those with only one match after rematching

11
{648669: {'p_id': 1312, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 1141793: {'p_id': 2681949, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 9.923}, 1226941: {'p_id': 2738737, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 1318305: {'p_id': 964713, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 1785549: {'p_id': 1721, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 0.923, 'new_score': 9.923}, 1854475: {'p_id': 1639, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 1989553: {'p_id': 1842, 'score_political_steps': 4.0, 'score_race': 3.0, 'score_age': 2.0, 'score_edu': 1.0, 'new_score': 10.0}, 2044441: {'p_id': 2171997, 'score_political_steps': 4.0, 'score_race': 3.0, 'sco

In [147]:
print(len(final_match_others))
print(final_match_others) #one to one matching for others    

987
{106983: {'p_id': 1176, 'score': 8.0}, 164061: {'p_id': 1121, 'score': 7.923}, 212249: {'p_id': 814285, 'score': 8.513}, 214227: {'p_id': 1505, 'score': 3.282}, 218351: {'p_id': 1130, 'score': 8.0}, 220655: {'p_id': 1119, 'score': 3.59}, 291177: {'p_id': 1114, 'score': 5.333}, 369975: {'p_id': 1610, 'score': 8.0}, 428211: {'p_id': 1112, 'score': 4.923}, 497203: {'p_id': 1256, 'score': 4.667}, 516823: {'p_id': 1282, 'score': 9.769}, 582849: {'p_id': 2069, 'score': 8.205}, 587125: {'p_id': 1229, 'score': 8.0}, 589881: {'p_id': 1108, 'score': 8.0}, 608697: {'p_id': 1457, 'score': 10.0}, 621641: {'p_id': 1269, 'score': 7.615}, 632253: {'p_id': 1552, 'score': 4.333}, 634833: {'p_id': 1138, 'score': 9.923}, 637531: {'p_id': 1124, 'score': 4.282}, 643423: {'p_id': 1336, 'score': 8.948}, 643505: {'p_id': 1474, 'score': 4.948}, 646023: {'p_id': 1493, 'score': 8.948}, 646157: {'p_id': 1133, 'score': 7.179}, 650237: {'p_id': 1280, 'score': 4.692}, 651259: {'p_id': 1393, 'score': 7.615}, 65134

In [148]:
#total matches: 1054 