# Encoding pedigree metadata from GEL participants table

We have a set of individuals who are relatives of a proband; for each individual we need to determine who their parents are and record them in two columns (`parent0` for the mother, `parent1` for the father). Let's use some synthetic data.

In [54]:
import pandas as pd
import numpy as np

# Load the participants table and key every row by its platekey
df = pd.read_csv('data/family_data.csv').set_index('platekey', drop=True)

In [55]:
df

Unnamed: 0_level_0,participant_type,rare_diseases_family_id,biological_relationship_to_proband,family_size,participant_phenotypic_sex
platekey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LP3806,Proband,23432,,4,Male
LP3528,Relative,23432,Mother,4,Female
LP8430,Relative,23432,Father,4,Male
LP2820,Relative,23432,Full Sibling,4,Male
LP2990,Proband,73834,,14,Female
LP8286,Relative,73834,Full Sibling,14,Male
LP6436,Relative,73834,Half Sibling with a shared Mother,14,Female
LP5342,Relative,73834,Son,14,Male
LP3564,Relative,73834,Daughter,14,Female
LP3899,Relative,73834,Half Sibling with a shared Father,14,Male


In [56]:
# Add two columns, parent0 and parent1, to the dataframe and initialise them to the
# placeholder string 'None' (a string, not NaN)
for parent_column in ('parent0', 'parent1'):
    df[parent_column] = 'None'

# Every row whose participant_type is Proband gets 'Proband' written into
# biological_relationship_to_proband, overwriting whatever was there
is_proband = df['participant_type'] == 'Proband'
df.loc[is_proband, 'biological_relationship_to_proband'] = 'Proband'

In [57]:
df

Unnamed: 0_level_0,participant_type,rare_diseases_family_id,biological_relationship_to_proband,family_size,participant_phenotypic_sex,parent0,parent1
platekey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
LP3806,Proband,23432,Proband,4,Male,,
LP3528,Relative,23432,Mother,4,Female,,
LP8430,Relative,23432,Father,4,Male,,
LP2820,Relative,23432,Full Sibling,4,Male,,
LP2990,Proband,73834,Proband,14,Female,,
LP8286,Relative,73834,Full Sibling,14,Male,,
LP6436,Relative,73834,Half Sibling with a shared Mother,14,Female,,
LP5342,Relative,73834,Son,14,Male,,
LP3564,Relative,73834,Daughter,14,Female,,
LP3899,Relative,73834,Half Sibling with a shared Father,14,Male,,


In [58]:
# Look up the relative of a given type within a participant's family.

def get_family_member(platekey, family_member_type):
    """Return the platekey of the relative of the requested type in a family.

    The family is the set of rows in the notebook-level ``df`` (indexed by
    platekey) that share the ``rare_diseases_family_id`` of ``platekey``.

    Args:
        platekey: Index label of any participant belonging to the family.
        family_member_type: Value to match against
            ``biological_relationship_to_proband``. Must be 'Mother',
            'Father', 'Proband', or contain 'Grand'
            (e.g. 'Maternal Grandmother').

    Returns:
        The platekey of the first matching family member in ``df`` row order,
        or the string 'None' when the family has no member of that type.

    Raises:
        ValueError: If ``family_member_type`` is not a supported type.
        KeyError: If ``platekey`` is not a label of ``df.index``.
    """
    # Raise explicitly instead of using assert: assert statements are removed
    # when Python runs with -O, which would silently disable this check.
    is_supported = (
        family_member_type in ('Mother', 'Father', 'Proband')
        or 'Grand' in family_member_type
    )
    if not is_supported:
        raise ValueError("Invalid family_member_type")

    # Get the family id of the person
    family_id = df.loc[platekey, 'rare_diseases_family_id']

    # One vectorised mask instead of a scalar .loc lookup per family member
    in_family = df['rare_diseases_family_id'] == family_id
    is_requested_type = df['biological_relationship_to_proband'] == family_member_type
    matches = df.index[(in_family & is_requested_type).to_numpy()]

    # The string 'None' (not NaN) is the "no such relative" sentinel
    return matches[0] if len(matches) else 'None'


# Demo lookups within the family of participant LP4074
for relative_type in ("Proband", "Maternal Grandmother"):
    print(get_family_member("LP4074", relative_type))
#get_family_member("LP4074","Full Sibling") gives error as expected (unsupported family_member_type)

LP2990
LP7781


In [59]:
# Write a function which takes a person's platekey as input. If the person's biological_relationship_to_proband 
# is 'Mother' or 'Father', change the proband's parent0 or parent1 column, respectively to person's platekey using
# the get_family_member function. Do the same for children and grandparents of the proband. 

def set_direct_relatives(platekey):
    relationship_to_proband = df.loc[platekey, 'biological_relationship_to_proband']
    proband = get_family_member(platekey, 'Proband')
    proband_sex = df.loc[proband, 'participant_phenotypic_sex']
    mother = get_family_member(proband, 'Mother')
    father = get_family_member(proband, 'Father')
    
    match relationship_to_proband:
        # Direct descendants/parents
        case 'Mother':
            df.loc[proband, 'parent0'] = platekey
        case 'Father':
            df.loc[proband, 'parent1'] = platekey
        case 'Son' | 'Daughter' if proband_sex == 'Female':
            df.loc[platekey, 'parent0'] = proband
        case 'Son' | 'Daughter' if proband_sex == 'Male':
            df.loc[platekey, 'parent1'] = proband
        # Grandparents
        case 'Maternal Grandmother' if mother != 'None':
            df.loc[mother, 'parent0'] = platekey
        case 'Maternal Grandfather' if mother != 'None':
            df.loc[mother, 'parent1'] = platekey
        case 'Paternal Grandmother' if father != 'None':
            df.loc[father, 'parent0'] = platekey
        case 'Paternal Grandfather' if father != 'None':
            df.loc[father, 'parent1'] = platekey
        # Siblings
        case 'Full Sibling' | 'Twins Monozygous' | 'Twins Dizygous' | 'Twins Unknown':
            df.loc[platekey, 'parent0'] = mother   
            df.loc[platekey, 'parent1'] = father
        case 'Half Sibling with a shared Mother':
            df.loc[platekey, 'parent0'] = mother
        case 'Half Sibling with a shared Father':
            df.loc[platekey, 'parent1'] = father
        # Aunts and Uncles
        case 'Maternal Aunt' | 'Maternal Uncle':
            df.loc[platekey, 'parent0'] = get_family_member(platekey, 'Maternal Grandmother')
            df.loc[platekey, 'parent1'] = get_family_member(platekey, 'Maternal Grandfather')
        case 'Paternal Aunt' | 'Paternal Uncle':
            df.loc[platekey, 'parent0'] = get_family_member(platekey, 'Paternal Grandmother')
            df.loc[platekey, 'parent1'] = get_family_member(platekey, 'Paternal Grandfather')
        # Cousins and others
        case _:
            df.loc[platekey, 'parent0'] = 'TBD'
            df.loc[platekey, 'parent1'] = 'TBD'



In [60]:
# Resolve parent links for every participant; each call updates df in place
for member_platekey in df.index:
    set_direct_relatives(member_platekey)

# Show the table with parent0 / parent1 filled in
df

Unnamed: 0_level_0,participant_type,rare_diseases_family_id,biological_relationship_to_proband,family_size,participant_phenotypic_sex,parent0,parent1
platekey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
LP3806,Proband,23432,Proband,4,Male,LP3528,LP8430
LP3528,Relative,23432,Mother,4,Female,,
LP8430,Relative,23432,Father,4,Male,,
LP2820,Relative,23432,Full Sibling,4,Male,LP3528,LP8430
LP2990,Proband,73834,Proband,14,Female,LP4564,LP7239
LP8286,Relative,73834,Full Sibling,14,Male,LP4564,LP7239
LP6436,Relative,73834,Half Sibling with a shared Mother,14,Female,LP4564,
LP5342,Relative,73834,Son,14,Male,LP2990,
LP3564,Relative,73834,Daughter,14,Female,LP2990,
LP3899,Relative,73834,Half Sibling with a shared Father,14,Male,,LP7239
