In [1]:
from openpyxl import load_workbook
import pandas as pd
import numpy as np

In [2]:
# Read in data from workbook, remove empty rows, and reformat as dataframe with SG code as header
wb = load_workbook('MC_Compiled_working.xlsx', data_only=True)
ws = wb['16p12.2']
df = pd.DataFrame(ws.values)
# Drop rows in which every cell is empty
df.dropna(how='all', inplace=True)
# The row at position 3 (after removing empty rows) supplies the column names
header=df.iloc[3]
# Remove the row labelled 0
df.drop(0, inplace=True)
df = df.rename(columns=header)
# Index the rows by the "Child Code" column; the header row itself then carries the
# label "Child Code" and is removed
df = df.set_index("Child Code")
df.drop("Child Code", inplace=True)
# Represent the textual 'N/A' marker as a real missing value.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
df.replace('N/A',np.nan, inplace=True)

In [3]:
# Function to add new column to the df with relevant domain for each phenotype, as applicable
# Domains are described in DomainsDefinitions_Jun01.xlsx

# Phenotype labels (lower-cased) grouped under their domain key.
_DOMAIN_PHENOTYPES = {
    # Developmental delay / motor delay / speech delay / intellectual disability
    # Global
    'dd': ['global developmental delay'],
    # Motor
    'motor': ['motor delay'],
    # Speech
    'speech': ['speech delay'],
    # ID
    'id': ["intellectual disability", "learning disability", "impaired cognitive abilities"],

    # Behavioral
    # ADHD
    'adhd': ['attention deficit hyperactivity disorder (adhd)'],
    # Aggression
    'agg': ['aggression'],
    # Self-injurious behaviors
    'self_inj': ['self-injurious behavior'],
    # Pervasive Developmental Delay / ASD
    'pdd_asd': ['pervasive developmental delay', 'autism spectrum disorder'],
    # Sleep disturbance
    'sleep': ['sleep disturbance'],

    # Psychiatric
    # Schizophrenia
    'schiz': ['schizophrenic features'],
    # Paranoia
    'paranoia': ['severe paranoia'],
    # Depression
    'depression': ['depression'],
    # Anxiety
    'anx': ['anxiety'],
    # Bipolar disorder
    'bpd': ['bipolar disorder'],

    # Nervous System
    # Nervous system abnormalities
    'nervous': ["abnormal gait", "tremors", "myoclonus", "encephalopathy", "cerebral palsy",
                "catatonia", "lack of coordination", "delayed myelination", "tethered cord",
                "peripheral neuropathy", "spastic quadriparesis", "stroke", "other nervous features"],
    # Nervous system morphology
    'nervous_morphology': ["cerebral/cerebellar atrophy", "decreased white matter",
                           "complete agenesis of corpus callosum", "brain malformation",
                           "hydrocephaly", "periventricular abnormalities", "different size of thalamus",
                           "enlargement of the frontal subarachnoid spaces",
                           "minimal enlargement of the bifrontal subdural spaces",
                           "asymmetrical lateral ventricle size", "other cns abnormality"],
    # Epilepsy / Seizures
    'seizure': ["seizures", "epilepsy", "seizure-like episodes with normal eeg", "west syndrome",
                "staring spells", "other seizure related"],

    # Congenital Anomalies
    # Muscular
    'muscular': ["hyperkinesis", "myopathic mouth", "muscle weakness", "muscle cramps",
                 "decreased tolerance to exercise", "other muscular features:"],

    # Cardiac
    'cardiac': ["hypoplastic left heart", "double-outlet right ventricle", "hypoplastic aortic valve",
                "abnormal heart sounds", "still’s murmur",
                "ventricular septal defect", "atrial septal defect", "patent ductus arteriosus",
                "pulmonary stenosis", "congenital stenosis of aortic valve",
                "bicuspid aortic valve", "mitral valve atresia", "absence of anterior portion of pericardium",
                "shone’s variant", "dextrocardia", "mesocardia", "right sided aortic arch",
                "patent foramen ovale", "aberrant subclavian artery", "other cardiac defects"],

    # Uro-Genital
    'uro-genital': ["neurogenic bladder", "small kidneys", "renal calices dilatation", "bilateral hydronephrosis",
                    "horseshoe kidney", "cystic kidney disease", "renal pelvis dilatation",
                    "other urinary system defects", "frequent urinary tract infections", "urinary health concerns?",
                    "unilateral inguinal hernia", "sacral dimple",
                    "other reproductive system defects"],
    'male-specific': ["undescended testicles", "hypospadias", "chordee"],
    'female-specific': ["polycystic ovaries"],

    # Digestive system abnormalities
    'digestive': ["gastroesophogeal reflux", "bowel obstruction", "regular loose bowel movements",
                  "regular constipation problems", "other digestive problems"],

    # Vision and Hearing Abnormalities
    'vision-hearing': ['strabismus', 'iris abnormalities', 'micro cornea', 'ectopic pupils',
                       'nystagmus', 'coloboma of iris', 'incomplete retinal vascularization',
                       'peripheral vision loss', 'myopia', 'cataracts', 'retinal detachment',
                       'macular degeneration', 'hypoplastic optic discs', 'wear glasses',
                       'other vision problems', 'sensorineural hearing loss', 'hearing impairment'],

    # Hypotonia/Hypertonia
    'hypo/hypertonia': ['hypotonia', 'hypertonia'],

    # Craniofacial/Skeletal Abnormalities
    # Dysmorphic Facial Features
    'severe_dysmorphia': ["plagiocephaly", "brachycephaly", "dolicocephaly", "frontal bossing",
                          "bitemporal narrowing", "craniosynostosis", "facial dysmorphology",
                          "facial hypotonia", "midface hypoplasia",
                          "prognathism", "micrognathia", "retrognathia"],
    'mild_dysmorphia': ["asymmetric face", "triangular face", "short neck",
                        "periorbital fullness (increased periorbital soft tissue)", "long eyelashes",
                        "synophrys", "hypertelorism", "telecanthus", "epicanthus", "inverted epicanthal folds",
                        "upslanting palpebral fissures", "downslanting palpebral fissures", "narrow palpebral fissures",
                        "abnormal features of the eyes", "small eyes", "deep set eyes", "ptosis", "other eye features:",
                        "low set (posteriorly rotated) ears", "prominent ears", "small ears", "abnormal ear shape",
                        "ear pit", "overfolded helices", "fleshy ears with small lobes", "other ear features:",
                        "depressed nasal bridge", "broad nasal bridge", "flat nasal bridge", "flat nasal root",
                        "bulbous nasal tip", "wide nose", "hypoplastic alae nasi", "upturned nose",
                        "anteverted nostrils", "other nose", "lip pits", "thin upper lip", "down-turned upper lip",
                        "high-arched palate", "bifid uvula", "broad uvula", "absence of uvula", "dental abnormalities",
                        "crowded teeth", "dental caries", "other mouth", "other head and neck features:",
                        "other facial features:"],

    # Microcephaly/Macrocephaly
    'micro/macrocephaly': ['microcephaly', 'macrocephaly'],
    'head circumference': ['head circumference z score'],

    # Growth Phenotypes
    # These are two separate phenotypes; they were previously fused into a single
    # string by mismatched quotes, so neither could ever be matched.
    'undergrowth': ['failure to thrive', 'growth retardation/growth delay'],
    'height': ['height z score'],
    'obesity': ["obesity (bmi z-score>1.645)"],
    'bmi': ['bmi z score'],

    # Skeletal
    'skeletal': ["polydactyly", "camptodactyly", "syndactyly", "clinodactyly",
                 "overlapping toes", "hypoplastic feet", "hammer toes",
                 "pes cavus", "pes planus", "club foot", "short broad hands",
                 "hypermobile / hyperextensible fingers", "scoliosis", "kyphosis", "bow legs",
                 "limited extension of elbows", "other skeletal features"],
}

# Reverse lookup (phenotype -> domain), built once instead of on every call.
# setdefault keeps the first domain that lists a phenotype, matching a
# first-match scan over the dictionary in insertion order.
_PHENOTYPE_TO_DOMAIN = {}
for _domain, _phenotypes in _DOMAIN_PHENOTYPES.items():
    for _phenotype in _phenotypes:
        _PHENOTYPE_TO_DOMAIN.setdefault(_phenotype, _domain)


def add_domains(row):
    """Return the domain key for the phenotype named by ``row.name``.

    The name is converted to ``str``, lower-cased and stripped of trailing
    whitespace before it is looked up.

    Parameters
    ----------
    row : pandas.Series
        A row whose ``name`` attribute is the phenotype label; the values of
        the row are not used.

    Returns
    -------
    str
        The domain key, or '.' when the phenotype belongs to no domain.
    """
    pheno = str(row.name).lower().rstrip()
    return _PHENOTYPE_TO_DOMAIN.get(pheno, '.')

In [4]:
# Add a second header with domain labels
# Look up each phenotype's domain and pair it with the phenotype label in a two-level index
domain_labels = list(df.apply(add_domains, axis=1))
idx = pd.MultiIndex.from_arrays([domain_labels, df.index.tolist()])
df = df.set_index(idx)

In [5]:
# Transpose dataframe so that each child is a row and remove empty rows
t = df.transpose().dropna(how='all')

In [6]:
# Calculate scores for each domain
# Domain calculations are also in DomainDefinitions_Jun01.xlsx

## Developmental Delay / Motor Delay / Speech Delay / Intellectual Disability

In [7]:
# General function for traits with only a single relevant category
def general(row, trait):
    """Return the score of the single phenotype that makes up domain ``trait``.

    Parameters
    ----------
    row : pandas.Series
        Values indexed by a two-level index whose first level is the domain key.
    trait : str
        Domain key expected to match exactly one entry of ``row``.

    Returns
    -------
    float or str
        The value as a float, or the string 'N/A' when it is missing
        (NaN or None).

    Raises
    ------
    TypeError
        If ``trait`` does not match exactly one entry (the same exception type
        that ``float()`` raised for such a Series).
    """
    matches = row.loc[row.index.get_level_values(0) == trait]
    if len(matches) != 1:
        raise TypeError("expected exactly one entry for domain %r, found %d"
                        % (trait, len(matches)))
    # Take the scalar explicitly: calling float() on a one-element Series is deprecated.
    value = matches.iloc[0]
    if pd.isna(value):
        return 'N/A'
    return float(value)

In [8]:
# Single-phenotype developmental domains: (output column, domain key)
for column_name, domain_key in (
    ('Global Developmental Delay', 'dd'),
    ('Motor Delay', 'motor'),
    ('Speech Delay', 'speech'),
):
    t[column_name] = t.apply(general, axis=1, args=(domain_key,))

In [9]:
def intel_disab(row):
    """Score intellectual disability from the entries of the 'id' domain.

    Every entry is converted to float first. Returns 1 if any entry equals 1,
    otherwise 0 if any entry equals 0, otherwise the string 'N/A'.
    """
    in_domain = row.index.get_level_values(0) == 'id'
    scores = list(map(float, row.loc[in_domain]))

    # A positive finding takes precedence over a negative one
    for outcome in (1, 0):
        if outcome in scores:
            return outcome
    # No entry was scored 1 or 0
    return 'N/A'

In [10]:
# Apply intel_disab to every row of t and store the result as a new column
t['Intellectual Disability'] = t.apply(intel_disab, axis=1)

## Behavioral

In [11]:
# Most behavioral are defined by a single trait, so they can use a general function
# (output column, domain key)
for column_name, domain_key in (
    ('ADHD', 'adhd'),
    ('Aggression', 'agg'),
    ('Self-Injurious Behaviors', 'self_inj'),
):
    t[column_name] = t.apply(general, axis=1, args=(domain_key,))

In [12]:
def asd_pdd(row):
    """Score ASD/PDD from the entries of the 'pdd_asd' domain.

    Returns 1 if any entry equals 1, otherwise 0 if any entry equals 0,
    otherwise the string "N/A".
    """
    in_domain = row.index.get_level_values(0) == 'pdd_asd'
    reported = list(row.loc[in_domain])

    if 1 in reported:
        return 1
    return 0 if 0 in reported else "N/A"

In [13]:
# ASD/PDD has its own scoring function; sleep disturbance is a single-trait domain
t['ASD/PDD'] = t.apply(asd_pdd, axis=1)
t['Sleep Disturbance'] = t.apply(general, axis=1, args=('sleep',))

## Psychiatric

In [14]:
# Psychiatric are all defined by a single trait, so they can use a general function
# (output column, domain key)
for column_name, domain_key in (
    ('Schizophrenic Features', 'schiz'),
    ('Severe Paranoia', 'paranoia'),
    ('Depression', 'depression'),
    ('Anxiety', 'anx'),
    ('Bipolar Disorder', 'bpd'),
):
    t[column_name] = t.apply(general, axis=1, args=(domain_key,))

## Nervous System

In [15]:
def congenital(row, trait, other):
    """Score a congenital-abnormality domain that has one "other features" entry.

    Parameters
    ----------
    row : pandas.Series
        Values indexed by a two-level (domain key, phenotype label) index.
    trait : str
        Domain key (index level 0) whose entries are scored.
    other : str
        Phenotype label (index level 1) of the "other" entry. It can make the
        result 1 but is never counted as evidence for 0.

    Returns
    -------
    int or str
        1 if any entry (including "other") equals 1, otherwise 0 if any
        regular entry equals 0, otherwise 'N/A'.

    Raises
    ------
    TypeError
        If ``other`` does not match exactly one entry (the same exception type
        that ``float()`` raised for such a Series).
    """
    domains = row.index.get_level_values(0)
    phenotypes = row.index.get_level_values(1)

    features = list(row.loc[(domains == trait) & (phenotypes != other)])

    other_matches = row.loc[phenotypes == other]
    if len(other_matches) != 1:
        raise TypeError("expected exactly one entry labelled %r, found %d"
                        % (other, len(other_matches)))
    # Take the scalar explicitly: calling float() on a one-element Series is deprecated.
    # Missing values (NaN or None) become NaN, which equals neither 1 nor 0.
    other_value = other_matches.iloc[0]
    other_features = float('nan') if pd.isna(other_value) else float(other_value)

    if 1 in features + [other_features]:
        return 1
    elif 0 in features:
        return 0
    else:
        return 'N/A'

In [16]:
# Nervous system abnormalities, scored with the shared congenital rule
t['Nervous'] = t.apply(congenital, axis=1, args=('nervous', 'Other nervous features'))

In [17]:
def nervous_morph(row):
    """Score nervous system morphology from the 'nervous_morphology' entries.

    Returns 1 if any entry equals 1, otherwise 0 if any entry equals 0,
    otherwise the string 'N/A'.
    """
    level0 = row.index.get_level_values(0)
    findings = list(row.loc[level0 == 'nervous_morphology'])
    # First of (1, 0) present among the findings; 'N/A' when neither is
    return next((flag for flag in (1, 0) if flag in findings), 'N/A')

In [18]:
# Apply nervous_morph to every row of t and store the result as a new column
t['Nervous System Morphology'] = t.apply(nervous_morph, axis=1)

In [19]:
# Epilepsy / seizures, scored with the shared congenital rule
t['Epilepsy/Seizure'] = t.apply(congenital, axis=1, args=('seizure', 'Other seizure related'))

## Congenital Anomalies

In [20]:
# Muscular system, scored with the shared congenital rule
t['Muscular System'] = t.apply(congenital, axis=1, args=('muscular', 'Other muscular features:'))

In [21]:
# Cardiac features, scored with the shared congenital rule
t['Cardiac Features'] = t.apply(congenital, axis=1, args=('cardiac', 'Other cardiac defects'))

In [22]:
def urogenital(row):
    """Score uro-genital defects, taking sex-specific phenotypes into account.

    The two "other" entries ('Other reproductive system defects' and
    'Other urinary system defects') can make the result 1 but are never
    counted as evidence for 0. Sex-specific phenotypes are used only when they
    match the child's sex (the 'Sex' entry: 0 = male, 1 = female).

    Parameters
    ----------
    row : pandas.Series
        Values indexed by a two-level (domain key, phenotype label) index.

    Returns
    -------
    int or str
        1 if any considered entry equals 1, otherwise 0 if any regular or
        sex-specific entry equals 0, otherwise 'N/A'.

    Raises
    ------
    TypeError
        If there is not exactly one 'Sex' entry (the same exception type that
        ``float()`` raised for such a Series).
    """
    domains = row.index.get_level_values(0)
    phenotypes = row.index.get_level_values(1)

    # Split the "other" entries from the regular uro-genital phenotypes.
    # Previously only the reproductive "other" entry was excluded from the
    # regular list, and the "other" values were wrapped in a nested list, so a
    # 1 in them was never detected.
    is_other = phenotypes.isin(['Other reproductive system defects',
                                'Other urinary system defects'])
    uro_genital = list(row.loc[(domains == 'uro-genital') & ~is_other])
    genital_other = list(row.loc[is_other])

    sex_matches = row.loc[phenotypes == 'Sex']
    if len(sex_matches) != 1:
        raise TypeError("expected exactly one 'Sex' entry, found %d" % len(sex_matches))
    # Take the scalar explicitly: calling float() on a one-element Series is deprecated.
    sex_value = sex_matches.iloc[0]
    sex_code = None if pd.isna(sex_value) else float(sex_value)
    # 0 = male, 1 = female; any other or missing code selects no sex-specific phenotypes
    sex_domain = {0: 'male-specific', 1: 'female-specific'}.get(sex_code)
    sex_specific = [] if sex_domain is None else list(row.loc[domains == sex_domain])

    if 1 in uro_genital + sex_specific + genital_other:
        return 1
    elif 0 in uro_genital + sex_specific:
        return 0
    else:
        return 'N/A'

In [23]:
# Apply urogenital to every row of t and store the result as a new column
t['Uro-Genital Defects'] = t.apply(urogenital, axis=1)

In [24]:
# Digestive system, scored with the shared congenital rule
t['Digestive System'] = t.apply(congenital, axis=1, args=('digestive', 'Other digestive problems'))

In [25]:
def vision_hearing(row):
    """Score vision and hearing issues from the 'vision-hearing' entries.

    Returns 1 if any entry equals 1, otherwise 0 if any entry equals 0,
    otherwise the string 'N/A'.
    """
    selector = row.index.get_level_values(0) == 'vision-hearing'
    entries = list(row.loc[selector])

    # Guard clauses: a positive finding outranks a negative one
    if 1 in entries:
        return 1
    if 0 in entries:
        return 0
    return 'N/A'

In [26]:
# Apply vision_hearing to every row of t and store the result as a new column
t['Vision and Hearing'] = t.apply(vision_hearing, axis=1)

In [27]:
def htonia(row):
    """Score hypotonia/hypertonia from the 'hypo/hypertonia' entries.

    Returns 1 if any entry equals 1, otherwise 0 if any entry equals 0,
    otherwise the string 'N/A'.
    """
    tone_mask = row.index.get_level_values(0) == 'hypo/hypertonia'
    tone_scores = list(row.loc[tone_mask])

    has_positive = 1 in tone_scores
    has_negative = 0 in tone_scores
    if has_positive:
        return 1
    return 0 if has_negative else 'N/A'

In [28]:
# Apply htonia to every row of t and store the result as a new column
t['Hypotonia/Hypertonia'] = t.apply(htonia, axis=1)

## Craniofacial/Skeletal Anomalies

In [29]:
def facial_dysmorphology(row):
    """Grade dysmorphic facial features from severe and mild entries.

    Returns 2 if any 'severe_dysmorphia' entry equals 1; otherwise 1 if at
    least two 'mild_dysmorphia' entries equal 1; otherwise 0 if any mild entry
    equals 0; otherwise the string "N/A".
    """
    level0 = row.index.get_level_values(0)
    severe_scores = list(row.loc[level0 == 'severe_dysmorphia'])
    mild_scores = list(row.loc[level0 == 'mild_dysmorphia'])

    # One severe feature is enough for the highest grade
    if 1 in severe_scores:
        return 2

    # Two or more mild features give the intermediate grade
    mild_hits = sum(1 for score in mild_scores if score == 1)
    if mild_hits >= 2:
        return 1

    # Otherwise a mild entry scored 0 gives grade 0
    return 0 if 0 in mild_scores else "N/A"

In [30]:
# Apply facial_dysmorphology to every row of t and store the result as a new column
t['Dysmorphic Facial Features'] = t.apply(facial_dysmorphology, axis=1)

In [31]:
def mcephaly(row):
    """Score microcephaly/macrocephaly from reported phenotypes and head size.

    Parameters
    ----------
    row : pandas.Series
        Values indexed by a two-level index whose first level is the domain
        key; read from the 'micro/macrocephaly' and 'head circumference'
        domains.

    Returns
    -------
    int or str
        1 if a phenotype is reported (value 1) or |head circumference z| > 2;
        otherwise 0 if a phenotype is scored 0 or |z| <= 2; otherwise 'N/A'.

    Raises
    ------
    TypeError
        If there is not exactly one 'head circumference' entry (the same
        exception type that ``float()`` raised for such a Series).
    """
    domains = row.index.get_level_values(0)
    reported = list(row.loc[domains == 'micro/macrocephaly'])

    hc_matches = row.loc[domains == 'head circumference']
    if len(hc_matches) != 1:
        raise TypeError("expected exactly one 'head circumference' entry, found %d"
                        % len(hc_matches))
    # Take the scalar explicitly: calling float() on a one-element Series is deprecated.
    # A missing z-score becomes NaN, for which both comparisons below are False.
    hc_value = hc_matches.iloc[0]
    hc = float('nan') if pd.isna(hc_value) else float(hc_value)

    # Check if child has reported micro/macrocephaly, or has | head circumference | > 2
    if 1 in reported or abs(hc) > 2:
        return 1
    # A z-score of exactly +/-2 is within range (it used to fall through to 'N/A')
    if 0 in reported or abs(hc) <= 2:
        return 0
    return 'N/A'

In [32]:
# Apply mcephaly to every row of t and store the result as a new column
t['Microcephaly/Macrocephaly'] = t.apply(mcephaly, axis=1)

In [33]:
def undergrowth(row):
    """Score undergrowth from reported phenotypes and the height z-score.

    Parameters
    ----------
    row : pandas.Series
        Values indexed by a two-level index whose first level is the domain
        key; read from the 'undergrowth' and 'height' domains.

    Returns
    -------
    int or str
        1 if an undergrowth phenotype equals 1 or height < -2; otherwise 0 if
        an undergrowth phenotype equals 0 or height >= -2; otherwise 'N/A'.

    Raises
    ------
    TypeError
        If there is not exactly one 'height' entry (the same exception type
        that ``float()`` raised for such a Series).
    """
    domains = row.index.get_level_values(0)

    # Missing values (NaN or None) become NaN, which equals neither 1 nor 0
    undergrow = [float('nan') if pd.isna(value) else float(value)
                 for value in row.loc[domains == 'undergrowth']]

    height_matches = row.loc[domains == 'height']
    if len(height_matches) != 1:
        raise TypeError("expected exactly one 'height' entry, found %d" % len(height_matches))
    # Take the scalar explicitly: calling float() on a one-element Series is deprecated.
    height_value = height_matches.iloc[0]
    height = float('nan') if pd.isna(height_value) else float(height_value)

    # Check if child has undergrowth phenotypes or height z-score below -2
    if 1 in undergrow or height < -2:
        return 1
    # A z-score of exactly -2 is not undergrowth (it used to fall through to 'N/A')
    elif 0 in undergrow or height >= -2:
        return 0
    else:
        return 'N/A'

In [34]:
# Apply undergrowth to every row of t and store the result as a new column
t['Undergrowth'] = t.apply(undergrowth, axis=1)

In [35]:
def tall(row):
    """Score tall stature from the height z-score.

    Parameters
    ----------
    row : pandas.Series
        Values indexed by a two-level index whose first level is the domain
        key; read from the 'height' domain.

    Returns
    -------
    int or str
        1 if height > 2, 0 if height <= 2, 'N/A' if the height is missing.

    Raises
    ------
    TypeError
        If there is not exactly one 'height' entry (the same exception type
        that ``float()`` raised for such a Series).
    """
    height_matches = row.loc[row.index.get_level_values(0) == 'height']
    if len(height_matches) != 1:
        raise TypeError("expected exactly one 'height' entry, found %d" % len(height_matches))
    # Take the scalar explicitly: calling float() on a one-element Series is deprecated.
    height_value = height_matches.iloc[0]
    if pd.isna(height_value):
        return 'N/A'
    height = float(height_value)

    # Check if child has height greater than 2; exactly 2 is not tall
    # (it used to fall through to 'N/A')
    return 1 if height > 2 else 0

In [36]:
# Apply tall to every row of t and store the result as a new column
t['Tall Stature'] = t.apply(tall, axis=1)

In [37]:
def obese(row):
    """Score obesity from the reported obesity phenotype and the BMI z-score.

    Parameters
    ----------
    row : pandas.Series
        Values indexed by a two-level index whose first level is the domain
        key; read from the 'obesity' and 'bmi' domains.

    Returns
    -------
    int or str
        1 if obesity equals 1 or BMI > 1.96; otherwise 0 if obesity equals 0
        or BMI <= 1.96; otherwise 'N/A'.

    Raises
    ------
    TypeError
        If either domain does not have exactly one entry (the same exception
        type that ``float()`` raised for such a Series).
    """
    domains = row.index.get_level_values(0)

    def single_score(domain):
        # Exactly one entry is expected per domain. Missing values become NaN,
        # for which every comparison below is False.
        matches = row.loc[domains == domain]
        if len(matches) != 1:
            raise TypeError("expected exactly one %r entry, found %d" % (domain, len(matches)))
        value = matches.iloc[0]
        return float('nan') if pd.isna(value) else float(value)

    obesity = single_score('obesity')
    bmi = single_score('bmi')

    # Check if child has obesity phenotype or BMI > 1.96
    # NOTE(review): the phenotype label elsewhere in this notebook reads
    # "bmi z-score>1.645" while this threshold is 1.96 - confirm which is intended.
    if obesity == 1 or bmi > 1.96:
        return 1
    # obesity is numeric, so compare with 0 (comparing with the string '0' never matched);
    # a BMI z-score of exactly 1.96 is not obese (it used to fall through to 'N/A')
    elif obesity == 0 or bmi <= 1.96:
        return 0
    else:
        return 'N/A'

In [38]:
# Apply obese to every row of t and store the result as a new column
t['Obesity'] = t.apply(obese, axis=1)

In [39]:
def growth(row):
    """Sum the 'Undergrowth', 'Tall Stature' and 'Obesity' scores of a row.

    Scores that are absent or cannot be converted to float (for example the
    string 'N/A') are skipped.

    Parameters
    ----------
    row : pandas.Series
        Values indexed by a two-level index whose first level holds the three
        score names.

    Returns
    -------
    float or str
        The sum of the usable scores, or 'N/A' when none of the three is usable.
    """
    level0 = row.index.get_level_values(0)

    total = []
    for column in ('Undergrowth', 'Tall Stature', 'Obesity'):
        matches = row.loc[level0 == column]
        # Anything other than exactly one entry cannot be read as a score
        if len(matches) != 1:
            continue
        try:
            # Take the scalar explicitly: calling float() on a one-element
            # Series is deprecated.
            total.append(float(matches.iloc[0]))
        except (TypeError, ValueError):
            # Non-numeric marker such as 'N/A' (or None): skip this score
            continue

    if not total:
        return 'N/A'
    return sum(total)

In [40]:
# Apply growth to every row of t and store the result as a new column
t['Growth'] = t.apply(growth, axis=1)

In [41]:
# Skeletal features, scored with the shared congenital rule
t['Skeletal Features'] = t.apply(congenital, axis=1, args=('skeletal', 'Other skeletal features'))

## Age Thresholds - Skipping for now!

In [42]:
# Age thresholds are defined in Age_thresholds_May31.xlsx
# Some categories were already filtered for age (delays/ID and ASD/PDD), this captures the others

In [43]:
# # ADHD
# t.loc[(t[('.', "Age")]<2) & (t[("ADHD")]==0), "ADHD"] = 'N/A'
# # Aggression
# t.loc[(t[('.', "Age")]<2) & (t[("Aggression")]==0), "Aggression"] = 'N/A'
# # Self-injurious
# t.loc[(t[('.', "Age")]<2) & (t[("Self-Injurious Behaviors")]==0), "Self-Injurious Behaviors"] = 'N/A'
# # Schizophrenic Features
# t.loc[(t[('.', "Age")]<13) & (t[("Schizophrenic Features")]==0), "Schizophrenic Features"] = 'N/A'
# # Bipolar disorder
# t.loc[(t[('.', "Age")]<13) & (t[("Bipolar Disorder")]==0), "Bipolar Disorder"] = 'N/A'
# # Paranoia
# t.loc[(t[('.', "Age")]<13) & (t[("Severe Paranoia")]==0), "Severe Paranoia"] = 'N/A'
# # Depression
# t.loc[(t[('.', "Age")]<13) & (t[("Depression")]==0), "Depression"] = 'N/A'
# # Anxiety
# t.loc[(t[('.', "Age")]<6) & (t[("Anxiety")]==0), "Anxiety"] = 'N/A'
# # Sleep Disturbance
# t.loc[(t[('.', "Age")]<2) & (t[("Sleep Disturbance")]==0), "Sleep Disturbance"] = 'N/A'

## Finalize Format

In [47]:
# Keep the last 30 rows of the transposed table, then remove the three intermediate growth scores
intermediate_scores = ['Undergrowth', 'Tall Stature', 'Obesity']
domains = t.transpose()[-30:].drop(intermediate_scores)

In [48]:
# Display the final table of scores
domains

Unnamed: 0,Unnamed: 1,SG001,SG011,SG012,SG013,SG021,SG022,SG023,SG026,SG155,SG032,...,SG546,SG553,SG561,SG583,SG584,SG587,SG589,SG598,SG620,SG622
Global Developmental Delay,,1.0,,0.0,1.0,1.0,0.0,0.0,1.0,0.0,,...,1.0,1.0,1.0,0.0,0.0,1.0,1.0,,1.0,0.0
Motor Delay,,1.0,,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,...,1.0,,1.0,0.0,0.0,0.0,1.0,,1.0,0.0
Speech Delay,,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,...,1.0,,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0
Intellectual Disability,,1.0,1.0,0.0,,1.0,0.0,0.0,1.0,0.0,1.0,...,0.0,1.0,1.0,0.0,0.0,1.0,,,1.0,0.0
ADHD,,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
Aggression,,0.0,0.0,0.0,,,0.0,0.0,0.0,1.0,1.0,...,0.0,,,0.0,0.0,1.0,1.0,,1.0,1.0
Self-Injurious Behaviors,,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0
ASD/PDD,,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,,...,0.0,,1.0,0.0,0.0,1.0,0.0,,1.0,1.0
Sleep Disturbance,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
Schizophrenic Features,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,,,0.0,0.0,,,,,0.0


In [49]:
# Write the domain table to an Excel workbook.
# The context manager saves and closes the file on exit; ExcelWriter.save()
# no longer exists in pandas 2.0.
with pd.ExcelWriter('prelim_child_domains_Jul16.xlsx') as writer:
    domains.to_excel(writer)