In [1]:
import pandas as pd
import dxpy
import os

In [None]:
# Input and output locations (placeholders; point these at the real files before running)

# Output for 16p12.1 deletion samples from script 3_identify_samples.ipynb
BURDEN_16P = "/path/to/input/16p12/sample/burden.csv"

# Output of Variant calling/UKB/3_PRS/6_merge_scores.py
PRS_PATH = "/path/to/UKB/16p12/PRS/data.csv"

# Output of script 4_parse_phenotypes.ipynb
QUESTIONNAIRE_PATH = "/path/to/questionnaire/interpretation.csv"

# Final output of script 5_parse_ICD10.ipynb
ICD_PATH = "/path/to/parsed/ICD10/data.csv"

# ICD10 chapter output of script 5_parse_ICD10.ipynb
ICD_CHAPTER = "/path/to/parsed/ICD10/chapters.csv"

# Destination folder passed to the upload call in the final cell
OUTPUT_PATH = "/path/to/output/files"

In [None]:
# Start the combined per-sample table from the 16p12.1 deletion burden file;
# the following cells left-join the other data sources onto this frame by 'Sample'
df = pd.read_csv(BURDEN_16P)

In [None]:
# Attach polygenic risk scores (PRS)
# Raw score column -> label used in the combined table (dict order sets column order)
prs_labels = {
    'autism_PRS': 'Autism PRS',
    'intelligence_PRS': 'Intelligence PRS',
    'educational_attainment_PRS': 'Education PRS',
    'schizophrenia_PRS': 'SCZ PRS',
}

prs = pd.read_csv(PRS_PATH)
# The PRS table carries its identifier in 'IID'; copy it to 'Sample' to match the merge key
prs = prs.assign(Sample=prs.IID)
# Keep only the key plus the four scores, then apply the display labels
prs = prs[['Sample', *prs_labels]].rename(columns=prs_labels)

# Left join: every row of df is kept; samples absent from the PRS table get NaN scores
df = df.merge(prs, on='Sample', how='left')

In [None]:
# Attach questionnaire-derived phenotypes
# Labels are applied by position, so the input file must contain exactly these
# seven columns in this order.
# NOTE(review): confirm the column order against the output of 4_parse_phenotypes.ipynb
quest_labels = [
    'Sample',
    'Depression',
    'Sleep trouble',
    'Mood lability',
    'Anxiety',
    'Addiction',
    'Psychosis',
]

quest = pd.read_csv(QUESTIONNAIRE_PATH)
quest.columns = quest_labels

# Left join: every row of df is kept; unmatched samples get NaN phenotypes
df = df.merge(quest, on='Sample', how='left')

In [None]:
# Attach ICD10-derived phenotypes
# Labels are applied by position, so the input file must contain exactly these
# six columns in this order. The '(ICD10)' suffix keeps them distinct from the
# questionnaire columns of the same name already present in df.
# NOTE(review): confirm the column order against the output of 5_parse_ICD10.ipynb
icd_labels = [
    'Sample',
    'Sleep trouble (ICD10)',
    'Addiction (ICD10)',
    'Depression (ICD10)',
    'Anxiety (ICD10)',
    'Psychosis (ICD10)',
]

icd = pd.read_csv(ICD_PATH)
icd.columns = icd_labels

# Left join: every row of df is kept; unmatched samples get NaN phenotypes
df = df.merge(icd, on='Sample', how='left')

In [None]:
# Attach the ICD10 chapter table; its column names are used as-is
chapdf = pd.read_csv(ICD_CHAPTER)

# Left join on 'Sample': every row of df is kept
df = df.merge(right=chapdf, how='left', on='Sample')

In [None]:
# Save
# Restrict the combined table to the reported columns, in their final order.
# A missing column raises KeyError here, which surfaces upstream schema changes.
# NOTE(review): 'Skin/subcutaeous tissue' looks misspelled, but the label has to
# match the column name in the ICD10 chapter input; confirm there before changing it.
output_columns = [
    # Demographics
    'Sample', 'YOB', 'Sex',
    # Variant burden
    'All coding SNVs', 'All coding SNVs (LF)', 'Missense', 'Missense (LF)', 'LOF', 'LOF (LF)', 'Splice', 'Splice (LF)',
    'Genes del.', 'Genes del. (LF)', 'Genes dup.', 'Genes dup. (LF)',
    # Polygenic risk scores
    'Intelligence PRS', 'SCZ PRS', 'Education PRS', 'Autism PRS',
    # Questionnaire phenotypes
    'Depression', 'Sleep trouble', 'Mood lability', 'Anxiety', 'Addiction', 'Psychosis',
    # ICD10 phenotypes
    'Sleep trouble (ICD10)', 'Addiction (ICD10)', 'Depression (ICD10)', 'Anxiety (ICD10)', 'Psychosis (ICD10)',
    # ICD10 chapters
    'Neoplasms', 'Blood', 'Endocrine/Metabolic', 'Mental/behavioral disorders', 'Nervous system', 'Eye',
    'Ear', 'Circulatory system', 'Respiratory system', 'Digestive system', 'Skin/subcutaeous tissue',
    'Musc. system/connective tissue', 'Genitourinary system', 'Pregnancy/childbirth', 'Congenital malformations',
]
df = df[output_columns]

# The table is written to a local file first, and that file is then uploaded to OUTPUT_PATH.
local_csv = 'UKB.csv'
try:
    df.to_csv(local_csv, index=False)
    dxpy.upload_local_file(local_csv, folder=OUTPUT_PATH, parents=True)
finally:
    # Remove the local copy even if the write or upload fails, so per-sample
    # data is not left behind in the working directory; any error still propagates.
    if os.path.exists(local_csv):
        os.remove(local_csv)