In [1]:
import pandas as pd
import re
from functools import reduce

# Legend

In [2]:
import pandas as pd

# Legend for the supplementary workbook: one row per supplementary table,
# giving its name, a description (including the statistical test used) and the
# manuscript section it belongs to.
# NOTE(review): this legend describes 19 tables, whereas the export cell at the
# end of the notebook writes 17 data frames (no loader exists for the two
# ANCOVA tables, 6 and 7), so sheet numbers after table 5 will not line up
# with these names — confirm which numbering is intended.
data = {
    # "Supplementary Table 1" ... "Supplementary Table 19"
    "Name": [f"Supplementary Table {number}" for number in range(1, 20)],
    "Description": [
        "Cross ancestry meta analysis statistics (two-sided Wald Z-tests from inverse-variance–weighted fixed-effects meta-analysis).",
        "Cross ancestry meta analysis statistics using METAL (two-sided Wald Z-tests from fixed-effects meta-analysis implemented in METAL).",
        "Cross ancestry meta analysis conditioned on common variants (two-sided Wald Z-tests from inverse-variance–weighted fixed-effects meta-analysis).",
        "Leave one variant out stats for five study-identified associations (two-sided Wald Z-tests from inverse-variance–weighted fixed-effects meta-analysis).",
        "Cochran's Q statistics for ancestral heterogeneity (Cochran’s Q test).",
        "Ancestral heterogeneity ANCOVA statistics for APBA1 gene (ANCOVA, two-sided F statistic).",
        "Ancestral heterogeneity ANCOVA statistics for GRM7 gene (ANCOVA, two-sided F statistic).",
        "Cross ancestry meta analysis conditioned on medications (two-sided Wald Z-tests from inverse-variance–weighted fixed-effects meta-analysis).",
        "Obesity category enrichment statistics for 13 genes (two-sided Fisher’s exact test).",
        "Obesity associated comorbidity statistics for 13 genes (two-sided Fisher’s exact test).",
        "Structural equation modelling statistics for four genes (SEM, two-sided Wald Z-tests for path coefficients).",
        # Spelling corrected here: "Significant" / "unreported".
        "Significant PheWAS results for five previously unreported genes (two-sided Wald Z-tests from inverse-variance–weighted fixed-effects meta-analysis).",
        "Interaction statistics between rare variants and PGS in BMI-associated genes (two-sided Wald Z-tests from inverse-variance–weighted fixed-effects meta-analysis).",
        "BMI distribution statistics in Europeans stratified by PGS quintiles (descriptive statistics with two-sided t-tests for group comparisons).",
        "Plasma protein model coefficients of BMI-associated gene carrier status (linear regression with two-sided t-tests statistics).",
        "Plasma protein model coefficients for BMI (linear regression with two-sided t-tests statistics).",
        "Cross ancestry meta analysis statistics of known obesity genes (two-sided Wald Z-tests from fixed-effects meta-analysis).",
        "Cross ancestry meta analysis statistics of known sex-specific obesity genes (two-sided Wald Z-tests from fixed-effects meta-analysis, stratified by sex).",
        "Comorbidity selected (list of ICD-10 codes for each comorbidity along with reference for the codes aggregated for each trait).",
    ],
    # Section of the manuscript each table supports, in table order.
    "Manuscript heading": (
        ["Result 1"] * 8
        + ["Result 2"] * 4
        + ["Result 3"] * 2
        + ["Result 4"] * 2
        + ["Result 5"] * 2
        + ["Methods"]
    ),
}

# Create the DataFrame
df = pd.DataFrame(data)



# STable 1: Cross ancestry meta analysis statistics

In [3]:
# Cross-ancestry meta-analysis workbook: the first two columns become the row
# index and the first three rows become the (multi-level) column header.
meta_file = "../data/meta/tables/all_ancestry_with_nvariants.xlsx"
meta_df = pd.read_excel(
    meta_file,
    header=[0, 1, 2],
    index_col=[0, 1],
)

# STable 2: Cross ancestry meta analysis using METAL

In [4]:
# METAL meta-analysis workbook: a single index column and a three-row
# column header.
metal_file = "../data/metal/bmi_rint_monogenic_meta_metal.xlsx"
metal_df = pd.read_excel(
    metal_file,
    header=[0, 1, 2],
    index_col=[0],
)

# STable 3: Shadow effects

In [5]:
# Shadow-effect workbook: two index columns, three header rows.
shadow_file = "../data/meta/tables/shadow_effect.xlsx"
shadow_df = pd.read_excel(
    shadow_file,
    header=[0, 1, 2],
    index_col=[0, 1],
)

# STable 4: LOVO

In [6]:
# LOVO workbook: three index columns, two header rows.
lovo_file = "../data/meta/tables/lovo.xlsx"
lovo_df = pd.read_excel(
    lovo_file,
    header=[0, 1],
    index_col=[0, 1, 2],
)

# STable 5: Ancestral heterogeneity

In [7]:
# Ancestral-heterogeneity statistics, stored as a tab-separated text file and
# read with the default header and index handling.
hetz_file = "../data/meta/tables/qstats_ancestry.tsv"
hetz_df = pd.read_csv(
    hetz_file,
    sep="\t",
)

# STable 6: Medications

In [8]:
# Medications workbook: two index columns, three header rows.
med_file = "../data/meta/tables/meds.xlsx"
med_df = pd.read_excel(
    med_file,
    header=[0, 1, 2],
    index_col=[0, 1],
)

# STable 7: Obesity clinical category enrichment

In [9]:
# Obesity clinical category enrichment workbook: two index columns, two
# header rows.
bmi_cat_file = "../data/enrichment/bmi_cat/monogenic_enrich_bmi_cat.xlsx"
bmi_cat_df = pd.read_excel(
    bmi_cat_file,
    header=[0, 1],
    index_col=[0, 1],
)

# STable 8: Comorbidity enrichment

In [10]:
# Comorbidity enrichment workbook: two index columns, two header rows.
comorbid_file = "../data/enrichment/comorbid/monogenic_enrich_comorbid.xlsx"
comorbid_df = pd.read_excel(
    comorbid_file,
    header=[0, 1],
    index_col=[0, 1],
)

# STable 9: Structural Equation Modelling

In [11]:
# Structural equation modelling workbook: two index columns, one header row.
sem_file = "../data/sem/sem_meta.xlsx"
sem_df = pd.read_excel(
    sem_file,
    header=[0],
    index_col=[0, 1],
)

# STable 10: PheWAS

In [12]:
# Stack the per-gene PheWAS meta-analysis workbooks (four index columns, two
# header rows each) into a single frame.
# The frames are collected in a list and concatenated once, instead of
# repeatedly concatenating onto an empty seed DataFrame: that avoids copying
# the accumulated frame on every iteration and keeps the seed's empty
# index/columns out of the concatenation.
phewas_frames = []
for genes in ["RIF1", "YLPM1", "GIGYF1", "SLC5A3", "GRM7"]:
    phewas_file = f"../data/phewas/{genes}/phewas_meta.xlsx"
    phewas_frames.append(
        pd.read_excel(phewas_file, index_col=[0, 1, 2, 3], header=[0, 1])
    )
phewas_df = pd.concat(phewas_frames)

# STable 11: PGS interaction

In [13]:
# PGS interaction workbook: two index columns, one header row.
pgs_file = "../data/pgs_interaction/meta_pgs_interaction.xlsx"
pgs_df = pd.read_excel(
    pgs_file,
    header=[0],
    index_col=[0, 1],
)

# STable 12: BMI distribution by PGS quantiles

In [14]:
# BMI-by-PGS-category workbook: one index column, two header rows.
bmi_pgs_file = "../data/pgs_interaction/bmi_by_pgs_cat.xlsx"
bmi_pgs_df = pd.read_excel(
    bmi_pgs_file,
    header=[0, 1],
    index_col=[0],
)

# STable 13: PTV protein association

In [15]:
# Gzipped CSV of protein associations with gene carrier status; one header row.
# The row index is requested as column position -1 followed by position 0.
# NOTE(review): presumably -1 selects the last column; negative positions are
# not spelled out in the read_csv documentation — confirm on the pandas
# version in use.
ptv_protein_file = "../data/proteomics/assoc_protein_gene_carrier.csv.gz"
ptv_protein_df = pd.read_csv(
    ptv_protein_file,
    header=[0],
    index_col=[-1, 0],
)

# STable 14: Protein–BMI association

In [16]:
# Gzipped CSV of protein–BMI associations: two index columns, one header row.
protein_bmi_file = "../data/proteomics/protein_bmi_assoc.csv.gz"
protein_bmi_df = pd.read_csv(
    protein_bmi_file,
    header=[0],
    index_col=[0, 1],
)

# STable 15: Cross ancestry previous genes

In [17]:
# Meta-analysis workbook for known genes: two index columns, three
# header rows.
previous_meta_file = "../data/known_genes/monogenic_meta_known_genes.xlsx"
previous_meta_df = pd.read_excel(
    previous_meta_file,
    header=[0, 1, 2],
    index_col=[0, 1],
)

# STable 16: Cross ancestry previous sex specific

In [18]:
# Load the male and female workbooks and tag each with an outer "sex"
# column level so they can be placed side by side.
previous_meta_sex_dfs = []
for sex in ("male", "female"):
    pms_file = f"../data/known_genes/sex_specific_known_{sex}.xlsx"
    pms_df = pd.read_excel(pms_file, header=[0, 1, 2], index_col=[0, 1])
    # Concatenating a single frame with `keys` prepends the sex label as a
    # new top column level.
    pms_df = pd.concat([pms_df], axis=1, keys=[sex], names=["sex"])
    previous_meta_sex_dfs.append(pms_df)

previous_meta_sex_df = pd.concat(previous_meta_sex_dfs, axis=1)

# Move the sex level from first to third position:
# (sex, gene, mask, stat) -> (gene, mask, sex, stat), and name the levels.
reordered_columns = [
    (gene, mask, sex_label, stat)
    for sex_label, gene, mask, stat in previous_meta_sex_df.columns
]
previous_meta_sex_df.columns = pd.MultiIndex.from_tuples(
    reordered_columns,
    names=["Gene", "Gene Mask", "Sex", "Statistic"],
)

# Keep only the "pLoF" mask, ordered gene by gene with female before male and
# statistics in their order of first appearance.
gene_order = previous_meta_sex_df.columns.get_level_values("Gene").unique()
stat_order = previous_meta_sex_df.columns.get_level_values("Statistic").unique()
plof_columns = [
    (g, "pLoF", s, st)
    for g in gene_order
    for s in ["female", "male"]
    for st in stat_order
]
previous_meta_sex_df = previous_meta_sex_df.loc[:, plof_columns]

# STable 17: Comorbidity diagnosis

In [19]:
# Workbook of obesity-associated diseases; the first column is used as the
# row index.
comorbid_selection_df = pd.read_excel(
    "../data/enrichment/comorbid/obesity_associated_diseases.xlsx",
    index_col=0,
)

# Organize dataframes

In [20]:
# Frames in export order; a frame's position in this list determines the
# number of the sheet it is written to.
supp_dfs = [
    # Result 1
    meta_df,
    metal_df,
    shadow_df,
    lovo_df,
    hetz_df,
    med_df,
    # Result 2
    bmi_cat_df,
    comorbid_df,
    sem_df,
    phewas_df,
    # Result 3
    pgs_df,
    bmi_pgs_df,
    # Result 4
    ptv_protein_df,
    protein_bmi_df,
    # Known-gene meta-analyses (overall and sex-specific)
    previous_meta_df,
    previous_meta_sex_df,
    # Methods
    comorbid_selection_df,
]

In [21]:
# Write every frame in `supp_dfs` to its own sheet of one workbook, named
# "Supplementary Table 1", "Supplementary Table 2", ... in list order, with the
# row index included.
# The loop variable is deliberately not called `df`, so the legend DataFrame
# `df` built at the top of the notebook is not overwritten by this cell.
with pd.ExcelWriter('../manuscript/drafts_review3/Supplementary.xlsx', engine='xlsxwriter', mode="w") as writer:
    for table_number, supp_df in enumerate(supp_dfs, start=1):
        supp_df.to_excel(writer, sheet_name=f"Supplementary Table {table_number}", index=True)
