### Create tables for Supplemental Tables

Cleans up the publication and individual-level data and reformats it into supplemental tables that are easier to read.

In [10]:
import pandas as pd
import os
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import yaml

from analysis.database import get_config

# Project configuration; its version attributes are used in file and
# directory names below
config = get_config()

# Output directory for everything this notebook writes
figures_dir = Path("..", "figures", config.version_for_dir, "analysis_12")

# Create it (including missing parents) on first use only
if not figures_dir.exists():
    print(f"Creating directory: {figures_dir}")
    figures_dir.mkdir(parents=True)

In [11]:
# Load the per-individual dataset for the configured data version; the first
# spreadsheet column becomes the row index
individual_data_path = f"../data/individual_all_data-{config.version}.xlsx"
df = pd.read_excel(individual_data_path, index_col=0)
df.head()

Unnamed: 0,individual_id,sex,publication_id,title,first_author,reference,doi,year,publication_database,resource_uri,...,treatment_taken::Implantable cardioverter-defibrillator,treatment_taken::Left cardiac sympathetic denervation,treatment_taken::Verapamil,effective::Beta blocker,effective::Catheter ablation,effective::Enalapril,effective::Flecainide,effective::Implantable cardioverter-defibrillator,effective::Left cardiac sympathetic denervation,effective::Verapamil
0,1,,1,Familial Evaluation in Catecholaminergic Polym...,Van Der Werf C,,10.1161/CIRCEP.112.970517,2012,PubMed,https://pubmed.ncbi.nlm.nih.gov/22787013,...,,,,,,,,,,
1,2,female,2,Flecainide therapy reduces exercise-induced ve...,"van der Werf, C",,10.1016/j.jacc.2011.01.026,2011,PubMed,https://pubmed.ncbi.nlm.nih.gov/21616285,...,0.0,0.0,0.0,0.0,,,1.0,,,
2,3,male,3,Structural abnormalities on cardiac magnetic r...,"Gerber, D",,10.1016/j.jacep.2020.03.006,2020,PubMed,https://pubmed.ncbi.nlm.nih.gov/32553227,...,,,,,,,,,,
3,4,female,4,Genetic Background of Catecholaminergic Polymo...,"Kawamura, M",,10.1253/circj.cj-12-1460,2013,PubMed,https://pubmed.ncbi.nlm.nih.gov/23595086,...,0.0,0.0,0.0,,,,,,,
4,5,male,5,Gender differences in the inheritance mode of ...,"Ohno, S.",,10.1371/journal.pone.0131517,2015,PubMed,https://pubmed.ncbi.nlm.nih.gov/26114861,...,,,,,,,,,,


In [30]:
# Publication-level columns carried over from the per-individual table
publication_columns = [
    "publication_id",
    "title",
    "first_author",
    "reference",
    "doi",
    "year",
    "publication_database",
    "resource_uri",
    "resource_id",
    "doi_uri",
    "publication_type",
    "rob_publication_type",
]

# One row per publication: the first row seen for each publication_id is kept
df_pubs = df[publication_columns].drop_duplicates(subset="publication_id")

print(f"Number of unique publications: {df_pubs.shape[0]}")

df_pubs.head()

Number of unique publications: 221


Unnamed: 0,publication_id,title,first_author,reference,doi,year,publication_database,resource_uri,resource_id,doi_uri,publication_type,rob_publication_type
0,1,Familial Evaluation in Catecholaminergic Polym...,Van Der Werf C,,10.1161/CIRCEP.112.970517,2012,PubMed,https://pubmed.ncbi.nlm.nih.gov/22787013,22787013,https://doi.org/10.1161/CIRCEP.112.970517,Article,Cohort or Cross-Sectional Like Study
1,2,Flecainide therapy reduces exercise-induced ve...,"van der Werf, C",,10.1016/j.jacc.2011.01.026,2011,PubMed,https://pubmed.ncbi.nlm.nih.gov/21616285,21616285,https://doi.org/10.1016/j.jacc.2011.01.026,Article,Cohort or Cross-Sectional Like Study
2,3,Structural abnormalities on cardiac magnetic r...,"Gerber, D",,10.1016/j.jacep.2020.03.006,2020,PubMed,https://pubmed.ncbi.nlm.nih.gov/32553227,32553227,https://doi.org/10.1016/j.jacep.2020.03.006,Research Letter,Cohort or Cross-Sectional Like Study
3,4,Genetic Background of Catecholaminergic Polymo...,"Kawamura, M",,10.1253/circj.cj-12-1460,2013,PubMed,https://pubmed.ncbi.nlm.nih.gov/23595086,23595086,https://doi.org/10.1253/circj.cj-12-1460,Article,Case Series
4,5,Gender differences in the inheritance mode of ...,"Ohno, S.",,10.1371/journal.pone.0131517,2015,PubMed,https://pubmed.ncbi.nlm.nih.gov/26114861,26114861,https://doi.org/10.1371/journal.pone.0131517,Article,Case Series


### Check if doi resolve

Limit to 10 resolutions/second (note: doi.org has a limit of 1000 resolutions per 5 minutes).

In [23]:
import asyncio
import aiohttp

# At most 10 DOI lookups may hold a slot at the same time
sem = asyncio.Semaphore(10)


async def fetch_doi(sess, doi, min_interval=1.0):
    """
    Request ``https://doi.org/<doi>`` and report the HTTP status code.

    A semaphore alone only caps the number of *concurrent* requests; fast
    responses would still exceed the intended requests-per-second budget.
    Each slot is therefore held for at least ``min_interval`` seconds, so
    the ten slots together start at most ten lookups per ``min_interval``.

    Parameters
    ----------
    sess
        Session object whose ``get(url)`` returns an async context manager
        exposing ``status`` (an ``aiohttp.ClientSession`` in this notebook).
    doi
        DOI string, appended to ``https://doi.org/``.
    min_interval
        Minimum number of seconds a slot stays occupied per lookup.
        Pass ``0`` to disable the throttling.

    Returns
    -------
    tuple
        ``(doi, status)`` where ``status`` is the response status code.
    """
    url = f"https://doi.org/{doi}"
    async with sem:
        loop = asyncio.get_running_loop()
        started = loop.time()
        async with sess.get(url) as res:
            status = res.status
        # Keep the slot occupied for the rest of the interval so the overall
        # request rate stays within the documented limit
        remaining = min_interval - (loop.time() - started)
        if remaining > 0:
            await asyncio.sleep(remaining)
        return doi, status


# jupyter allows for "top level" await
async with aiohttp.ClientSession() as session:
    tasks = []

    for doi in df_pubs["doi"].dropna().unique():
        tasks.append(fetch_doi(session, doi))

    results = await asyncio.gather(*tasks)

results_df = pd.DataFrame(results, columns=["doi", "status"])
results_df.head()

Unnamed: 0,doi,status
0,10.1161/CIRCEP.112.970517,403
1,10.1016/j.jacc.2011.01.026,200
2,10.1016/j.jacep.2020.03.006,200
3,10.1253/circj.cj-12-1460,200
4,10.1371/journal.pone.0131517,200


In [24]:
# Persist the per-DOI status codes alongside the other outputs (no index column)
doi_status_path = figures_dir / f"doi_status{config.version}.csv"
results_df.to_csv(doi_status_path, index=False)

In [35]:
# Attach the status code to each publication with a left join on the DOI, so
# publications whose DOI was not checked are kept (their status is missing)
df_pubs_checked = df_pubs.merge(results_df, how="left", on="doi")
# Blank out doi_uri wherever the lookup returned 404; all other rows,
# including those with a missing status, keep their doi_uri
doi_not_404 = df_pubs_checked["status"].ne(404)
df_pubs_checked["doi_uri"] = df_pubs_checked["doi_uri"].where(doi_not_404, None)
df_pubs_checked.head()

Unnamed: 0,publication_id,title,first_author,reference,doi,year,publication_database,resource_uri,resource_id,doi_uri,publication_type,rob_publication_type,status
0,1,Familial Evaluation in Catecholaminergic Polym...,Van Der Werf C,,10.1161/CIRCEP.112.970517,2012,PubMed,https://pubmed.ncbi.nlm.nih.gov/22787013,22787013,https://doi.org/10.1161/CIRCEP.112.970517,Article,Cohort or Cross-Sectional Like Study,403.0
1,2,Flecainide therapy reduces exercise-induced ve...,"van der Werf, C",,10.1016/j.jacc.2011.01.026,2011,PubMed,https://pubmed.ncbi.nlm.nih.gov/21616285,21616285,https://doi.org/10.1016/j.jacc.2011.01.026,Article,Cohort or Cross-Sectional Like Study,200.0
2,3,Structural abnormalities on cardiac magnetic r...,"Gerber, D",,10.1016/j.jacep.2020.03.006,2020,PubMed,https://pubmed.ncbi.nlm.nih.gov/32553227,32553227,https://doi.org/10.1016/j.jacep.2020.03.006,Research Letter,Cohort or Cross-Sectional Like Study,200.0
3,4,Genetic Background of Catecholaminergic Polymo...,"Kawamura, M",,10.1253/circj.cj-12-1460,2013,PubMed,https://pubmed.ncbi.nlm.nih.gov/23595086,23595086,https://doi.org/10.1253/circj.cj-12-1460,Article,Case Series,200.0
4,5,Gender differences in the inheritance mode of ...,"Ohno, S.",,10.1371/journal.pone.0131517,2015,PubMed,https://pubmed.ncbi.nlm.nih.gov/26114861,26114861,https://doi.org/10.1371/journal.pone.0131517,Article,Case Series,200.0


In [36]:
# Preferred link per publication: the DOI URL when there is one, otherwise
# fall back to the resource_uri of the same row
fallback_uri = df_pubs_checked["resource_uri"]
df_pubs_checked["link"] = df_pubs_checked["doi_uri"].fillna(value=fallback_uri)
df_pubs_checked.head()

Unnamed: 0,publication_id,title,first_author,reference,doi,year,publication_database,resource_uri,resource_id,doi_uri,publication_type,rob_publication_type,status,link
0,1,Familial Evaluation in Catecholaminergic Polym...,Van Der Werf C,,10.1161/CIRCEP.112.970517,2012,PubMed,https://pubmed.ncbi.nlm.nih.gov/22787013,22787013,https://doi.org/10.1161/CIRCEP.112.970517,Article,Cohort or Cross-Sectional Like Study,403.0,https://doi.org/10.1161/CIRCEP.112.970517
1,2,Flecainide therapy reduces exercise-induced ve...,"van der Werf, C",,10.1016/j.jacc.2011.01.026,2011,PubMed,https://pubmed.ncbi.nlm.nih.gov/21616285,21616285,https://doi.org/10.1016/j.jacc.2011.01.026,Article,Cohort or Cross-Sectional Like Study,200.0,https://doi.org/10.1016/j.jacc.2011.01.026
2,3,Structural abnormalities on cardiac magnetic r...,"Gerber, D",,10.1016/j.jacep.2020.03.006,2020,PubMed,https://pubmed.ncbi.nlm.nih.gov/32553227,32553227,https://doi.org/10.1016/j.jacep.2020.03.006,Research Letter,Cohort or Cross-Sectional Like Study,200.0,https://doi.org/10.1016/j.jacep.2020.03.006
3,4,Genetic Background of Catecholaminergic Polymo...,"Kawamura, M",,10.1253/circj.cj-12-1460,2013,PubMed,https://pubmed.ncbi.nlm.nih.gov/23595086,23595086,https://doi.org/10.1253/circj.cj-12-1460,Article,Case Series,200.0,https://doi.org/10.1253/circj.cj-12-1460
4,5,Gender differences in the inheritance mode of ...,"Ohno, S.",,10.1371/journal.pone.0131517,2015,PubMed,https://pubmed.ncbi.nlm.nih.gov/26114861,26114861,https://doi.org/10.1371/journal.pone.0131517,Article,Case Series,200.0,https://doi.org/10.1371/journal.pone.0131517


In [37]:
# Publications that ended up without any link
# THESE NEED TO BE MANUALLY FILLED IN!!!
link_missing = df_pubs_checked["link"].isna()
df_pubs_checked.loc[link_missing]

Unnamed: 0,publication_id,title,first_author,reference,doi,year,publication_database,resource_uri,resource_id,doi_uri,publication_type,rob_publication_type,status,link
36,82,Ryanodine receptor 2 mutation: Not only catech...,"She, Y","Yingfang She, Yide Li, Hang Yu, Liemin Zhou. R...",,2020,,,,,Article,Case Report,,
129,548,Novel ryanodine receptor mutation associated w...,"Alvarado, F.",,10.1161/circ.128.suppl_22.A16239,2018,,,,,Conference / Poster Abstract,Case Report,404.0,
153,742,Occult structural disease in patients with cat...,"Gerber, DA",,10.1161/circ.134.suppl_1.20771,2016,,,,,Conference / Poster Abstract,Cohort or Cross-Sectional Like Study,404.0,


In [40]:
# Columns kept for the published publication table, in output order
cleaned_columns = [
    "publication_id",
    "title",
    "first_author",
    "year",
    "link",
    "publication_database",
    "publication_type",
    "rob_publication_type",
]

# Work on an independent copy and expose rob_publication_type under the
# name study_design_type
df_pubs_cleaned = (
    df_pubs_checked[cleaned_columns]
    .copy()
    .rename(columns={"rob_publication_type": "study_design_type"})
)
df_pubs_cleaned.head()

Unnamed: 0,publication_id,title,first_author,year,link,publication_database,publication_type,study_design_type
0,1,Familial Evaluation in Catecholaminergic Polym...,Van Der Werf C,2012,https://doi.org/10.1161/CIRCEP.112.970517,PubMed,Article,Cohort or Cross-Sectional Like Study
1,2,Flecainide therapy reduces exercise-induced ve...,"van der Werf, C",2011,https://doi.org/10.1016/j.jacc.2011.01.026,PubMed,Article,Cohort or Cross-Sectional Like Study
2,3,Structural abnormalities on cardiac magnetic r...,"Gerber, D",2020,https://doi.org/10.1016/j.jacep.2020.03.006,PubMed,Research Letter,Cohort or Cross-Sectional Like Study
3,4,Genetic Background of Catecholaminergic Polymo...,"Kawamura, M",2013,https://doi.org/10.1253/circj.cj-12-1460,PubMed,Article,Case Series
4,5,Gender differences in the inheritance mode of ...,"Ohno, S.",2015,https://doi.org/10.1371/journal.pone.0131517,PubMed,Article,Case Series


In [41]:
# Write the cleaned list of unique publications to Excel, without the row index
unique_pubs_path = figures_dir / f"publications_unique{config.version}.xlsx"
df_pubs_cleaned.to_excel(unique_pubs_path, index=False)

### Summary of the studies included

In [64]:
# Load the "fixed" publication list: the "-filled" copy of the workbook
# (presumably the export above with the missing links completed by hand)
filled_pubs_path = figures_dir / f"publications_unique{config.version}-filled.xlsx"
df_filled = pd.read_excel(filled_pubs_path)
df_filled.head()


Unnamed: 0,publication_id,title,first_author,year,link,publication_database,publication_type,study_design_type
0,1,Familial Evaluation in Catecholaminergic Polym...,Van Der Werf C,2012,https://doi.org/10.1161/CIRCEP.112.970517,PubMed,Article,Cohort or Cross-Sectional Like Study
1,2,Flecainide therapy reduces exercise-induced ve...,"van der Werf, C",2011,https://doi.org/10.1016/j.jacc.2011.01.026,PubMed,Article,Cohort or Cross-Sectional Like Study
2,3,Structural abnormalities on cardiac magnetic r...,"Gerber, D",2020,https://doi.org/10.1016/j.jacep.2020.03.006,PubMed,Research Letter,Cohort or Cross-Sectional Like Study
3,4,Genetic Background of Catecholaminergic Polymo...,"Kawamura, M",2013,https://doi.org/10.1253/circj.cj-12-1460,PubMed,Article,Case Series
4,5,Gender differences in the inheritance mode of ...,"Ohno, S.",2015,https://doi.org/10.1371/journal.pone.0131517,PubMed,Article,Case Series


In [97]:
# Number of publications per study design (rows with a missing value are
# not counted by value_counts)
study_types = df_filled.loc[:, "study_design_type"].value_counts()
study_types

study_design_type
Case Series                             84
Case Report                             77
Cohort or Cross-Sectional Like Study    33
Basic Science                           11
Case Control Study                       8
Diagnostic Accuracy Study                4
Systematic Review                        2
Gene Association Study                   2
Name: count, dtype: int64

In [98]:
# Number of publications per source database (rows with a missing value are
# not counted by value_counts)
db_queried = df_filled.loc[:, "publication_database"].value_counts()
db_queried

publication_database
PubMed    177
Embase     25
Name: count, dtype: int64

In [99]:
# Number of publications per publication type (rows with a missing value are
# not counted by value_counts)
article_types = df_filled.loc[:, "publication_type"].value_counts()
article_types

publication_type
Article                         164
Conference / Poster Abstract     37
Research Letter                  13
Brief Communication               4
Review                            2
Textbook                          1
Name: count, dtype: int64

In [102]:
# Export the three count tables into one workbook, one sheet per table
summary_sheets = {
    "study_types": study_types,
    "db_queried": db_queried,
    "article_types": article_types,
}

with pd.ExcelWriter(
        figures_dir / "publication_summary.xlsx",
        engine="openpyxl",
        mode="w",
) as writer:
    for sheet_name, counts in summary_sheets.items():
        counts.to_excel(writer, sheet_name=sheet_name)

## Risk of Bias Tables

In [68]:
# Index of the supplemental tables: source file, table label and description
rob_contents_path = "data_commit/rob_analysis/rob_table_contents.xlsx"
rob_tables_info = pd.read_excel(rob_contents_path)
rob_tables_info

Unnamed: 0,file,supplemental_table,description
0,,Table S2,All publications used in the database
1,rob_case_control.xlsx,Table S3,Risk of Bias Analysis for Case Control Studies...
2,rob_case_report.xlsx,Table S4,Risk of Bias Analysis for Case Reports using t...
3,rob_case_series.xlsx,Table S5,Risk of Bias Analysis for Case Series using t...
4,rob_cohort.xlsx,Table S6,Risk of Bias Analysis for Cohort or Cross-Sect...
5,rob_diagnostic_accuracy.xlsx,Table S7,Risk of Bias Analysis for Diagnostic Accuracy ...
6,others.xlsx,Table S8,"Basic Science, Reviews, Clinical Trials Public..."


In [96]:
# Columns placed first in every risk-of-bias sheet
columns_to_keep_rob = [
    "publication_id",
    "title",
    "link"
]

# Build one workbook: a table of contents followed by one sheet per row of
# rob_tables_info.
# NOTE(review): the file name says S2-S9, but Table S9 is written to its own
# workbook further down; confirm the intended name.
with pd.ExcelWriter(
        figures_dir / f"Supplementary_Tables_S2-S9-{config.version}.xlsx",
        engine="openpyxl",
        mode="w",
) as writer:
    # add a sheet in the beginning "Table of Contents"
    table_of_contents = rob_tables_info[[
        "supplemental_table",
        "description",
    ]].copy()

    table_of_contents.columns = ["Table", "Description"]

    # add a row that says "Supplementary Data" as the first row;
    # ignore_index keeps the combined frame from carrying two rows labelled 0
    table_of_contents = pd.concat(
        [
            pd.DataFrame([["Supplementary Data", ""]],
                         columns=["Table", "Description"]),
            table_of_contents,
        ],
        ignore_index=True,
    )

    table_of_contents.to_excel(
        writer,
        sheet_name="Table of Contents",
        index=False,
    )

    for _, row in rob_tables_info.iterrows():
        # Table S2 is the publication list itself and has no source file
        if row["supplemental_table"] == "Table S2":
            df_filled.to_excel(
                writer,
                sheet_name=row["supplemental_table"],
                index=False,
            )
            continue

        # Single quotes inside the f-string: reusing the outer quote character
        # is only valid syntax from Python 3.12 onwards
        df_rob = pd.read_excel(
            f"data_commit/rob_analysis/{row['file']}"
        )

        # make sure all publication_ids are in df_filled; a table with
        # unknown ids is reported and skipped rather than written incomplete
        if set(df_rob["publication_id"]) - set(df_filled["publication_id"]):
            print(
                f"Missing publication_ids in df_filled for {row['supplemental_table']}")
            continue

        # join with df_filled for the "link"
        df_rob = df_rob.merge(
            df_filled[[
                "publication_id",
                "link",
            ]],
            on="publication_id",
            how="left",
        )

        col_keep = columns_to_keep_rob + [
            # any column that starts with a number (the numbered questions);
            # str()/[:1] tolerates empty or non-string column labels
            col for col in df_rob.columns if str(col)[:1].isdigit()
        ]

        df_rob = df_rob[col_keep]

        # if its table S8, add the publication_type and study_design_type
        if row["supplemental_table"] == "Table S8":
            df_rob = df_rob.merge(
                df_filled[[
                    "publication_id",
                    "publication_type",
                    "study_design_type"
                ]],
                on="publication_id",
                how="left",
            )

        print(f"Creating {row['supplemental_table']}")
        display(df_rob.head())

        df_rob.to_excel(
            writer,
            sheet_name=row["supplemental_table"],
            index=False,
        )




Creating Table S3


Unnamed: 0,publication_id,title,link,1. Was the research question or objective in this paper clearly stated and appropriate?,2. Was the study population clearly specified and defined?,3. Did the authors include a sample size justification?,4. Were controls selected or recruited from the same or similar population that gave rise to the cases (including the same timeframe)?,"5. Were the definitions, inclusion and exclusion criteria, algorithms or processes used to identify or select cases and controls valid, reliable, and implemented consistently across all study participants?",6. Were the cases clearly defined and differentiated from controls?,"7. If less than 100 percent of eligible cases and/or controls were selected for the study, were the cases and/or controls randomly selected from those eligible?",8. Was there use of concurrent controls?,9. Were the investigators able to confirm that the exposure/risk occurred prior to the development of the condition or event that defined a participant as a case?,"10. Were the measures of exposure/risk clearly defined, valid, reliable, and implemented consistently (including the same time period) across all study participants?",11. Were the assessors of exposure/risk blinded to the case or control status of participants?,"12. Were key potential confounding variables measured and adjusted statistically in the analyses? If matching was used, did the investigators account for matching during study analysis?"
0,59,Mutations of the Cardiac Ryanodine Receptor (R...,https://doi.org/10.1161/01.CIR.103.4.485,YES,YES,NO,YES,CD,YES,,NO,YES,YES,NO,NO
1,80,Molecular genetics of exercise-induced polymor...,https://doi.org/10.1038/sj.ejhg.5201061,YES,YES,NO,YES,CD,YES,,NO,YES,YES,NO,NO
2,109,Cardiac genetic disposition in sudden infant d...,https://doi.org/10.1016/j.jacc.2018.01.030,YES,YES,NO,NO,YES,YES,,NO,YES,YES,NO,NO
3,116,Utility of post-mortem genetic testing in case...,https://doi.org/10.1016/j.jacc.2017.02.046,YES,YES,NO,NO,YES,YES,,NO,YES,YES,NO,YES
4,712,Genetic analysis of hyperemesis gravidarum rev...,https://doi.org/10.1016/j.mce.2016.09.017,YES,YES,NO,YES,YES,YES,,NO,YES,YES,NM,NM


Creating Table S4


Unnamed: 0,publication_id,title,link,1. Were patient’s demographic characteristics clearly described?,2. Was the patient’s history clearly described and presented as a timeline?,3. Was the current clinical condition of the patient on presentation clearly described?,4. Were diagnostic tests or assessment methods and the results clearly described?,5. Was the intervention(s) or treatment procedure(s) clearly described?,6. Was the post-intervention clinical condition clearly described?,7. Were adverse events (harms) or unanticipated events identified and described?,8. Does the case report provide takeaway lessons?
0,9,A de novo ryanodine receptor 2 gene variant in...,https://doi.org/10.1007/s00414-019-02160-8,YES,NO,YES,YES,,,,YES
1,48,Successful treatment of catecholaminergic poly...,https://doi.org/10.1093/europace/euq517,YES,YES,YES,YES,YES,YES,YES,YES
2,70,A novel mutation in the cardiac ryanodine rece...,https://doi.org/10.1016/j.ijcard.2010.10.062,YES,NO,YES,YES,,,,YES
3,82,Ryanodine receptor 2 mutation: Not only catech...,https://www.neurology-asia.org/articles/neuroa...,YES,NO,YES,YES,YES,YES,NO,YES
4,90,Catecholaminergic Polymorphic Ventricular Tach...,https://doi.org/10.4070/kcj.2012.42.2.129,YES,NO,YES,YES,YES,YES,YES,YES


Creating Table S5


Unnamed: 0,publication_id,title,link,1. Was the study question or objective clearly stated?,"2. Was the study population clearly and fully described, including a case definition?",3. Were the cases consecutive?,4. Were the subjects comparable?,5. Was the intervention clearly described?,"6. Were the outcome measures clearly defined, valid, reliable, and implemented consistently across all study participants?",7. Was the length of follow-up adequate?,8. Were the statistical methods well-described?,9. Were the results well-described?
0,4,Genetic Background of Catecholaminergic Polymo...,https://doi.org/10.1253/circj.cj-12-1460,YES,YES,YES,YES,YES,YES,YES,YES,YES
1,5,Gender differences in the inheritance mode of ...,https://doi.org/10.1371/journal.pone.0131517,YES,YES,NO,YES,NO,CD,YES,YES,YES
2,8,Refractory tachyarrhythmias caused by ryanodin...,https://www.embase.com/records?id=L71893770,NO,NO,NO,YES,NO,NO,CD,NO,NO
3,16,Prevalence and significance of rare RYR2 varia...,https://doi.org/10.1016/j.hrthm.2014.07.020,YES,YES,CD,YES,YES,YES,YES,YES,YES
4,24,Familial dilated cardiomyopathy associated wit...,https://doi.org/10.5603/CJ.a2020.0099,YES,YES,NO,YES,YES,YES,YES,NM,YES


Creating Table S6


Unnamed: 0,publication_id,title,link,1. Was the research question or objective in this paper clearly stated?,2. Was the study population clearly specified and defined?,3. Was the participation rate of eligible persons at least 50%?,4. Were all the subjects selected or recruited from the same or similar populations (including the same time period)? Were inclusion and exclusion criteria for being in the study prespecified and applied uniformly to all participants?,"5. Was a sample size justification, power description, or variance and effect estimates provided?","6. For the analyses in this paper, were the exposure(s) of interest measured prior to the outcome(s) being measured?",7. Was the timeframe sufficient so that one could reasonably expect to see an association between exposure and outcome if it existed?,"8. For exposures that can vary in amount or level, did the study examine different levels of the exposure as related to the outcome (e.g., categories of exposure, or exposure measured as continuous variable)?","9. Were the exposure measures (independent variables) clearly defined, valid, reliable, and implemented consistently across all study participants?",10. Was the exposure(s) assessed more than once over time?,"11. Were the outcome measures (dependent variables) clearly defined, valid, reliable, and implemented consistently across all study participants?",12. Were the outcome assessors blinded to the exposure status of participants?,13. Was loss to follow-up after baseline 20% or less?,14. Were key potential confounding variables measured and adjusted statistically for their impact on the relationship between exposure(s) and outcome(s)?
0,1,Familial Evaluation in Catecholaminergic Polym...,https://doi.org/10.1161/CIRCEP.112.970517,YES,YES,YES,NO,NO,YES,YES,,YES,,YES,NO,YES,NO
1,2,Flecainide therapy reduces exercise-induced ve...,https://doi.org/10.1016/j.jacc.2011.01.026,YES,YES,CD,NO,NO,YES,YES,NO,YES,,YES,NO,YES,NM
2,3,Structural abnormalities on cardiac magnetic r...,https://doi.org/10.1016/j.jacep.2020.03.006,YES,YES,CD,YES,NO,YES,YES,,NO,,NO,NO,YES,NO
3,6,Bradycardia is a specific phenotype of catecho...,https://doi.org/10.2169/internalmedicine.9843-17,YES,YES,CD,YES,NO,YES,YES,,YES,,YES,NO,YES,NO
4,11,Linking the heart and the brain: Neurodevelopm...,https://doi.org/10.1016/j.hrthm.2018.08.025,YES,YES,YES,YES,NO,YES,YES,,YES,,YES,NO,YES,YES


Creating Table S7


Unnamed: 0,publication_id,title,link,1.\tWas a consecutive or random sample of patients enrolled?,2.\tWas a case control design avoided?,3.\tDid the study avoid inappropriate exclusions?,4.\tWere the index test results interpreted without knowledge of the results of the reference standard?,"5.\tIf a threshold was used, was it pre-specified?",6.\tIs the reference standard likely to correctly classify the target condition?,7.\tWere the reference standard results interpreted without knowledge of the results of the index test?,8.\tWas there an appropriate interval between index test and reference standard?,9.\tDid all patients receive the same reference standard?,10.\tWere all patients included in the analysis?
0,13,Assessment and validation of a phenotype-enhan...,https://doi.org/10.1161/CIRCGEN.119.002510,NO,YES,YES,CD,YES,YES,YES,YES,YES,YES
1,36,Differential diagnosis between catecholaminerg...,https://doi.org/10.1253/circj.CJ-17-1032,NO,YES,YES,CD,NO,NO,CD,YES,YES,YES
2,190,Next Generation Sequencing Challenges in the A...,https://doi.org/10.1002/elps.201400148,NO,YES,YES,NO,,YES,CD,YES,YES,NO
3,1163,Intravenous Epinephrine Infusion Test in Diagn...,https://doi.org/10.1111/j.1540-8167.2011.02188.x,NO,YES,YES,CD,,YES,CD,YES,YES,YES


Creating Table S8


Unnamed: 0,publication_id,title,link,publication_type,study_design_type
0,40,Denaturing HPLC-based approach for detecting R...,https://doi.org/10.1373/clinchem.2003.030734,Article,Basic Science
1,147,Flecainide prevents catecholaminergic polymorp...,https://doi.org/10.1038/nm.1942,Brief Communication,Basic Science
2,148,Catecholaminergic polymorphic ventricular tach...,https://doi.org/10.1136/heartjnl-2012-302033,Review,Systematic Review
3,157,Dantrolene rescues arrhythmogenic RYR2 defect ...,https://doi.org/10.1002/emmm.201100194,Article,Basic Science
4,192,Exome Analyses of Long QT Syndrome Reveal Cand...,https://doi.org/10.1371/journal.pone.0130329,Article,Gene Association Study


## Table S9

This table contains all the data used for the analysis

In [80]:
# Per-publication link and study design, keyed by publication_id
publication_extras = df_filled.set_index("publication_id")[
    ["link", "study_design_type"]
]

# Add those two columns to every individual row through its publication_id
df_for_table_s9 = df.join(publication_extras, on="publication_id")

df_for_table_s9.head()

Unnamed: 0,individual_id,sex,publication_id,title,first_author,reference,doi,year,publication_database,resource_uri,...,treatment_taken::Verapamil,effective::Beta blocker,effective::Catheter ablation,effective::Enalapril,effective::Flecainide,effective::Implantable cardioverter-defibrillator,effective::Left cardiac sympathetic denervation,effective::Verapamil,link,study_design_type
0,1,,1,Familial Evaluation in Catecholaminergic Polym...,Van Der Werf C,,10.1161/CIRCEP.112.970517,2012,PubMed,https://pubmed.ncbi.nlm.nih.gov/22787013,...,,,,,,,,,https://doi.org/10.1161/CIRCEP.112.970517,Cohort or Cross-Sectional Like Study
1,2,female,2,Flecainide therapy reduces exercise-induced ve...,"van der Werf, C",,10.1016/j.jacc.2011.01.026,2011,PubMed,https://pubmed.ncbi.nlm.nih.gov/21616285,...,0.0,0.0,,,1.0,,,,https://doi.org/10.1016/j.jacc.2011.01.026,Cohort or Cross-Sectional Like Study
2,3,male,3,Structural abnormalities on cardiac magnetic r...,"Gerber, D",,10.1016/j.jacep.2020.03.006,2020,PubMed,https://pubmed.ncbi.nlm.nih.gov/32553227,...,,,,,,,,,https://doi.org/10.1016/j.jacep.2020.03.006,Cohort or Cross-Sectional Like Study
3,4,female,4,Genetic Background of Catecholaminergic Polymo...,"Kawamura, M",,10.1253/circj.cj-12-1460,2013,PubMed,https://pubmed.ncbi.nlm.nih.gov/23595086,...,0.0,,,,,,,,https://doi.org/10.1253/circj.cj-12-1460,Case Series
4,5,male,5,Gender differences in the inheritance mode of ...,"Ohno, S.",,10.1371/journal.pone.0131517,2015,PubMed,https://pubmed.ncbi.nlm.nih.gov/26114861,...,,,,,,,,,https://doi.org/10.1371/journal.pone.0131517,Case Series


In [91]:
# get the list of columns positioned from "Adult-onset primary generalised
# epilepsy" through "effective::Verapamil" (both inclusive), excluding the
# columns in excluded_cols and any column whose name contains "Other"

excluded_cols = {
    "primary_diagnosis",
    # was broken up into individual columns
    "condition_descriptions",
    # becomes "onset_symptoms::Catecholaminergic polymorphic ventricular tachycardia 1"
    "condition_onset_symptoms",
}

s9_columns = df_for_table_s9.columns
# Resolve the two boundary positions once instead of on every iteration
first_disease_pos = s9_columns.get_loc("Adult-onset primary generalised epilepsy")
last_disease_pos = s9_columns.get_loc("effective::Verapamil")

disease_cols_s9 = [
    col for col in s9_columns[first_disease_pos:last_disease_pos + 1]
    if col not in excluded_cols and "Other" not in col
]

disease_cols_s9

['Adult-onset primary generalised epilepsy',
 'Arrhythmia at rest',
 'Arrhythmogenic right ventricular cardiomyopathy',
 'Arteriovenous malformation',
 'Ascending aortic aneurysm',
 'Atrial fibrillation',
 'Atrial flutter',
 'Atrial standstill',
 'Atrial tachycardia',
 'Attention deficit hyperactivity disorder',
 'Baseline/resting electrocardiogram abnormality',
 'Bradycardia',
 'Bronchial asthma',
 'Brugada syndrome',
 'Catecholaminergic polymorphic ventricular tachycardia 1',
 'Catecholaminergic polymorphic ventricular tachycardia 1 asymptomatic carrier',
 'Chest pain, unspecified',
 'Chronotropic incompetence',
 'Colon cancer',
 'Congenital atrioventricular block',
 'Developmental delay',
 'Diabetes mellitus',
 'Dilated cardiomyopathy',
 'Dizziness',
 'Dyspnea',
 'Effort-induced polymorphic ventricular arrhythmia',
 'Electrical storm',
 'Exercise/stress induced Long QT syndrome',
 'Exercise/stress induced atrial fibrillation',
 'Exercise/stress induced atrial tachycardia',
 'Exercis

In [92]:
# Rows that have a condition_onset_symptoms value, shown next to the
# "onset_symptoms::..." column so the two can be compared
has_onset_symptoms = df_for_table_s9["condition_onset_symptoms"].notnull()
df_for_table_s9.loc[
    has_onset_symptoms,
    ["onset_symptoms::Catecholaminergic polymorphic ventricular tachycardia 1",
     "condition_onset_symptoms"],
]

Unnamed: 0,onset_symptoms::Catecholaminergic polymorphic ventricular tachycardia 1,condition_onset_symptoms
1,none,none
27,Ritalin-induced irregular heart beats,Ritalin-induced irregular heart beats
32,syncope,syncope
33,none,none
47,syncope,syncope
...,...,...
1329,syncope,syncope
1330,syncope,syncope
1331,Syncope w/ convulsion,Syncope w/ convulsion
1338,SCD Drowning,SCD Drowning


In [100]:
# Individual, publication and variant columns that lead Table S9
leading_columns = [
    "individual_id",
    "sex",
    "publication_id",
    "title",
    "first_author",
    "year",
    "link",
    "publication_type",
    "study_design_type",
    "variant_id",
    "clinvar_uri",
    "hgvs_string",
    "c_hgvs_string",
    "p_hgvs_string",
    "p_hgvs_aa1",
    "cdna_change_type",
    "protein_change_type",
    "zygosity",
    "inheritance",
    "exon_start",
    "exon_end",
    "structure_domains",
    "domain",
    "subdomain",
]

# Final column order: the leading columns, then the selected condition and
# treatment columns
df_columns = leading_columns + disease_cols_s9

df_for_table_s9_filtered = df_for_table_s9.loc[:, df_columns]
df_for_table_s9_filtered.head()

Unnamed: 0,individual_id,sex,publication_id,title,first_author,year,link,publication_type,study_design_type,variant_id,...,treatment_taken::Implantable cardioverter-defibrillator,treatment_taken::Left cardiac sympathetic denervation,treatment_taken::Verapamil,effective::Beta blocker,effective::Catheter ablation,effective::Enalapril,effective::Flecainide,effective::Implantable cardioverter-defibrillator,effective::Left cardiac sympathetic denervation,effective::Verapamil
0,1,,1,Familial Evaluation in Catecholaminergic Polym...,Van Der Werf C,2012,https://doi.org/10.1161/CIRCEP.112.970517,Article,Cohort or Cross-Sectional Like Study,609,...,,,,,,,,,,
1,2,female,2,Flecainide therapy reduces exercise-induced ve...,"van der Werf, C",2011,https://doi.org/10.1016/j.jacc.2011.01.026,Article,Cohort or Cross-Sectional Like Study,599,...,0.0,0.0,0.0,0.0,,,1.0,,,
2,3,male,3,Structural abnormalities on cardiac magnetic r...,"Gerber, D",2020,https://doi.org/10.1016/j.jacep.2020.03.006,Research Letter,Cohort or Cross-Sectional Like Study,681,...,,,,,,,,,,
3,4,female,4,Genetic Background of Catecholaminergic Polymo...,"Kawamura, M",2013,https://doi.org/10.1253/circj.cj-12-1460,Article,Case Series,3698,...,0.0,0.0,0.0,,,,,,,
4,5,male,5,Gender differences in the inheritance mode of ...,"Ohno, S.",2015,https://doi.org/10.1371/journal.pone.0131517,Article,Case Series,3698,...,,,,,,,,,,


In [101]:
# Write Table S9 to its own workbook, without the row index
table_s9_path = figures_dir / f"Supplementary_Table_S9-{config.version}.xlsx"
df_for_table_s9_filtered.to_excel(table_s9_path, index=False)