In [None]:
import pandas as pd
import altair as alt
alt.data_transformers.disable_max_rows()

# Load the tab-separated variant table.
variant_table = pd.read_csv('input.txt', sep='\t')

# Keep only rows whose 'Exclude?' value is exactly 'No' or 'NO_NEW', and reduce
# them to the distinct combinations of the four summary columns.
included = (
    variant_table
    .loc[
        lambda tbl: tbl['Exclude?'].isin(['No', 'NO_NEW']),
        ['ID', 'Exclude?', 'Exclude_Reason', 'narrative'],
    ]
    .drop_duplicates()
)
print(len(included), 'included')

# Same row filter, but with the wider set of per-variant columns and without
# de-duplication.
success_table = variant_table.loc[
    lambda tbl: tbl['Exclude?'].isin(['No', 'NO_NEW']),
    [
        'ID', 'Exclude?', 'Exclude_Reason', 'issues/notes', 'narrative',
        'unfiltered_result', 'filtered_result', 'genomic_coordinates_hg38',
        'effect', 'MOI', 'zygosity', 'inheritance', 'Family_Type', 'variant_type',
    ],
]

# The number of distinct 'ID' values is reported as 'genes'; the row count as 'variants'.
print(len(set(success_table['ID'])), 'genes')
print(len(success_table), 'variants')

In [None]:
# Load the tab-separated figure input. This rebinds `success_table`, replacing
# the frame that the previous cell derived from input.txt.
success_table = pd.read_table('fig2_input.tsv')

In [None]:
def create_df(highest_rank, compare, successTable):
    """Build cumulative rank counts for each run type.

    For every run type in ``compare`` and every rank threshold ``i`` in
    ``range(1, highest_rank)`` (``highest_rank`` itself is excluded), count the
    rows of ``successTable`` whose ``Variant_Level_noMOI_<runtype>`` value equals
    ``'Variant_Present_noMOI'`` and whose ``Variant_Level_noMOI_rank_<runtype>``
    value is ``<= i``. One summary line per run type is printed.

    Parameters
    ----------
    highest_rank : int
        Exclusive upper bound of the rank thresholds.
    compare : iterable
        Run-type identifiers; each is passed through ``str`` and appended to the
        two column-name prefixes above.
    successTable : pandas.DataFrame
        Table holding the status and rank columns for every run type. Its row
        count is the denominator of the percentages.

    Returns
    -------
    tuple
        ``(df, denom)`` where ``df`` has the columns ``Rank``, ``NumPatients``
        (the number of matching rows), ``Percent_Variants`` and ``Run_Type``,
        and ``denom`` is ``len(successTable)``.
    """
    thresholds = range(1, highest_rank)
    denom = len(successTable)
    data = []
    for runtype in compare:
        status_col = 'Variant_Level_noMOI_' + str(runtype)
        rank_col = 'Variant_Level_noMOI_rank_' + str(runtype)
        # Ranks of the rows in which this run type reported the variant.
        found_ranks = successTable.loc[
            successTable[status_col] == 'Variant_Present_noMOI', rank_col
        ]

        # Initialised up front so the summary below is well defined even when
        # there are no thresholds to iterate over.
        num = 0
        percent = 0.0
        for i in thresholds:
            num = int((found_ranks <= i).sum())
            # An empty table has nothing to take a percentage of; report 0
            # instead of dividing by zero.
            percent = (num / denom) * 100 if denom else 0.0
            data.append([i, num, percent, runtype])

        # max() of an empty selection would raise, so report None in that case.
        top_rank = found_ranks.max() if len(found_ranks) else None
        print('highest rank:', top_rank, 'max%:', percent, num)

    df = pd.DataFrame(data, columns=['Rank', 'NumPatients', 'Percent_Variants', 'Run_Type'])
    return df, denom

In [None]:
def create_plot(denom, source, compare, color_scheme, domain,
                zoom_max_rank=30, include_full_chart=False):
    """Draw the percentage-versus-rank line chart for each run type.

    Parameters
    ----------
    denom : int
        Count placed at the start of the chart title.
    source : pandas.DataFrame
        Frame with ``Rank``, ``Percent_Variants`` and ``Run_Type`` columns.
    compare : list
        Colour sort order of the full-range chart; unused unless
        ``include_full_chart`` is true.
    color_scheme : str
        Name of the colour scheme handed to the colour scale.
    domain : list
        Explicit colour-scale domain; also the sort order of the zoomed chart.
    zoom_max_rank : int, optional
        Highest ``Rank`` kept in the zoomed chart (default 30).
    include_full_chart : bool, optional
        When true, the full-range chart (all ranks, no point markers, no legend)
        is stacked above the zoomed chart. Defaults to False, in which case only
        the zoomed chart is returned.

    Returns
    -------
    A vertically concatenated Altair chart, also when it holds a single chart.
    """
    # The separating space keeps the count from running into the first word.
    title = str(denom) + ' Diagnostic Variants (Variant Level - no MOI requirement)'

    def shared_encodings():
        # Fresh channel objects per chart: both charts use the same axes and tooltip.
        return dict(
            x=alt.X('Rank', title='Exomiser Rank of Causal Variant'),
            y=alt.Y('Percent_Variants',
                    title='Percent of Causal Variants within Exomiser Rank',
                    scale=alt.Scale(domain=[0, 100])),
            tooltip=['Rank', 'Percent_Variants', 'Run_Type'],
        )

    zoom_source = source.loc[source['Rank'] <= zoom_max_rank]
    zoomChart = alt.Chart(zoom_source, title=title).mark_line(
        point=alt.OverlayMarkDef(size=50)
    ).encode(
        color=alt.Color('Run_Type:N', sort=domain,
                        scale=alt.Scale(domain=domain, scheme=color_scheme)),
        **shared_encodings()
    ).properties(
        width=500,
        height=400)

    if not include_full_chart:
        return alt.vconcat(zoomChart)

    bigChart = alt.Chart(source, title=title).mark_line().encode(
        color=alt.Color('Run_Type:N', legend=None, sort=compare,
                        scale=alt.Scale(domain=domain, scheme=color_scheme)),
        **shared_encodings()
    ).properties(
        width=600,
        height=500)
    return alt.vconcat(bigChart, zoomChart)


## A. Algorithms for phenotypic similarity

In [None]:
# Explicit colour-scale domain handed to create_plot, where it is also used as the
# sort order of the zoomed chart. The '', 'b' and 'c' entries match no run type;
# presumably they are placeholders that skip slots of the colour scheme -- TODO confirm.
# NOTE(review): 'noN_filtered_15_85_human_ppi_mouse_revel_mvp_noWL' is plotted (see
# run_types) but is absent from this domain -- confirm how that line should be coloured.
domain = [
    'noN_filtered_15_85_all_models_revel_mvp_noWL',
    'noN_filtered_15_85_mouse_revel_mvp_alphaM_noWL',
    'noN_filtered_15_85_phenixPrioritiser_revel_mvp_noWL',
    'noN_filtered_15_85_noPhenoPrior_revel_mvp_noWL',
    '',
    'noN_filtered_15_85_human_ppi_revel_mvp_noWL',
    'b',
    'c',
    'noN_filtered_15_85_human_revel_mvp_noWL',
]
# Currently left out of the domain:
#   'noN_filtered_15_85_human_REMM_revel_mvp_PSF0.501_noWL_genomiser'
#   'noN_filtered_15_85_all_models_REMM_revel_mvp_PSF0.501_noWL_genomiser'

# Run types to plot. Per the original note, the first four are, in order:
# PHIVE, hiPHIVE, PhenIX, no prioritization.
run_types = [
    'noN_filtered_15_85_mouse_revel_mvp_alphaM_noWL',
    'noN_filtered_15_85_all_models_revel_mvp_noWL',
    'noN_filtered_15_85_phenixPrioritiser_revel_mvp_noWL',
    'noN_filtered_15_85_noPhenoPrior_revel_mvp_noWL',
    'noN_filtered_15_85_human_revel_mvp_noWL',
    'noN_filtered_15_85_human_ppi_revel_mvp_noWL',
    'noN_filtered_15_85_human_ppi_mouse_revel_mvp_noWL',
]

# 300 is the exclusive upper bound of the rank thresholds, so ranks 1..299 are tabulated.
a_df, denom = create_df(300, run_types, success_table)
plot_1a = create_plot(denom, a_df, run_types, 'category20', domain)

# plot_1a.configure_legend(labelLimit=0).configure_axis(
#         labelFontSize=15,
#         titleFontSize=15).configure_legend(labelLimit=0,labelFontSize=15, titleFontSize=15)

In [None]:
# Apply chart-wide axis and legend styling. As the last expression of the cell,
# the configured chart is what the notebook renders.
(
    plot_1a
    .configure_axis(
        grid=True,
        labelPadding=5,
        labelLimit=0,
        labelFontSize=15,
        titleFontSize=15,
        labelFont='arial',
        tickSize=8,
    )
    .configure_legend(
        labelLimit=0,
        labelFontSize=15,
        titleFontSize=15,
        labelFont='arial',
    )
)
