In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to local .py files are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
import pandas
import altair as alt

In [9]:
# Load the pairwise strain comparison table (tab-separated).
df = pandas.read_csv('benchmarks/straingr/straingrcompare_0.5.tsv', sep='\t')
# Cast to bool so that `~df.samestrain` below is a logical (not bitwise)
# negation and the column can be used directly as a row mask.
df['samestrain'] = df.samestrain.astype(bool)

# Colour scale for `samestrain`: False -> grey, True -> red.
color_domain = [False, True]
color_range = ["grey", "#d62728"]

# Axis limits of the two panels. The zoom panel's row filter is derived from
# the lower bounds of its limits, so the filter and the axes cannot drift apart.
FULL_X_DOMAIN = [0.91, 1.0]
FULL_Y_DOMAIN = [99.2, 100]
ZOOM_X_DOMAIN = [0.97, 1.0]
ZOOM_Y_DOMAIN = [99.9, 100]

TOOLTIP_FIELDS = ['strain', 'strain1ani', 'strain2ani', 'gapJaccardSim', 'singleAgreePct', 'commonPct']


def strain_pair_layer(data: pandas.DataFrame, same_strain: bool, x_domain: list, y_domain: list) -> alt.Chart:
    """Build one scatter layer for the rows whose `samestrain` equals `same_strain`.

    Parameters
    ----------
    data
        Comparison table. Must contain a boolean `samestrain` column plus the
        columns `gapJaccardSim`, `singleAgreePct`, `commonPct` and those
        listed in `TOOLTIP_FIELDS`.
    same_strain
        True selects the rows flagged `samestrain` (drawn at opacity 0.6);
        False selects the remaining rows (drawn at opacity 0.2).
    x_domain, y_domain
        `[min, max]` limits for the x and y scale.

    Returns
    -------
    alt.Chart
        A point chart suitable for layering with the complementary subset.
    """
    subset = data[data.samestrain] if same_strain else data[~data.samestrain]
    opacity = 0.6 if same_strain else 0.2

    # No mark-level colour is set: the `samestrain` colour encoding below
    # decides the point colour (in Vega-Lite an encoding channel takes
    # precedence over the corresponding mark property).
    return alt.Chart(subset).mark_point(opacity=opacity).encode(
        alt.X('gapJaccardSim', scale=alt.Scale(zero=False, domain=x_domain), title="Gap Similarity"),
        alt.Y('singleAgreePct', scale=alt.Scale(zero=False, domain=y_domain), title="Pairwise ACNI [%]"),
        color=alt.Color('samestrain', type='nominal', title="Identical strain",
                        scale=alt.Scale(domain=color_domain, range=color_range)),
        size=alt.Size('commonPct', type="quantitative", title="Common Callable [%]",
                      scale=alt.Scale(range=(10, 500))),
        tooltip=TOOLTIP_FIELDS,
    )


# Scale resolution is requested per channel by keyword. A bare positional
# 'shared' would be bound to whichever channel happens to come first in
# Altair's ScaleResolveMap signature rather than to the channels meant here.
SHARED_SCALES = dict(color='shared', size='shared')

# --- Full view ---------------------------------------------------------------
# The different-strain layer goes first so the same-strain points are drawn on top.
c_diff = strain_pair_layer(df, False, FULL_X_DOMAIN, FULL_Y_DOMAIN)
c_same = strain_pair_layer(df, True, FULL_X_DOMAIN, FULL_Y_DOMAIN)
full = (c_diff + c_same).resolve_scale(**SHARED_SCALES).properties(width=300, height=300)

# --- Zoomed view -------------------------------------------------------------
# Keep only the rows strictly above the lower bound of each zoomed axis.
zoomed_df = df[(df['gapJaccardSim'] > ZOOM_X_DOMAIN[0]) & (df['singleAgreePct'] > ZOOM_Y_DOMAIN[0])]
c_diff_zoom = strain_pair_layer(zoomed_df, False, ZOOM_X_DOMAIN, ZOOM_Y_DOMAIN)
c_same_zoom = strain_pair_layer(zoomed_df, True, ZOOM_X_DOMAIN, ZOOM_Y_DOMAIN)
zoom = (c_diff_zoom + c_same_zoom).resolve_scale(**SHARED_SCALES).properties(width=300, height=150)

# Show both panels side by side with one common colour and size scale.
(full | zoom).resolve_scale(**SHARED_SCALES)