# Plot coverage at each site from pileups

Import Python modules:

In [None]:
import altair as alt

import altair_saver

import pandas as pd

# Turn off Altair's maximum-row check so that large pileup data frames can be
# plotted; the return value is assigned to `_` so the cell displays nothing.
_ = alt.data_transformers.disable_max_rows()

Get key variables from `Snakemake`:

In [None]:
# input and output files
pileup_files = snakemake.input.pileups
chart_all_html = snakemake.output.chart_all_html
chart_all_pdf = snakemake.output.chart_all_pdf
chart_region_html = snakemake.output.chart_region_html
chart_region_pdf = snakemake.output.chart_region_pdf
output_csv = snakemake.output.csv

# parameters
aligners = snakemake.params.aligners
samples = snakemake.params.samples
patient_groups = snakemake.params.patient_groups
ref_name = snakemake.params.ref_name
region_of_interest = snakemake.params.region_of_interest
consensus_min_coverage = snakemake.params.consensus_min_coverage

# `pileup_files`, `samples`, and `patient_groups` are zipped together when the
# data are read, so a length mismatch would silently drop entries. Raise
# explicitly rather than `assert`, which is skipped when Python runs with `-O`.
if not (len(pileup_files) == len(samples) == len(patient_groups)):
    raise ValueError(
        'need one sample and one patient group per pileup file, but got '
        f"{len(pileup_files)} pileup files, {len(samples)} samples, and "
        f"{len(patient_groups)} patient groups"
    )
if not (region_of_interest['start'] <= region_of_interest['end']):
    raise ValueError(
        'region of interest must have start <= end, but got '
        f"start={region_of_interest['start']}, end={region_of_interest['end']}"
    )

Read the data:

In [None]:
nts = ['A', 'C', 'G', 'T']

# one data frame per pileup file, labeled with its sample and patient group
labeled_pileups = [
    pd.read_csv(pileup_file).assign(sample=sample_name,
                                    patient_group=group_name)
    for pileup_file, sample_name, group_name
    in zip(pileup_files, samples, patient_groups)
]

# stack the files and keep only rows for the requested aligners
pileups = pd.concat(labeled_pileups)
pileups = pileups.query('aligner in @aligners')

# depth at a site is the total count over the four nucleotides
pileups = pileups.assign(depth=lambda x: x[nts].sum(axis=1))
pileups = pileups.reset_index(drop=True)

print(f"Writing the pileup data to {output_csv}")
pileups.to_csv(output_csv, index=False)

pileups

Get all sites, then drop the rows with zero depth from the pileup data:

In [None]:
# record every site before any rows are removed
sites = pileups['site'].unique().tolist()

# tabulate how many rows do and do not have zero depth
zero_depth_counts = (
    pileups
    .assign(zero_depth=lambda x: x['depth'] == 0)
    .groupby('zero_depth')
    .aggregate(n_rows=pd.NamedAgg(column='site', aggfunc='count'))
)
print("Number of rows in `pileups` that have 0 depth:")
display(zero_depth_counts)

# keep only the rows with non-zero depth
pileups_drop_zeros = pileups.query('depth > 0')

Make interactive pileup plot.
First a function that creates the plot:

In [None]:
def coverage_chart(just_region_of_interest,
                   include_site_zoom_bar):
    """Build a faceted Altair chart of depth at each site.

    One facet is drawn per sample. Each facet shows bars of depth versus
    site colored by patient group, overlaid with a dashed horizontal rule at
    `consensus_min_coverage`. If the data contain more than one aligner, a
    drop-down selector is added to choose which aligner is shown.

    The data and settings are read from notebook-level variables:
    `pileups_drop_zeros`, `sites`, `region_of_interest`, `nts`, `ref_name`,
    `consensus_min_coverage`, and `samples`.

    Parameters
    ----------
    just_region_of_interest : bool
        If true, only plot sites from `region_of_interest['start']` to
        `region_of_interest['end']` (inclusive). Otherwise plot the full
        range of sites in `sites`.
    include_site_zoom_bar : bool
        If true, put a zoom bar below the facets and use its interval brush
        as the x-axis domain of the depth plots. Otherwise the x-axis domain
        is fixed to the plotted range of sites.

    Returns
    -------
    Altair chart
        The faceted chart (vertically concatenated with the zoom bar when
        `include_site_zoom_bar` is true) with view and axis configuration
        applied.

    """
    # some manipulations to decrease size of data frame to shrink chart
    if pileups_drop_zeros['aligner'].nunique() == 1:
        df = pileups_drop_zeros.drop(columns='aligner')
    else:
        df = pileups_drop_zeros
    site_df = pd.DataFrame({'site': sites})

    if just_region_of_interest:
        start, end = region_of_interest['start'], region_of_interest['end']
        df = df.query('site >= @start').query('site <= @end')
        site_df = site_df.query('site >= @start').query('site <= @end')
    else:
        # `sites` is in order of first appearance in the pileups, which is
        # not guaranteed to be sorted, so take the actual extremes rather
        # than the first and last entries
        start, end = min(sites), max(sites)

    # set up selectors; there is no selector for sample because samples are
    # shown as facets rather than selected
    descriptor_names = {'aligner': 'read'}
    selections = {}
    for descriptor, descriptor_name in descriptor_names.items():
        options = pileups_drop_zeros[descriptor].unique().tolist()
        if len(options) == 1:
            continue  # only make selector if more than one option
        selections[descriptor] = alt.selection_single(
            name=descriptor_name,
            fields=[descriptor],
            bind=alt.binding_select(options=options),
            init={descriptor: options[0]}
            )

    ncolumns = 5
    facet_width = 130

    # make site zoom bar
    zoom_brush = alt.selection_interval(
                encodings=['x'],
                mark=alt.BrushConfig(stroke='black', strokeWidth=2),
                )
    zoom_bar = (
        alt.Chart(site_df)
        .mark_rect(color='lightgray')
        .encode(x=alt.X('site:Q',
                        axis=alt.Axis(grid=False,
                                      labelAngle=-90,
                                      ),
                        scale=alt.Scale(domain=[start, end],
                                        nice=False,
                                        padding=0,
                                        ),
                        title='site zoom bar')
                )
        .add_selection(zoom_brush)
        .properties(width=ncolumns * facet_width,
                    height=15,
                    )
        )

    # make pileup chart; with a zoom bar the brush drives the x-axis domain,
    # otherwise the domain is fixed to the plotted range
    if include_site_zoom_bar:
        x_domain = zoom_brush
    else:
        x_domain = [start, end]
    pileup_chart = (
        alt.Chart()
        .encode(x=alt.X('site:Q',
                        axis=alt.Axis(grid=False,
                                      labelAngle=-90,
                                      tickCount=5,
                                      ),
                        scale=alt.Scale(domain=x_domain,
                                        nice=False,
                                        # pad only when there is no zoom bar
                                        padding=5 * int(not include_site_zoom_bar)
                                        ),
                        ),
                y=alt.Y('depth:Q',
                        title='depth',
                        axis=alt.Axis(grid=False,
                                      tickCount=10,
                                      ),
                        scale=alt.Scale(type='symlog',
                                        constant=2,  # increase slope of symlog scale near zero
                                        ),
                        ),
                color=alt.Color('patient_group:N',
                                title='patient group',
                                legend=alt.Legend(orient='right'),
                                scale=alt.Scale(range=['#E69F00', '#993e01']),
                                ),
                tooltip=['site:Q',
                         alt.Tooltip('reference:N',
                                     title=f"{ref_name} nt",
                                     ),
                         'depth:Q',
                         *[f"{nt}:N" for nt in nts],
                         ],
                )
        .mark_bar()
        .add_selection(zoom_brush)
        .properties(height=60,
                    width=facet_width,
                    )
        )

    # add any selections
    for selection in selections.values():
        pileup_chart = (pileup_chart
                        .add_selection(selection)
                        .transform_filter(selection)
                        )

    # horizontal line at coverage cutoff
    horiz_line = (
        alt.Chart(pd.DataFrame({'cutoff': [consensus_min_coverage]}))
        .mark_rule(color='#999999',
                   strokeWidth=1,
                   strokeDash=[3, 1],
                   )
        .encode(y='cutoff:Q')
        )

    # layer the depth bars and cutoff line, then facet by sample
    chart = (
        alt.layer(pileup_chart, horiz_line, data=df)
        .facet(facet=alt.Facet('sample:N',
                                title=None,
                                header=alt.Header(labelFontStyle='bold',
                                                  labelPadding=1,
                                                  ),
                                sort=samples,
                                ),
               columns=ncolumns,
               spacing=5,
               )
        )
    if include_site_zoom_bar:
        chart = chart & zoom_bar

    return (
        chart
        .configure_view(strokeWidth=1,
                        stroke='black')
        .configure_axis(domain=False)
        )

Make interactive plot just of region of interest:

In [None]:
# zoomable chart restricted to the region of interest
chart = coverage_chart(
    just_region_of_interest=True,
    include_site_zoom_bar=True,
)

print(f"Saving to {chart_region_html}")
chart.save(chart_region_html)

# display the chart in the notebook
chart

Make interactive plot of entire genome:

In [None]:
# zoomable chart covering all sites
chart = coverage_chart(
    just_region_of_interest=False,
    include_site_zoom_bar=True,
)

print(f"Saving to {chart_all_html}")
chart.save(chart_all_html)

# display the chart in the notebook
chart

Make static plot of region of interest:

In [None]:
# chart of the region of interest without the zoom bar, saved as a PDF
chart = coverage_chart(
    just_region_of_interest=True,
    include_site_zoom_bar=False,
)

print(f"Saving to {chart_region_pdf}")
altair_saver.save(chart, chart_region_pdf)

Make static plot of whole genome:

In [None]:
# chart of all sites without the zoom bar, saved as a PDF
chart = coverage_chart(
    just_region_of_interest=False,
    include_site_zoom_bar=False,
)

print(f"Saving to {chart_all_pdf}")
altair_saver.save(chart, chart_all_pdf)