In [None]:
import collections
import numpy as np
import pandas as pd
import plotly.express as px

import sys
sys.path.append('../')
import plotting

# Functions

### Calculate base coverage by position

In [None]:
def collect_base_coverage(df, length=150):
    """Compute, per experiment and read direction, the fraction of reads covering each position.

    Every read adds one count to each position between its 5' and 3' break
    points. Reads whose ``read_direction`` is not 1 are treated as reverse
    reads: their interval is mirrored with respect to ``length`` and they are
    reported with ``read_direction`` 2. Counts are divided by the number of
    reads of the same experiment and direction.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per read with the columns ``exp``, ``read_direction``,
        ``break_5_pos`` and ``break_3_pos``. A missing ``break_5_pos`` is
        treated as 0 and a missing ``break_3_pos`` as ``length``.
    length : int, default 150
        Value used as the 3' end of reads without a 3' break and as the
        reference length for mirroring reverse reads.

    Returns
    -------
    pandas.DataFrame
        Columns ``exp``, ``read_direction`` (1 or 2), ``pos`` and ``coverage``,
        sorted by ``exp`` and ``pos``. Positions that no read covers are not
        listed. An empty frame with these columns is returned when there is
        nothing to report (e.g. ``df`` has no rows).
    """
    columns = ['exp', 'read_direction', 'pos', 'coverage']
    dfs = []
    for exp in df['exp'].unique():
        subdf = df[df['exp'] == exp]
        # slot 0: reads with read_direction == 1, slot 1: all other reads
        coverage = (collections.Counter(), collections.Counter())
        n_reads = [0, 0]

        # Zip over the three needed columns instead of iterrows(): same row
        # order, but no per-row Series has to be constructed.
        reads = zip(subdf['break_5_pos'], subdf['break_3_pos'], subdf['read_direction'])
        for break_5, break_3, direction in reads:
            # pd.isna also copes with None / pd.NA, where np.isnan would raise
            start = 0 if pd.isna(break_5) else int(break_5)
            end = length if pd.isna(break_3) else int(break_3)

            if direction == 1:
                coverage[0].update(range(start + 1, end + 1))
                n_reads[0] += 1
            else:
                # if reverse read, we need to adjust the position to the end of the read
                coverage[1].update(range(length - end + 1, length - start + 1))
                n_reads[1] += 1

        for slot, label in enumerate((1, 2)):
            counts = coverage[slot]
            if not counts:
                # Nothing covered for this direction: skip it so we never
                # normalise by zero reads or concatenate an empty frame.
                continue
            dfs.append(pd.DataFrame({
                'exp': exp,
                'read_direction': label,
                'pos': list(counts.keys()),
                'coverage': np.array(list(counts.values())) / n_reads[slot],
            }))

    if not dfs:
        # pd.concat([]) raises ValueError; return an empty, well-formed frame instead
        return pd.DataFrame(columns=columns)

    return pd.concat(dfs).sort_values(by=['exp', 'pos'])

### Plot

In [None]:
def plot_base_coverage(df, length=150, colors=None):
    """Draw coverage-versus-position line curves and return the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``pos`` (x), ``coverage`` (y), ``exp`` (line
        color) and ``read_direction`` (line dash).
    length : int, default 150
        Upper limit of the x-axis; the axis spans ``[0, length]``.
    colors : dict or None
        Mapping from ``exp`` value to color, forwarded as
        ``color_discrete_map``.

    Returns
    -------
    The plotly figure after it has been passed through
    ``plotting.standardize_plot``.
    """
    # read_direction 1 is drawn solid, read_direction 2 dashed
    dash_by_direction = {1: 'solid', 2: 'dash'}

    figure = px.line(
        df,
        x='pos',
        y='coverage',
        color='exp',
        color_discrete_map=colors,
        line_dash='read_direction',
        line_dash_map=dash_by_direction,
        render_mode='svg',
    )

    # small fixed-size panel without legend
    layout_options = {
        'height': 140,
        'width': 210,
        'showlegend': False,
        'margin': {'l': 0, 'r': 10, 't': 10, 'b': 0},
    }
    figure.update_layout(**layout_options)

    figure.update_xaxes(title='Position in design sequence', range=[0, length])
    # y-axis as percentages; 1.01 leaves a little headroom above 100%
    figure.update_yaxes(title='Fraction of reads', tickformat=",.0%", range=[0, 1.01])

    return plotting.standardize_plot(figure)

# Plot data for Meiser et al.

In [None]:
# Load the breakage data of the three Meiser et al. conditions.
# NOTE(review): the dict keys presumably end up as the `exp` labels (they match
# the keys of the color map used for this dataset) — confirm in
# plotting.read_breakage_data.
df = plotting.read_breakage_data({
    'unaged': '../data_experimental/Aging_Meiser/unaged',
    'aged': '../data_experimental/Aging_Meiser/aged',
    'repaired': '../data_experimental/Aging_Meiser/repaired',
})
# bare last expression: show the loaded table as the cell output
df

In [None]:
# Per-position coverage for every experiment and read direction; length=150 is
# used as the 3' end of reads without a 3' break and for mirroring reverse reads.
plot_df = collect_base_coverage(df, length=150)

In [None]:
# Draw the coverage curves with one fixed color per condition.
fig = plot_base_coverage(
    plot_df, 
    length=150,  # x-axis upper limit; same value as passed to collect_base_coverage
    colors={
        "unaged": "#969696",
        "aged": "#de2d26",
        "repaired": "#3182bd",
    },
)

# Export as SVG (path is relative to the working directory), then display the figure.
fig.write_image('figures/meiser_base_coverage.svg')
fig.show()

# Plot data for Song et al.

In [None]:
# Load the breakage data of the three Song et al. time points (0d, 28d, 70d);
# the 0d directory is labelled 'unaged'. Note that this rebinds `df`.
# NOTE(review): the dict keys presumably end up as the `exp` labels — confirm
# in plotting.read_breakage_data.
df = plotting.read_breakage_data({
    'unaged': '../data_experimental/Aging_Song/0d',
    '28d': '../data_experimental/Aging_Song/28d',
    '70d': '../data_experimental/Aging_Song/70d',
})
# bare last expression: show the loaded table as the cell output
df

In [None]:
# Per-position coverage for every experiment and read direction; this dataset
# uses length=200 (the Meiser and simulated sections use 150).
plot_df = collect_base_coverage(df, length=200)

In [None]:
# Draw the coverage curves with one fixed color per time point.
fig = plot_base_coverage(
    plot_df, 
    length=200,  # x-axis upper limit; same value as passed to collect_base_coverage
    colors={
        "unaged": "#969696",
        "28d": "#fcae91",
        "70d": "#de2d26",
    },
)

# Export as SVG (path is relative to the working directory), then display the figure.
fig.write_image('figures/song_base_coverage.svg')
fig.show()

# Plot simulated data (compared with repaired Meiser et al. data)

In [None]:
# Load the simulated decay data together with the repaired Meiser et al. sample,
# which is plotted alongside it. Note that this rebinds `df`.
# NOTE(review): the dict keys presumably end up as the `exp` labels — confirm
# in plotting.read_breakage_data.
df = plotting.read_breakage_data({
    'simulated': '../data_simulated/test_decay',
    'repaired': '../data_experimental/Aging_Meiser/repaired',
})
# bare last expression: show the loaded table as the cell output
df

In [None]:
# Per-position coverage for every experiment and read direction; length=150 is
# used as the 3' end of reads without a 3' break and for mirroring reverse reads.
plot_df = collect_base_coverage(df, length=150)

In [None]:
# Draw the simulated and the repaired coverage curves in fixed colors.
fig = plot_base_coverage(
    plot_df, 
    length=150,  # x-axis upper limit; same value as passed to collect_base_coverage
    colors={
        "simulated": "#3182bd",
        "repaired": "#969696",
    },
)

# Export as SVG into SI_figures/ (the other sections write to figures/), then display the figure.
fig.write_image('SI_figures/simulated_base_coverage.svg')
fig.show()