In [None]:
import pandas as pd
import plotly.express as px

### set the path to the experiment folder

In [None]:
# Root folder of the experiment. The cells below read their input files from
# the 'analysis_single' / 'analysis_clustered' subfolders of this path.
path = '<path/to/experiment/folder>'

# Overall error rate

Compares the overall error rates between the single and the clustered analysis. This can only be plotted when both analysis types were performed.

In [None]:
# Collect the per-type error rates of both pipelines from the first line of
# their respective errorrates_fw.txt file.
rates = {}
for analysis in ['single', 'clustered']:
    # Build the path the same way as the other cells (no './' prefix), so an
    # absolute experiment path is not silently turned into a relative one.
    with open(f'{path}/analysis_{analysis}/errorrates_fw.txt', 'r') as handle:
        fields = handle.readline().split(',')
    # The first field is skipped, as before (presumably not a per-type rate —
    # TODO confirm against the file format). Empty or whitespace-only fields,
    # e.g. after a trailing comma, are ignored instead of crashing float().
    rates[analysis] = [float(field) for field in fields[1:] if field.strip()]

# Long format: one row per (analysis type, error type); the analysis type is
# kept in the index so it can be used as the x axis.
df = pd.melt(
    pd.DataFrame(rates, index=['deletion', 'insertion', 'substitution']).T,
    var_name='Error type',
    value_name='Error rate',
    ignore_index=False,
)

px.bar(
    df,
    x=df.index,
    y='Error rate',
    color='Error type',
    title='Error rates between single and clustered analysis',
    labels={'index': 'Analysis type', 'y': 'Error rate'},
    template='simple_white',
    width=800,
    height=400,
)

# df

## select analysis type

The subsequent plots are specific to one analysis pipeline, which is selected in the cell below.

In [None]:
# Name of the analysis subfolder (inside `path`) that the plots below read from.
analysis_type = 'analysis_single'

# change to this to see analysis with clustered reads
# analysis_type = 'analysis_clustered'

# Base bias in errors

### for deletions

In [None]:
# Read the deletion fractions from the first line of delbias_fw.txt.
# The context manager closes the file handle right after reading.
with open(f'{path}/{analysis_type}/delbias_fw.txt', 'r') as handle:
    fields = handle.readline().split(',')
# Skip empty or whitespace-only fields (e.g. after a trailing comma), which
# float() cannot parse.
bias = [float(field) for field in fields if field.strip()]

# Assumes the file lists the values in the order A, C, G, T — TODO confirm.
bases = ['A', 'C', 'G', 'T']

# stacked bar chart: all bars share the same (empty) y category, so they are
# drawn next to each other on a single horizontal bar
px.bar(
    y=[''] * len(bases),
    x=bias,
    color=bases,
    labels={'x': 'Fraction of deletions', 'y': '', 'color': 'Base'},
    title='Deletion bias',
    template='simple_white',
    orientation='h',
    range_x=[0, 1],
    width=800,
    height=400,
).update_layout(barmode='stack').update_yaxes(visible=False)

### for insertions

In [None]:
# Read the insertion fractions from the first line of insbias_fw.txt.
# The context manager closes the file handle right after reading.
with open(f'{path}/{analysis_type}/insbias_fw.txt', 'r') as handle:
    fields = handle.readline().split(',')
# Skip empty or whitespace-only fields (e.g. after a trailing comma), which
# float() cannot parse.
bias = [float(field) for field in fields if field.strip()]

# Assumes the file lists the values in the order A, C, G, T — TODO confirm.
bases = ['A', 'C', 'G', 'T']

# stacked bar chart: all bars share the same (empty) y category, so they are
# drawn next to each other on a single horizontal bar
px.bar(
    y=[''] * len(bases),
    x=bias,
    color=bases,
    labels={'x': 'Fraction of insertions', 'y': '', 'color': 'Base'},
    title='Insertion bias',
    template='simple_white',
    orientation='h',
    range_x=[0, 1],
    width=800,
    height=400,
).update_layout(barmode='stack').update_yaxes(visible=False)

### for substitutions

In [None]:
# Read the substitution fractions from the first line of subbias_fw.txt.
# The context manager closes the file handle right after reading.
with open(f'{path}/{analysis_type}/subbias_fw.txt', 'r') as handle:
    fields = handle.readline().split(',')
# Skip empty or whitespace-only fields (e.g. after a trailing comma), which
# float() cannot parse.
bias = [float(field) for field in fields if field.strip()]

# Assumes the file lists the values in this order — TODO confirm.
substitutions = ['A2C', 'A2G', 'A2T', 'C2A', 'C2G', 'C2T', 'G2A', 'G2C', 'G2T', 'T2A', 'T2C', 'T2G']

# stacked bar chart: all bars share the same (empty) y category, so they are
# drawn next to each other on a single horizontal bar
px.bar(
    y=[''] * len(substitutions),
    x=bias,
    color=substitutions,
    # The legend entries are substitution types, not single bases.
    labels={'x': 'Fraction of substitutions', 'y': '', 'color': 'Substitution'},
    title='Substitution bias',
    template='simple_white',
    orientation='h',
    range_x=[0, 1],
    width=800,
    height=400,
).update_layout(barmode='stack').update_yaxes(visible=False)

# Position bias in errors

### for all error types together

In [None]:
# Read the per-position rates of every error type from the first line of the
# corresponding <type>position_fw.txt file.
bias = []
for errortype in ['del', 'ins', 'sub']:
    with open(f'{path}/{analysis_type}/{errortype}position_fw.txt', 'r') as handle:
        fields = handle.readline().split(',')
    # Skip empty or whitespace-only fields (e.g. after a trailing comma), in
    # line with the per-error-type cells.
    bias.append([float(field) for field in fields if field.strip()])

# One column per error type, one row per read position.
df = pd.DataFrame(bias, index=['deletion', 'insertion', 'substitution']).T

px.line(
    df,
    # 1-based positions; sized from the frame so x always matches its row count
    x=list(range(1, len(df) + 1)),
    y=df.columns,
    # Several y columns put plotly express in wide-form mode, where the y axis
    # is labelled through the 'value' key rather than 'y'.
    labels={'x': 'Position in read', 'value': 'Error rate', 'variable': 'Error type'},
    title='Error position bias',
    template='simple_white',
    width=800,
    height=400,
    range_y=[0, None],
)

### for deletions

In [None]:
# Read the per-position deletion rates from the first line of delposition_fw.txt.
# The context manager closes the file handle right after reading.
with open(f'{path}/{analysis_type}/delposition_fw.txt', 'r') as handle:
    fields = handle.readline().split(',')
# Skip empty or whitespace-only fields (e.g. after a trailing comma), which
# float() cannot parse.
bias = [float(field) for field in fields if field.strip()]

px.line(
    # 1-based position for every value read from the file
    x=list(range(1, len(bias) + 1)),
    y=bias,
    labels={'x': 'Position in read', 'y': 'Rate of deletions'},
    title='Deletion position bias',
    template='simple_white',
    width=800,
    height=400,
    range_y=[0, None],
)

### for insertions

In [None]:
# Read the per-position insertion rates from the first line of insposition_fw.txt.
# The context manager closes the file handle right after reading.
with open(f'{path}/{analysis_type}/insposition_fw.txt', 'r') as handle:
    fields = handle.readline().split(',')
# Skip empty or whitespace-only fields (e.g. after a trailing comma), which
# float() cannot parse.
bias = [float(field) for field in fields if field.strip()]

px.line(
    # 1-based position for every value read from the file
    x=list(range(1, len(bias) + 1)),
    y=bias,
    labels={'x': 'Position in read', 'y': 'Rate of insertions'},
    title='Insertion position bias',
    template='simple_white',
    width=800,
    height=400,
    range_y=[0, None],
)

### for substitutions

In [None]:
# Read the per-position substitution rates from the first line of subposition_fw.txt.
# The context manager closes the file handle right after reading.
with open(f'{path}/{analysis_type}/subposition_fw.txt', 'r') as handle:
    fields = handle.readline().split(',')
# Skip empty or whitespace-only fields (e.g. after a trailing comma), which
# float() cannot parse.
bias = [float(field) for field in fields if field.strip()]

px.line(
    # 1-based position for every value read from the file
    x=list(range(1, len(bias) + 1)),
    y=bias,
    labels={'x': 'Position in read', 'y': 'Rate of substitutions'},
    title='Substitution position bias',
    template='simple_white',
    width=800,
    height=400,
    range_y=[0, None],
)