In [None]:
import os

import pandas as pd
import plotly.express as px

### Set the path to the parent experiment folder

In [None]:
# Parent folder holding the batch analysis output files read by the cells below.
# End it with a path separator (e.g. 'experiments/'): the file names are appended to this string.
path = '<path/to/parent/folder>'

# Overall error rate

Compares the overall error rates between the single and the clustered analysis. This plot can only be generated when both analysis types were performed.

In [None]:
# Load the error rates of both analysis types into one long-format frame.
# The files are read without a header row, so the column names are supplied here.
frames = []
for analysis in ['single', 'clustered']:
    # os.path.join inserts the separator when `path` has no trailing one;
    # plain string concatenation would fuse the folder name with the file name.
    filename = os.path.join(path, f'errorrates_fw_batch_analyze_{analysis}.txt')
    idf = pd.read_csv(filename, sep=',', names=['experiment', 'match', 'deletion', 'insertion', 'substitution'])
    idf['group'] = analysis  # remember which analysis each row came from
    frames.append(idf)
df = pd.concat(frames, ignore_index=True)
df = df.drop(columns=['match'])  # only the three error types are plotted
# One row per (experiment, group, error type) so plotly can color/pattern by column.
df = pd.melt(df, id_vars=['experiment', 'group'], var_name='Error type', value_name='Error rate')

# Grouped bars: color encodes the error type, fill pattern encodes the analysis type.
px.bar(
    df,
    x='experiment',
    y='Error rate',
    color='Error type',
    pattern_shape='group',
    title='Error rates between single and clustered analysis',
    labels={'experiment': 'Experiment', 'group': 'Analysis'},
    template='simple_white',
    width=800,
    height=400,
).update_layout(barmode='group')

## Select analysis type

The subsequent plots are specific to one analysis pipeline, which is selected in the cell below.

In [None]:
# Name fragment inserted into the file names loaded by the cells below
# (e.g. errorrates_fw_batch_<analysis_type>.txt).
analysis_type = 'analyze_single'

# To plot the results of the analysis with clustered reads instead, use:
# analysis_type = 'analyze_clustered'

# Error rate by experiment

In [None]:
# Error rates of the selected analysis type, one row per experiment.
# The file is read without a header row, so the column names are supplied here.
# os.path.join inserts the separator when `path` has no trailing one;
# plain string concatenation would fuse the folder name with the file name.
df = pd.read_csv(
    os.path.join(path, f'errorrates_fw_batch_{analysis_type}.txt'),
    sep=',',
    names=['experiment', 'match', 'deletion', 'insertion', 'substitution'],
)
df = df.drop(columns=['match'])  # only the three error types are plotted
# One row per (experiment, error type) so the bars can be stacked by error type.
df = pd.melt(df, id_vars=['experiment'], var_name='Error type', value_name='Error rate')

# Stacked bars: total bar height is the summed error rate of an experiment.
# (No 'group' label here: this frame has no 'group' column.)
px.bar(
    df,
    x='experiment',
    y='Error rate',
    color='Error type',
    title='Error rates between experiments',
    labels={'experiment': 'Experiment'},
    template='simple_white',
    width=800,
    height=400,
).update_layout(barmode='stack')

# Position bias in errors between experiments

### for deletions

In [None]:
# Per-position deletion rates of the selected analysis type.
# os.path.join inserts the separator when `path` has no trailing one;
# plain string concatenation would fuse the folder name with the file name.
df = pd.read_csv(os.path.join(path, f'delposition_fw_batch_{analysis_type}.txt'), sep=',', header=None)
# With header=None the columns are labelled 0, 1, 2, ...; column 0 is used as the experiment label.
df = df.rename(columns={0: 'Experiment'})
# The remaining integer column labels become the 'Position in read' values.
df = pd.melt(df, id_vars=['Experiment'], var_name='Position in read', value_name='Rate of deletions')

# One line per experiment; the y-axis starts at 0 and its upper limit is left unset.
px.line(
    df,
    x='Position in read',
    y='Rate of deletions',
    color='Experiment',
    title='Deletion position bias',
    template='simple_white',
    width=800,
    height=400,
    range_y=[0, None],
)

### for insertions

In [None]:
# Per-position insertion rates of the selected analysis type.
# os.path.join inserts the separator when `path` has no trailing one;
# plain string concatenation would fuse the folder name with the file name.
df = pd.read_csv(os.path.join(path, f'insposition_fw_batch_{analysis_type}.txt'), sep=',', header=None)
# With header=None the columns are labelled 0, 1, 2, ...; column 0 is used as the experiment label.
df = df.rename(columns={0: 'Experiment'})
# The remaining integer column labels become the 'Position in read' values.
df = pd.melt(df, id_vars=['Experiment'], var_name='Position in read', value_name='Rate of insertions')

# One line per experiment; the y-axis starts at 0 and its upper limit is left unset.
px.line(
    df,
    x='Position in read',
    y='Rate of insertions',
    color='Experiment',
    title='Insertion position bias',
    template='simple_white',
    width=800,
    height=400,
    range_y=[0, None],
)

### for substitutions

In [None]:
# Per-position substitution rates of the selected analysis type.
# os.path.join inserts the separator when `path` has no trailing one;
# plain string concatenation would fuse the folder name with the file name.
df = pd.read_csv(os.path.join(path, f'subposition_fw_batch_{analysis_type}.txt'), sep=',', header=None)
# With header=None the columns are labelled 0, 1, 2, ...; column 0 is used as the experiment label.
df = df.rename(columns={0: 'Experiment'})
# The remaining integer column labels become the 'Position in read' values.
df = pd.melt(df, id_vars=['Experiment'], var_name='Position in read', value_name='Rate of substitutions')

# One line per experiment; the y-axis starts at 0 and its upper limit is left unset.
px.line(
    df,
    x='Position in read',
    y='Rate of substitutions',
    color='Experiment',
    title='Substitution position bias',
    template='simple_white',
    width=800,
    height=400,
    range_y=[0, None],
)