# Simulation experiment for detection thresholds for variant calling

This notebook is to get detection thresholds for the limit of the variant calling based on: 

1. Read depth
2. Error rate
3. Number of mutations

The data from this comes from: https://pubs.acs.org/doi/10.1021/acscentsci.7b00548 Ape AGW


In [1]:
import pandas as pd
# Visualisation things to make the figures look nice
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from sciutil import SciUtil


data_dir = 'ePCR_data/'
fig_dir = 'ePCR_figures/'


plt.rcParams['svg.fonttype'] = 'none'
axis_line_width = 1.0
axis_font_size = 12
title_font_size = 12
label_font_size = 10
figsize = (5, 4) # Figure size
font = 'Arial'
style = 'ticks'
font_family = 'sans-serif'

# Create a figure with 2x2 subplots
sns.set_style("whitegrid")
cmap = 'viridis'
palette = sns.color_palette("viridis", as_cmap=True)
sns.set_palette(cmap)

sns.set(rc={'figure.figsize': figsize, 'font.family': font_family,
            'font.sans-serif': font, 'font.size': label_font_size}, style=style)

def set_ax_params(ax):
    ax.tick_params(direction='out', length=2, width=axis_line_width)
    ax.spines['bottom'].set_linewidth(axis_line_width)
    ax.spines['top'].set_linewidth(0)
    ax.spines['left'].set_linewidth(axis_line_width)
    ax.spines['right'].set_linewidth(0)
    ax.tick_params(labelsize=axis_font_size)
    ax.tick_params(axis='x', which='major', pad=2.0)
    ax.tick_params(axis='y', which='major', pad=2.0)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, horizontalalignment='right')
    ax.tick_params(labelsize=label_font_size)


u = SciUtil()

# Generate mutations on a "real" sequence
# Decided to choose tauD from Ecoli K12

parent_sequence_aa = 'MTPSDIPGYDYGRVEKSPITDLEFDLLKKTVMLGEKDVMYLKKACDVLKDQVDEILDLAGGWVASNEHLIYYFSNPDTGEPIKEYLERVRARFGAWILDTTCRDYNREWLDYQYEVGLRHHRSKKGVTDGVRTVPHIPLRYLIAWIYPITATIKPFLAKKGGSPEDIEGMYNAWFKSVVLQVAIWSHPYTKENDWLEHHHHHH*'

parent_sequence = 'ATGACTCCCTCGGACATCCCGGGATATGATTATGGGCGTGTCGAGAAGTCACCCATCACGGACCTTGAGTTTGACCTTCTGAAGAAGACTGTCATGTTAGGTGAAAAGGACGTAATGTACTTGAAAAAGGCGTGTGACGTTCTGAAAGATCAAGTTGATGAGATCCTTGACTTGGCGGGTGGTTGGGTAGCATCAAATGAGCATTTGATTTATTACTTCTCCAATCCGGATACAGGAGAGCCTATTAAGGAATACCTGGAACGTGTACGCGCTCGCTTTGGAGCCTGGATTCTGGACACTACCTGCCGCGACTATAACCGTGAATGGTTAGACTACCAGTACGAAGTTGGGCTTCGTCATCACCGTTCAAAGAAAGGGGTCACAGACGGAGTACGCACCGTGCCCCATATCCCACTTCGTTATCTTATCGCATGGATCTATCCTATCACCGCCACTATCAAGCCATTTTTGGCTAAGAAAGGTGGCTCTCCGGAAGACATCGAAGGGATGTACAACGCTTGGTTCAAGTCTGTAGTTTTACAAGTTGCCATCTGGTCACACCCTTATACTAAGGAGAATGACTGGCTCGAGCACCACCACCACCACCACTGA'

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# Introduce mutations at a given frequency and an error rate

Test setup:

Change to 0.1 - 5% 0.2% step size. 

1. For number of mutations from 1 to the sequence length test mutating each one and correlate this to the p value
2. For sequencing error rates from 0 to 100% make sequences ranging with this and see what the results are for the p value and the error
3. For different sequence lengths also check how sequence length corresponds to the pvalue

In [2]:
from minION import *
from tqdm import tqdm

label = 'ApeAGW'

## Experiment 1: Varying the sequencing error rate for a single mutation

In [3]:
# We're going to make an experiment of 10 plates with different sequencing error rates
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1

experiment_df = pd.DataFrame()
for sequencing_error in range(0, 50, 5):
    sequencing_error_rate = sequencing_error/100.0
    run_df = make_experiment(f'SeqError_{sequencing_error}', read_depth, sequencing_error_rate, parent_sequence,
                             library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff)
    run_df.reset_index(inplace=True)
    experiment_df = pd.concat([experiment_df, run_df])

# Also plot each one
experiment_df.to_csv(f'{data_dir}Experiment1.csv', index=False)

100%|███████████████████████████████████████████| 96/96 [00:32<00:00,  2.94it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:38<00:00,  2.52it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:38<00:00,  2.48it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:41<00:00,  2.34it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:40<00:00,  2.38it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:41<00:00,  2.30it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:43<00:00,  2.21it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████

-----------------------------------------
-----------------------------------------


  variant_df.at[current_well, "frequency"] = frequency
  1%|▍                                           | 1/96 [00:00<00:43,  2.19it/s]

-----------------------------------------
-----------------------------------------


  2%|▉                                           | 2/96 [00:00<00:42,  2.22it/s]

-----------------------------------------
-----------------------------------------


  3%|█▍                                          | 3/96 [00:01<00:41,  2.22it/s]

-----------------------------------------
-----------------------------------------


  4%|█▊                                          | 4/96 [00:01<00:41,  2.24it/s]

-----------------------------------------
-----------------------------------------


  5%|██▎                                         | 5/96 [00:02<00:40,  2.23it/s]

-----------------------------------------
-----------------------------------------


  6%|██▊                                         | 6/96 [00:02<00:43,  2.06it/s]

-----------------------------------------
-----------------------------------------


  7%|███▏                                        | 7/96 [00:03<00:42,  2.11it/s]

-----------------------------------------
-----------------------------------------


  8%|███▋                                        | 8/96 [00:03<00:40,  2.15it/s]

-----------------------------------------
-----------------------------------------


  9%|████▏                                       | 9/96 [00:04<00:39,  2.18it/s]

-----------------------------------------
-----------------------------------------


 10%|████▍                                      | 10/96 [00:04<00:39,  2.19it/s]

-----------------------------------------
-----------------------------------------


 11%|████▉                                      | 11/96 [00:05<00:38,  2.20it/s]

-----------------------------------------
-----------------------------------------


 12%|█████▍                                     | 12/96 [00:05<00:38,  2.21it/s]

-----------------------------------------
-----------------------------------------


 14%|█████▊                                     | 13/96 [00:05<00:37,  2.22it/s]

-----------------------------------------
-----------------------------------------


 15%|██████▎                                    | 14/96 [00:06<00:36,  2.26it/s]

-----------------------------------------
-----------------------------------------


 16%|██████▋                                    | 15/96 [00:06<00:35,  2.27it/s]

-----------------------------------------
-----------------------------------------


 17%|███████▏                                   | 16/96 [00:07<00:34,  2.29it/s]

-----------------------------------------
-----------------------------------------


 18%|███████▌                                   | 17/96 [00:07<00:35,  2.23it/s]

-----------------------------------------
-----------------------------------------


 19%|████████                                   | 18/96 [00:08<00:35,  2.19it/s]

-----------------------------------------
-----------------------------------------


 20%|████████▌                                  | 19/96 [00:08<00:35,  2.19it/s]

-----------------------------------------
-----------------------------------------


 21%|████████▉                                  | 20/96 [00:09<00:35,  2.16it/s]

-----------------------------------------
-----------------------------------------


 22%|█████████▍                                 | 21/96 [00:09<00:34,  2.17it/s]

-----------------------------------------
-----------------------------------------


 23%|█████████▊                                 | 22/96 [00:10<00:33,  2.19it/s]

-----------------------------------------
-----------------------------------------


 24%|██████████▎                                | 23/96 [00:10<00:35,  2.07it/s]

-----------------------------------------
-----------------------------------------


 25%|██████████▊                                | 24/96 [00:11<00:34,  2.10it/s]

-----------------------------------------
-----------------------------------------


 26%|███████████▏                               | 25/96 [00:11<00:33,  2.10it/s]

-----------------------------------------
-----------------------------------------


 27%|███████████▋                               | 26/96 [00:11<00:33,  2.10it/s]

-----------------------------------------
-----------------------------------------


 28%|████████████                               | 27/96 [00:12<00:31,  2.16it/s]

-----------------------------------------
-----------------------------------------


 29%|████████████▌                              | 28/96 [00:12<00:31,  2.18it/s]

-----------------------------------------
-----------------------------------------


 30%|████████████▉                              | 29/96 [00:13<00:31,  2.14it/s]

-----------------------------------------
-----------------------------------------


 31%|█████████████▍                             | 30/96 [00:13<00:31,  2.13it/s]

-----------------------------------------
-----------------------------------------


 32%|█████████████▉                             | 31/96 [00:14<00:30,  2.11it/s]

-----------------------------------------
-----------------------------------------


 33%|██████████████▎                            | 32/96 [00:14<00:30,  2.08it/s]

-----------------------------------------
-----------------------------------------


 34%|██████████████▊                            | 33/96 [00:15<00:30,  2.05it/s]

-----------------------------------------
-----------------------------------------


 35%|███████████████▏                           | 34/96 [00:15<00:30,  2.03it/s]

-----------------------------------------
-----------------------------------------


 36%|███████████████▋                           | 35/96 [00:16<00:29,  2.03it/s]

-----------------------------------------
-----------------------------------------


 38%|████████████████▏                          | 36/96 [00:16<00:29,  2.05it/s]

-----------------------------------------
-----------------------------------------


 39%|████████████████▌                          | 37/96 [00:17<00:28,  2.06it/s]

-----------------------------------------
-----------------------------------------


 40%|█████████████████                          | 38/96 [00:17<00:27,  2.08it/s]

-----------------------------------------
-----------------------------------------


 41%|█████████████████▍                         | 39/96 [00:18<00:26,  2.13it/s]

-----------------------------------------
-----------------------------------------


 42%|█████████████████▉                         | 40/96 [00:18<00:27,  2.03it/s]

-----------------------------------------
-----------------------------------------


 43%|██████████████████▎                        | 41/96 [00:19<00:26,  2.09it/s]

-----------------------------------------
-----------------------------------------


 44%|██████████████████▊                        | 42/96 [00:19<00:25,  2.12it/s]

-----------------------------------------
-----------------------------------------


 45%|███████████████████▎                       | 43/96 [00:20<00:24,  2.14it/s]

-----------------------------------------
-----------------------------------------


 46%|███████████████████▋                       | 44/96 [00:20<00:23,  2.17it/s]

-----------------------------------------
-----------------------------------------


 47%|████████████████████▏                      | 45/96 [00:20<00:23,  2.21it/s]

-----------------------------------------
-----------------------------------------


 48%|████████████████████▌                      | 46/96 [00:21<00:22,  2.22it/s]

-----------------------------------------
-----------------------------------------


 49%|█████████████████████                      | 47/96 [00:21<00:22,  2.21it/s]

-----------------------------------------
-----------------------------------------


 50%|█████████████████████▌                     | 48/96 [00:22<00:21,  2.22it/s]

-----------------------------------------
-----------------------------------------


 51%|█████████████████████▉                     | 49/96 [00:22<00:21,  2.23it/s]

-----------------------------------------
-----------------------------------------


 52%|██████████████████████▍                    | 50/96 [00:23<00:20,  2.24it/s]

-----------------------------------------
-----------------------------------------


 53%|██████████████████████▊                    | 51/96 [00:23<00:20,  2.22it/s]

-----------------------------------------
-----------------------------------------


 54%|███████████████████████▎                   | 52/96 [00:24<00:20,  2.20it/s]

-----------------------------------------
-----------------------------------------


 55%|███████████████████████▋                   | 53/96 [00:24<00:19,  2.20it/s]

-----------------------------------------
-----------------------------------------


 56%|████████████████████████▏                  | 54/96 [00:25<00:19,  2.20it/s]

-----------------------------------------
-----------------------------------------


 57%|████████████████████████▋                  | 55/96 [00:25<00:18,  2.20it/s]

-----------------------------------------
-----------------------------------------


 58%|█████████████████████████                  | 56/96 [00:25<00:18,  2.22it/s]

-----------------------------------------
-----------------------------------------


 59%|█████████████████████████▌                 | 57/96 [00:26<00:18,  2.10it/s]

-----------------------------------------
-----------------------------------------


 60%|█████████████████████████▉                 | 58/96 [00:26<00:17,  2.14it/s]

-----------------------------------------
-----------------------------------------


 61%|██████████████████████████▍                | 59/96 [00:27<00:17,  2.17it/s]

-----------------------------------------
-----------------------------------------


 62%|██████████████████████████▉                | 60/96 [00:27<00:16,  2.17it/s]

-----------------------------------------
-----------------------------------------


 64%|███████████████████████████▎               | 61/96 [00:28<00:16,  2.17it/s]

-----------------------------------------
-----------------------------------------


 65%|███████████████████████████▊               | 62/96 [00:28<00:16,  2.12it/s]

-----------------------------------------
-----------------------------------------


 66%|████████████████████████████▏              | 63/96 [00:29<00:15,  2.14it/s]

-----------------------------------------
-----------------------------------------


 67%|████████████████████████████▋              | 64/96 [00:29<00:14,  2.16it/s]

-----------------------------------------
-----------------------------------------


 68%|█████████████████████████████              | 65/96 [00:30<00:14,  2.19it/s]

-----------------------------------------
-----------------------------------------


 69%|█████████████████████████████▌             | 66/96 [00:30<00:13,  2.21it/s]

-----------------------------------------
-----------------------------------------


 70%|██████████████████████████████             | 67/96 [00:30<00:13,  2.23it/s]

-----------------------------------------
-----------------------------------------


 71%|██████████████████████████████▍            | 68/96 [00:31<00:12,  2.22it/s]

-----------------------------------------
-----------------------------------------


 72%|██████████████████████████████▉            | 69/96 [00:31<00:12,  2.23it/s]

-----------------------------------------
-----------------------------------------


 73%|███████████████████████████████▎           | 70/96 [00:32<00:11,  2.22it/s]

-----------------------------------------
-----------------------------------------


 74%|███████████████████████████████▊           | 71/96 [00:32<00:11,  2.22it/s]

-----------------------------------------
-----------------------------------------


 75%|████████████████████████████████▎          | 72/96 [00:33<00:10,  2.20it/s]

-----------------------------------------
-----------------------------------------


 76%|████████████████████████████████▋          | 73/96 [00:33<00:10,  2.14it/s]

-----------------------------------------
-----------------------------------------


 77%|█████████████████████████████████▏         | 74/96 [00:34<00:10,  2.04it/s]

-----------------------------------------
-----------------------------------------


 78%|█████████████████████████████████▌         | 75/96 [00:34<00:10,  2.08it/s]

-----------------------------------------
-----------------------------------------


 79%|██████████████████████████████████         | 76/96 [00:35<00:09,  2.14it/s]

-----------------------------------------
-----------------------------------------


 80%|██████████████████████████████████▍        | 77/96 [00:35<00:08,  2.16it/s]

-----------------------------------------
-----------------------------------------


 81%|██████████████████████████████████▉        | 78/96 [00:36<00:08,  2.18it/s]

-----------------------------------------
-----------------------------------------


 82%|███████████████████████████████████▍       | 79/96 [00:36<00:07,  2.20it/s]

-----------------------------------------
-----------------------------------------


 83%|███████████████████████████████████▊       | 80/96 [00:36<00:07,  2.23it/s]

-----------------------------------------
-----------------------------------------


 84%|████████████████████████████████████▎      | 81/96 [00:37<00:06,  2.23it/s]

-----------------------------------------
-----------------------------------------


 85%|████████████████████████████████████▋      | 82/96 [00:37<00:06,  2.24it/s]

-----------------------------------------
-----------------------------------------


 86%|█████████████████████████████████████▏     | 83/96 [00:38<00:05,  2.21it/s]

-----------------------------------------
-----------------------------------------


 88%|█████████████████████████████████████▋     | 84/96 [00:38<00:05,  2.21it/s]

-----------------------------------------
-----------------------------------------


 89%|██████████████████████████████████████     | 85/96 [00:39<00:04,  2.24it/s]

-----------------------------------------
-----------------------------------------


 90%|██████████████████████████████████████▌    | 86/96 [00:39<00:04,  2.24it/s]

-----------------------------------------
-----------------------------------------


 91%|██████████████████████████████████████▉    | 87/96 [00:40<00:03,  2.26it/s]

-----------------------------------------
-----------------------------------------


 92%|███████████████████████████████████████▍   | 88/96 [00:40<00:03,  2.28it/s]

-----------------------------------------
-----------------------------------------


 93%|███████████████████████████████████████▊   | 89/96 [00:40<00:03,  2.26it/s]

-----------------------------------------
-----------------------------------------


 94%|████████████████████████████████████████▎  | 90/96 [00:41<00:02,  2.26it/s]

-----------------------------------------
-----------------------------------------


 95%|████████████████████████████████████████▊  | 91/96 [00:41<00:02,  2.11it/s]

-----------------------------------------
-----------------------------------------


 96%|█████████████████████████████████████████▏ | 92/96 [00:42<00:01,  2.13it/s]

-----------------------------------------
-----------------------------------------


 97%|█████████████████████████████████████████▋ | 93/96 [00:42<00:01,  2.15it/s]

-----------------------------------------
-----------------------------------------


 98%|██████████████████████████████████████████ | 94/96 [00:43<00:00,  2.15it/s]

-----------------------------------------
-----------------------------------------


 99%|██████████████████████████████████████████▌| 95/96 [00:43<00:00,  2.17it/s]

-----------------------------------------
-----------------------------------------


100%|███████████████████████████████████████████| 96/96 [00:44<00:00,  2.17it/s]
  0%|                                                    | 0/96 [00:00<?, ?it/s]

-----------------------------------------
-----------------------------------------


  variant_df.at[current_well, "frequency"] = frequency
  1%|▍                                           | 1/96 [00:00<00:43,  2.20it/s]

-----------------------------------------
-----------------------------------------


  2%|▉                                           | 2/96 [00:00<00:42,  2.19it/s]

-----------------------------------------
-----------------------------------------


  3%|█▍                                          | 3/96 [00:01<00:42,  2.20it/s]

-----------------------------------------
-----------------------------------------


  4%|█▊                                          | 4/96 [00:01<00:41,  2.20it/s]

-----------------------------------------
-----------------------------------------


  5%|██▎                                         | 5/96 [00:02<00:41,  2.22it/s]

-----------------------------------------
-----------------------------------------


  6%|██▊                                         | 6/96 [00:02<00:40,  2.21it/s]

-----------------------------------------
-----------------------------------------


  7%|███▏                                        | 7/96 [00:03<00:40,  2.19it/s]

-----------------------------------------
-----------------------------------------


  8%|███▋                                        | 8/96 [00:03<00:40,  2.20it/s]

-----------------------------------------
-----------------------------------------


  9%|████▏                                       | 9/96 [00:04<00:39,  2.21it/s]

-----------------------------------------
-----------------------------------------


 10%|████▍                                      | 10/96 [00:04<00:38,  2.21it/s]

-----------------------------------------
-----------------------------------------


 11%|████▉                                      | 11/96 [00:04<00:38,  2.21it/s]

-----------------------------------------
-----------------------------------------


 12%|█████▍                                     | 12/96 [00:05<00:40,  2.08it/s]

-----------------------------------------
-----------------------------------------


 14%|█████▊                                     | 13/96 [00:05<00:39,  2.11it/s]

-----------------------------------------
-----------------------------------------


 15%|██████▎                                    | 14/96 [00:06<00:38,  2.14it/s]

-----------------------------------------
-----------------------------------------


 16%|██████▋                                    | 15/96 [00:06<00:37,  2.16it/s]

-----------------------------------------
-----------------------------------------


 17%|███████▏                                   | 16/96 [00:07<00:37,  2.16it/s]

-----------------------------------------
-----------------------------------------


 18%|███████▌                                   | 17/96 [00:07<00:36,  2.17it/s]

-----------------------------------------
-----------------------------------------


 19%|████████                                   | 18/96 [00:08<00:35,  2.17it/s]

-----------------------------------------
-----------------------------------------


 20%|████████▌                                  | 19/96 [00:08<00:35,  2.19it/s]

-----------------------------------------
-----------------------------------------


 21%|████████▉                                  | 20/96 [00:09<00:34,  2.21it/s]

-----------------------------------------
-----------------------------------------


 22%|█████████▍                                 | 21/96 [00:09<00:34,  2.20it/s]

-----------------------------------------
-----------------------------------------


 23%|█████████▊                                 | 22/96 [00:10<00:33,  2.20it/s]

-----------------------------------------
-----------------------------------------


 24%|██████████▎                                | 23/96 [00:10<00:33,  2.20it/s]

-----------------------------------------
-----------------------------------------


 25%|██████████▊                                | 24/96 [00:10<00:32,  2.20it/s]

-----------------------------------------
-----------------------------------------


 26%|███████████▏                               | 25/96 [00:11<00:32,  2.21it/s]

-----------------------------------------
-----------------------------------------


 27%|███████████▋                               | 26/96 [00:11<00:31,  2.22it/s]

-----------------------------------------
-----------------------------------------


 28%|████████████                               | 27/96 [00:12<00:30,  2.23it/s]

-----------------------------------------
-----------------------------------------


 29%|████████████▌                              | 28/96 [00:12<00:30,  2.22it/s]

-----------------------------------------
-----------------------------------------


 30%|████████████▉                              | 29/96 [00:13<00:31,  2.11it/s]

-----------------------------------------
-----------------------------------------


 31%|█████████████▍                             | 30/96 [00:13<00:30,  2.17it/s]

-----------------------------------------
-----------------------------------------


 32%|█████████████▉                             | 31/96 [00:14<00:29,  2.17it/s]

-----------------------------------------
-----------------------------------------


 33%|██████████████▎                            | 32/96 [00:14<00:29,  2.19it/s]

-----------------------------------------
-----------------------------------------


 34%|██████████████▊                            | 33/96 [00:15<00:28,  2.23it/s]

-----------------------------------------
-----------------------------------------


 35%|███████████████▏                           | 34/96 [00:15<00:28,  2.21it/s]

-----------------------------------------
-----------------------------------------


 36%|███████████████▋                           | 35/96 [00:16<00:27,  2.20it/s]

-----------------------------------------
-----------------------------------------


 38%|████████████████▏                          | 36/96 [00:16<00:27,  2.19it/s]

-----------------------------------------
-----------------------------------------


 39%|████████████████▌                          | 37/96 [00:16<00:26,  2.20it/s]

-----------------------------------------
-----------------------------------------


 40%|█████████████████                          | 38/96 [00:17<00:26,  2.21it/s]

-----------------------------------------
-----------------------------------------


 41%|█████████████████▍                         | 39/96 [00:17<00:26,  2.18it/s]

-----------------------------------------
-----------------------------------------


 42%|█████████████████▉                         | 40/96 [00:18<00:25,  2.18it/s]

-----------------------------------------
-----------------------------------------


 43%|██████████████████▎                        | 41/96 [00:18<00:25,  2.17it/s]

-----------------------------------------
-----------------------------------------


 44%|██████████████████▊                        | 42/96 [00:19<00:24,  2.16it/s]

-----------------------------------------
-----------------------------------------


 45%|███████████████████▎                       | 43/96 [00:19<00:24,  2.18it/s]

-----------------------------------------
-----------------------------------------


 46%|███████████████████▋                       | 44/96 [00:20<00:23,  2.20it/s]

-----------------------------------------
-----------------------------------------


 47%|████████████████████▏                      | 45/96 [00:20<00:23,  2.21it/s]

-----------------------------------------
-----------------------------------------


 48%|████████████████████▌                      | 46/96 [00:21<00:24,  2.08it/s]

-----------------------------------------
-----------------------------------------


 49%|█████████████████████                      | 47/96 [00:21<00:22,  2.13it/s]

-----------------------------------------
-----------------------------------------


 50%|█████████████████████▌                     | 48/96 [00:22<00:22,  2.16it/s]

-----------------------------------------
-----------------------------------------


 51%|█████████████████████▉                     | 49/96 [00:22<00:21,  2.17it/s]

-----------------------------------------
-----------------------------------------


 52%|██████████████████████▍                    | 50/96 [00:22<00:21,  2.18it/s]

-----------------------------------------
-----------------------------------------


 53%|██████████████████████▊                    | 51/96 [00:23<00:20,  2.17it/s]

-----------------------------------------
-----------------------------------------


 54%|███████████████████████▎                   | 52/96 [00:23<00:20,  2.17it/s]

-----------------------------------------
-----------------------------------------


 55%|███████████████████████▋                   | 53/96 [00:24<00:19,  2.19it/s]

-----------------------------------------
-----------------------------------------


 56%|████████████████████████▏                  | 54/96 [00:24<00:19,  2.16it/s]

-----------------------------------------
-----------------------------------------


 57%|████████████████████████▋                  | 55/96 [00:25<00:18,  2.18it/s]

-----------------------------------------
-----------------------------------------


 58%|█████████████████████████                  | 56/96 [00:25<00:18,  2.20it/s]

-----------------------------------------
-----------------------------------------


 59%|█████████████████████████▌                 | 57/96 [00:26<00:17,  2.20it/s]

-----------------------------------------
-----------------------------------------


 60%|█████████████████████████▉                 | 58/96 [00:26<00:17,  2.21it/s]

-----------------------------------------
-----------------------------------------


 61%|██████████████████████████▍                | 59/96 [00:27<00:16,  2.22it/s]

-----------------------------------------
-----------------------------------------


 62%|██████████████████████████▉                | 60/96 [00:27<00:16,  2.23it/s]

-----------------------------------------
-----------------------------------------


 64%|███████████████████████████▎               | 61/96 [00:27<00:15,  2.23it/s]

-----------------------------------------
-----------------------------------------


 65%|███████████████████████████▊               | 62/96 [00:28<00:15,  2.20it/s]

-----------------------------------------
-----------------------------------------


 66%|████████████████████████████▏              | 63/96 [00:28<00:15,  2.07it/s]

-----------------------------------------
-----------------------------------------


 67%|████████████████████████████▋              | 64/96 [00:29<00:15,  2.12it/s]

-----------------------------------------
-----------------------------------------


 68%|█████████████████████████████              | 65/96 [00:29<00:14,  2.15it/s]

-----------------------------------------
-----------------------------------------


 69%|█████████████████████████████▌             | 66/96 [00:30<00:13,  2.17it/s]

-----------------------------------------
-----------------------------------------


 70%|██████████████████████████████             | 67/96 [00:30<00:13,  2.18it/s]

-----------------------------------------
-----------------------------------------


 71%|██████████████████████████████▍            | 68/96 [00:31<00:12,  2.20it/s]

-----------------------------------------
-----------------------------------------


 72%|██████████████████████████████▉            | 69/96 [00:31<00:12,  2.20it/s]

-----------------------------------------
-----------------------------------------


 73%|███████████████████████████████▎           | 70/96 [00:32<00:11,  2.17it/s]

-----------------------------------------
-----------------------------------------


 74%|███████████████████████████████▊           | 71/96 [00:32<00:11,  2.19it/s]

-----------------------------------------
-----------------------------------------


 75%|████████████████████████████████▎          | 72/96 [00:32<00:10,  2.19it/s]

-----------------------------------------
-----------------------------------------


 76%|████████████████████████████████▋          | 73/96 [00:33<00:10,  2.18it/s]

-----------------------------------------
-----------------------------------------


 77%|█████████████████████████████████▏         | 74/96 [00:33<00:10,  2.20it/s]

-----------------------------------------
-----------------------------------------


 78%|█████████████████████████████████▌         | 75/96 [00:34<00:09,  2.20it/s]

-----------------------------------------
-----------------------------------------


 79%|██████████████████████████████████         | 76/96 [00:34<00:09,  2.21it/s]

-----------------------------------------
-----------------------------------------


 80%|██████████████████████████████████▍        | 77/96 [00:35<00:08,  2.21it/s]

-----------------------------------------
-----------------------------------------


 81%|██████████████████████████████████▉        | 78/96 [00:35<00:08,  2.21it/s]

-----------------------------------------
-----------------------------------------


 82%|███████████████████████████████████▍       | 79/96 [00:36<00:07,  2.21it/s]

-----------------------------------------
-----------------------------------------


 83%|███████████████████████████████████▊       | 80/96 [00:36<00:07,  2.08it/s]

-----------------------------------------
-----------------------------------------


 84%|████████████████████████████████████▎      | 81/96 [00:37<00:07,  2.12it/s]

-----------------------------------------
-----------------------------------------


 85%|████████████████████████████████████▋      | 82/96 [00:37<00:06,  2.15it/s]

-----------------------------------------
-----------------------------------------


 86%|█████████████████████████████████████▏     | 83/96 [00:38<00:06,  2.16it/s]

-----------------------------------------
-----------------------------------------


 88%|█████████████████████████████████████▋     | 84/96 [00:38<00:05,  2.17it/s]

-----------------------------------------
-----------------------------------------


 89%|██████████████████████████████████████     | 85/96 [00:38<00:05,  2.17it/s]

-----------------------------------------
-----------------------------------------


 90%|██████████████████████████████████████▌    | 86/96 [00:39<00:04,  2.18it/s]

-----------------------------------------
-----------------------------------------


 91%|██████████████████████████████████████▉    | 87/96 [00:39<00:04,  2.19it/s]

-----------------------------------------
-----------------------------------------


 92%|███████████████████████████████████████▍   | 88/96 [00:40<00:03,  2.21it/s]

-----------------------------------------
-----------------------------------------


 93%|███████████████████████████████████████▊   | 89/96 [00:40<00:03,  2.22it/s]

-----------------------------------------
-----------------------------------------


 94%|████████████████████████████████████████▎  | 90/96 [00:41<00:02,  2.22it/s]

-----------------------------------------
-----------------------------------------


 95%|████████████████████████████████████████▊  | 91/96 [00:41<00:02,  2.20it/s]

-----------------------------------------
-----------------------------------------


 96%|█████████████████████████████████████████▏ | 92/96 [00:42<00:01,  2.22it/s]

-----------------------------------------
-----------------------------------------


 97%|█████████████████████████████████████████▋ | 93/96 [00:42<00:01,  2.21it/s]

-----------------------------------------
-----------------------------------------


 98%|██████████████████████████████████████████ | 94/96 [00:43<00:00,  2.20it/s]

-----------------------------------------
-----------------------------------------


 99%|██████████████████████████████████████████▌| 95/96 [00:43<00:00,  2.20it/s]

-----------------------------------------
-----------------------------------------


100%|███████████████████████████████████████████| 96/96 [00:43<00:00,  2.18it/s]


## Experiment 2: varying read depth and it's effect on significance

In [4]:
# We're going to make an experiment of 10 plates with different sequencing error rates
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1

# Here we do it from 1-10 completely and then in intervals of 10 so that we can clearly see the impact
experiment_df = pd.DataFrame()
for read_depth in range(1, 10, 1):
    run_df = make_experiment(f'ReadDepth_{read_depth}', read_depth, sequencing_error, parent_sequence, 
                     library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff)
    run_df.reset_index(inplace=True)
    experiment_df = pd.concat([experiment_df, run_df])

for read_depth in range(10, 100, 5):
    run_df = make_experiment(f'ReadDepth_{read_depth}', read_depth, sequencing_error, parent_sequence, 
                     library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff)
    run_df.reset_index(inplace=True)
    experiment_df = pd.concat([experiment_df, run_df])

 # Also plot each one
experiment_df.to_csv(f'{data_dir}Experiment2_ReadDepth_{label}.csv', index=False)

100%|███████████████████████████████████████████| 96/96 [00:29<00:00,  3.26it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:30<00:00,  3.18it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:30<00:00,  3.15it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:31<00:00,  3.07it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:31<00:00,  3.05it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:31<00:00,  3.01it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:32<00:00,  2.97it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████

## Experiment 3: effect of sequence length on significance


In [5]:
# We're going to make an experiment of 10 plates with different sequencing error rates
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1


experiment_df = pd.DataFrame()
for seq_len in range(5, 2000, 50):
    if seq_len >= len(parent_sequence):
        break
    run_df = make_experiment(f'SeqLen_{seq_len}', read_depth, sequencing_error, parent_sequence[:seq_len*3],
             library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff)
    run_df.reset_index(inplace=True)
    experiment_df = pd.concat([experiment_df, run_df])

# Also plot each one
experiment_df.to_csv(f'{data_dir}Experiment3_{label}.csv', index=False)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|███████████████████████████████████████████| 18/18 [00:00<00:00, 61.65it/s]
  variant_df['accuracy'] = np.array(corrects) / (np.array(corrects) + np.array(incorrects))


















































































  variant_df.at[current_well, "frequency"] = frequency
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|███████████████████████████████████████████| 93/93 [00:10<00:00,  9.23it/s]
  variant_df['accuracy'] = np.array(corrects) / (np.array(corrects) + np.array(incorrects))







  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:18<00:00,  5.13it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:26<00:00,  3.65it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.79it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.82it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.80it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.82it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.80it/s]
  variant_df.at[current_well, "frequency"] = fre

In [6]:
experiment_df

Unnamed: 0,index,Plate,Well,Path,Alignment_count,P value,Mixed Well,Variant,mutation,frequency,P adj.,True Variant,correct,incorrect,accuracy
0,0,SeqLen_5,Well 0,,25,,False,#PARENT#,,,,ATGACTCCCTCGGAC,15,0,1.0
1,1,SeqLen_5,Well 1,,25,3.162858e-18,False,C5T,,0.960000,,ATGATTCCCTCGGAC,1,0,1.0
2,2,SeqLen_5,Well 2,,25,2.514702e-17,False,T6G,,0.960000,,ATGACGCCCTCGGAC,1,0,1.0
3,3,SeqLen_5,Well 3,,25,6.741758e-15,False,C9A,,0.880000,,ATGACTCCATCGGAC,1,0,1.0
4,4,SeqLen_5,Well 4,,25,1.030559e-13,False,T10G,,0.840000,,ATGACTCCCGCGGAC,1,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,91,SeqLen_605,Well 91,,25,1.956783e-280,False,T41A_T71C_T90C_A125T_T159A_T168G_G181A_A192T_T...,,0.887619,1.313702e-278,ATGACTCCCTCGGACATCCCGGGATATGATTATGGGCGTGACGAGA...,21,0,1.0
92,92,SeqLen_605,Well 92,,25,7.037150e-132,False,T30G_A126T_G269A_G377T_C403A_C405T_G472C_G522C...,,0.897778,8.238615e-132,ATGACTCCCTCGGACATCCCGGGATATGAGTATGGGCGTGTCGAGA...,9,0,1.0
93,93,SeqLen_605,Well 93,,25,3.806669e-139,False,G48C_G130C_G148C_T218G_C256G_G280C_A317C_T328A...,,0.880000,4.745977e-139,ATGACTCCCTCGGACATCCCGGGATATGATTATGGGCGTGTCGAGA...,10,0,1.0
94,94,SeqLen_605,Well 94,,25,2.961978e-240,False,T56C_G73A_T80G_A147G_T211G_A244G_G310T_T347G_G...,,0.917500,4.062141e-239,ATGACTCCCTCGGACATCCCGGGATATGATTATGGGCGTGTCGAGA...,16,0,1.0


## Experiment 4: effect of frequency cutoff

In [7]:
# We're going to make an experiment of 10 plates with different sequencing error rates
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1

experiment_df = pd.DataFrame()
for frequency_cutoff in range(5, 100, 10):
    run_df = make_experiment(f'FreqCutoff_{frequency_cutoff}', read_depth, sequencing_error, parent_sequence, library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff/100.0)
    run_df.reset_index(inplace=True)
    experiment_df = pd.concat([experiment_df, run_df])

 # Also plot each one
experiment_df.to_csv(f'{data_dir}Experiment4_{label}.csv', index=False)

  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.79it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.80it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.78it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.81it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.81it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.80it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:34<00:00,  2.82it/s]
  variant_df.at[current_well, "frequency"] = fre

## Experiment 5: ePCR mutation rate

In [8]:
# We're going to make an experiment of 10 plates with different sequencing error rates
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1


experiment_df = pd.DataFrame()
for epcr_mutation_rate in range(1, 20, 2):
    run_df = make_experiment(f'ePCR_{epcr_mutation_rate}', read_depth, sequencing_error, parent_sequence, library_number, number_of_wells, 
                             epcr_mutation_rate/1000.0, frequency_cutoff)
    run_df.reset_index(inplace=True)
    experiment_df = pd.concat([experiment_df, run_df])

 # Also plot each one
experiment_df.to_csv(f'{data_dir}Experiment5_{label}.csv', index=False)


  variant_df.at[current_well, "frequency"] = frequency
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|███████████████████████████████████████████| 40/40 [00:14<00:00,  2.78it/s]
  variant_df['accuracy'] = np.array(corrects) / (np.array(corrects) + np.array(incorrects))




























































  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|███████████████████████████████████████████| 79/79 [00:32<00:00,  2.47it/s]
  variant_df['accuracy'] = np.array(corrects) / (np.array(corrects) + np.array(incorrects))





















  variant_df.at[current_well, "frequency"] = frequency
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|███████████████████████████████████████████| 94/94 [00:42<00:00,  2.23it/s]
  variant_df['accuracy'] = np.array(corrects) / (np.array(corrects) + np.array(incorrects))






  variant_df.at[current_well, "frequency"] = frequency
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|███████████████████████████████████████████| 95/95 [00:52<00:00,  1.81it/s]
  variant_df['accuracy'] = np.array(corrects) / (np.array(corrects) + np.array(incorrects))





  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:54<00:00,  1.75it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:55<00:00,  1.72it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:05<00:00,  1.48it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:18<00:00,  1.23it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:09<00:00,  1.38it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:02<00:00,  1.53it/s]


## Experiment 6: mixed well rates


In [9]:
# We're going to make an experiment of 10 plates with different sequencing error rates
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1
number_wells_to_mix = 20
experiment_df = pd.DataFrame()

for mixture_rate in range(10, 90, 10):
    run_df = make_experiment(f'mixedWells_{mixture_rate}', read_depth, sequencing_error, parent_sequence, library_number, number_of_wells, 
                             epcr_mutation_rate, frequency_cutoff, number_wells_to_mix, mixture_rate/100.0, 
                             qc_files_path='qc_data/')
    run_df.reset_index(inplace=True)
    experiment_df = pd.concat([experiment_df, run_df])

 # Also plot each one
experiment_df.to_csv(f'{data_dir}Experiment6_{label}.csv', index=False)


  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:44<00:00,  2.17it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:43<00:00,  2.18it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:39<00:00,  2.41it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:39<00:00,  2.42it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:45<00:00,  2.09it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:48<00:00,  2.00it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:44<00:00,  2.17it/s]
  variant_df.at[current_well, "frequency"] = fre

In [10]:
# We're going to make an experiment of 10 plates with different sequencing error rates
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.2
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1
number_wells_to_mix = 20
experiment_df = pd.DataFrame()

for mixture_rate in range(10, 90, 10):
    run_df = make_experiment(f'mixedWells_{mixture_rate}', read_depth, sequencing_error, parent_sequence, library_number, number_of_wells, 
                             epcr_mutation_rate, frequency_cutoff, number_wells_to_mix, mixture_rate/100.0, 
                             qc_files_path='qc_data/')
    run_df.reset_index(inplace=True)
    experiment_df = pd.concat([experiment_df, run_df])

 # Also plot each one
experiment_df.to_csv(f'{data_dir}Experiment6_freq_0.2_{label}.csv', index=False)


  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:43<00:00,  2.23it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:44<00:00,  2.17it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:42<00:00,  2.24it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:47<00:00,  2.04it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:45<00:00,  2.11it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:42<00:00,  2.27it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [00:41<00:00,  2.29it/s]
  variant_df.at[current_well, "frequency"] = fre