# Simulation experiment for detection thresholds for variant calling

This notebook estimates detection thresholds (the limits of variant calling) as a function of:

1. Read depth
2. Error rate
3. Number of mutations

The data used here come from: https://pubs.acs.org/doi/10.1021/acscentsci.7b00548 (Ape AGW)


In [1]:
import pandas as pd
# Visualisation things to make the figures look nice
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from sciutil import SciUtil


# Input/output locations, relative to the notebook's working directory.
data_dir = 'ePCR_data/'
fig_dir = 'ePCR_figures/'

# Shared figure-styling constants; set_ax_params() reads the axis/label ones.
# 'none' keeps SVG text as text instead of converting glyphs to paths.
plt.rcParams['svg.fonttype'] = 'none'
axis_line_width = 1.0
axis_font_size = 12
title_font_size = 12
label_font_size = 10
figsize = (5, 4) # Figure size
font = 'Arial'
style = 'ticks'
font_family = 'sans-serif'

# Colour map name and the matching continuous colormap object.
cmap = 'viridis'
palette = sns.color_palette("viridis", as_cmap=True)

# Apply the seaborn theme once. sns.set() resets both the style and the colour
# palette, so the sns.set_style("whitegrid") / sns.set_palette(cmap) calls that
# used to precede it had no lasting effect and were removed; the effective
# settings (style 'ticks', seaborn's default palette) are unchanged.
# NOTE(review): if a viridis default palette was intended, pass palette=cmap
# to sns.set() — confirm before changing, as it alters every figure.
sns.set(rc={'figure.figsize': figsize, 'font.family': font_family,
            'font.sans-serif': font, 'font.size': label_font_size}, style=style)

def set_ax_params(ax):
    """Apply the notebook's shared axis styling to a matplotlib Axes in place.

    Ticks point outward, the bottom/left spines use ``axis_line_width``, the
    top/right spines are hidden (zero width), tick labels use
    ``label_font_size``, and the x tick labels are rotated 45 degrees and
    right-aligned.

    Reads the module-level ``axis_line_width`` and ``label_font_size``.

    Parameters
    ----------
    ax : matplotlib Axes
        The axes to restyle. Nothing is returned.
    """
    # The original set labelsize twice (axis_font_size, then label_font_size);
    # only the last call took effect, so the final value is applied directly.
    ax.tick_params(direction='out', length=2, width=axis_line_width,
                   labelsize=label_font_size)
    ax.tick_params(axis='x', which='major', pad=2.0)
    ax.tick_params(axis='y', which='major', pad=2.0)

    for visible_side in ('bottom', 'left'):
        ax.spines[visible_side].set_linewidth(axis_line_width)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_linewidth(0)

    # Rotate the existing label objects rather than calling
    # set_xticklabels(get_xticklabels()): that freezes the label text without
    # fixing the tick positions, which matplotlib warns can misplace labels.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
        tick_label.set_horizontalalignment('right')


# Shared SciUtil helper instance (SciUtil is imported from sciutil above).
u = SciUtil()

# The imports, plotting configuration, set_ax_params() and SciUtil() setup were
# pasted a second time here; the copy was identical to the code above and has
# been removed so each name is defined exactly once.

# Generate mutations on a "real" sequence
# Decided to choose tauD from Ecoli K12
# NOTE(review): the comment above says tauD, but the run label used further
# down is 'BM3-P411-CIS' — confirm which parent sequence this actually is.
parent_sequence_aa = 'MTIKEMPQPKTFGELKNLPLLNTDKPVQALMKIADELGEIFKFEAPGRVTRYLSSQRLIKEACDESRFDKNLSQALKFARDFAGDGLVTSWTHEKNWKKAHNILLPSFSQQAMKGYHAMMVDIAVQLVQKWERLNADEHIEVSEDMTRLTLDTIGLCGFNYRFNSFYRDQPHPFIISMVRALDEVMNKLQRANPDDPAYDENKRQFQEDIKVMNDLVDKIIADRKARGEQSDDLLTQMLNGKDPETGEPLDDGNIRYQIITFLIAGHEATSGLLSFALYFLVKNPHVLQKVAEEAARVLVDPVPSYKQVKQLKYVGMVLNEALRLWPTAPAFSLYAKEDTVLGGEYPLEKGDEVMVLIPQLHRDKTVWGDDVEEFRPERFENPSAIPQHAFKPFGNGQRASIGQQFALHEATLVLGMMLKHFDFEDHTNYELDIKETLTLKPKGFVVKAKSKKIPLGGIPSPSTLEHHHHHH*'
parent_sequence = 'ATGACAATTAAAGAAATGCCTCAGCCAAAAACGTTTGGAGAGCTTAAAAATTTACCGTTATTAAACACAGATAAACCGGTTCAAGCTTTGATGAAAATTGCGGATGAATTAGGAGAAATCTTTAAATTCGAGGCGCCTGGTCGTGTAACGCGCTACTTATCAAGTCAGCGTCTAATTAAAGAAGCATGCGATGAATCACGCTTTGATAAAAACTTAAGTCAAGCGCTGAAATTTGCACGTGATTTTGCAGGAGACGGGTTAGTCACAAGCTGGACGCATGAAAAAAATTGGAAAAAAGCGCATAATATCTTACTTCCAAGCTTTAGTCAGCAGGCAATGAAAGGCTATCATGCGATGATGGTCGATATCGCCGTGCAGCTTGTTCAAAAGTGGGAGCGTCTAAATGCAGATGAGCATATTGAAGTATCGGAAGACATGACACGTTTAACGCTTGATACAATTGGTCTTTGCGGCTTTAACTATCGCTTTAACAGCTTTTACCGAGATCAGCCTCATCCATTTATTATAAGTATGGTCCGTGCACTGGATGAAGTAATGAACAAGCTGCAGCGAGCAAATCCAGACGACCCAGCTTATGATGAAAACAAGCGCCAGTTTCAAGAAGATATCAAGGTGATGAACGACCTAGTAGATAAAATTATTGCAGATCGCAAAGCAAGGGGTGAACAAAGCGATGATTTATTAACGCAGATGCTAAACGGAAAAGATCCAGAAACGGGTGAGCCGCTTGATGACGGGAACATTCGCTATCAAATTATTACATTCTTAATTGCGGGACACGAAGCAACAAGTGGTCTTTTATCATTTGCGCTGTATTTCTTAGTGAAAAATCCACATGTATTACAAAAAGTAGCAGAAGAAGCAGCACGAGTTCTAGTAGATCCTGTTCCAAGCTACAAACAAGTCAAACAGCTTAAATATGTCGGCATGGTCTTAAACGAAGCGCTGCGCTTATGGCCAACTGCTCCTGCGTTTTCCCTATATGCAAAAGAAGATACGGTGCTTGGAGGAGAATATCCTTTAGAAAAAGGCGACGAAGTAATGGTTCTGATTCCTCAGCTTCACCGTGATAAAACAGTTTGGGGAGACGATGTGGAGGAGTTCCGTCCAGAGCGTTTTGAAAATCCAAGTGCGATTCCGCAGCATGCGTTTAAACCGTTTGGAAACGGTCAGCGTGCGTCTATCGGTCAGCAGTTCGCTCTTCATGAAGCAACGCTGGTACTTGGTATGATGCTAAAACACTTTGACTTTGAAGATCATACAAACTACGAGCTCGATATTAAAGAAACTTTAACGTTAAAACCTAAAGGCTTTGTGGTAAAAGCAAAATCGAAAAAAATTCCGCTTGGCGGTATTCCTTCACCTAGCACTCTCGAGCACCACCACCACCACCACTGA'


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# Introduce mutations at a given frequency and an error rate

Test setup:

Change the range to 0.1–5% with a 0.2% step size.

1. For the number of mutations, from 1 up to the sequence length, test mutating each one and correlate this with the p-value
2. For sequencing error rates from 0 to 100%, generate sequences at each rate and see how the p-value and the error respond
3. For different sequence lengths, also check how sequence length relates to the p-value

In [2]:
# NOTE(review): wildcard import — presumably this is where `make_experiment`
# (called in Experiment 1) comes from; confirm, and import the needed names
# explicitly so their origin is visible.
from minION import *
from tqdm import tqdm

# Tag embedded in the CSV file name written by Experiment 1.
label = 'BM3-P411-CIS'

## Experiment 1: Varying the sequencing error rate for a single mutation

In [3]:
# Experiment 1: simulate 10 plates that differ only in the sequencing error
# rate (0% to 45% in 5% steps); every other setting below is held fixed.
# NOTE(review): no random seed is set in the visible cells; if make_experiment
# draws random numbers, results will differ between runs — consider seeding.
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False  # not used in this cell; kept in case later cells read it

# Collect one frame per plate and concatenate once at the end, instead of
# re-copying the growing frame with pd.concat on every iteration.
run_dfs = []
for sequencing_error in range(0, 50, 5):  # percent
    sequencing_error_rate = sequencing_error/100.0  # percent -> fraction
    run_df = make_experiment(f'SeqError_{sequencing_error}', read_depth, sequencing_error_rate, parent_sequence,
                             library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff)
    # Move the index into a regular column so it is kept when the CSV is
    # written with index=False.
    run_df = run_df.reset_index()
    run_dfs.append(run_df)
experiment_df = pd.concat(run_dfs)

# Save the combined results of all plates to a single CSV.
experiment_df.to_csv(f'{data_dir}Experiment1_SeqError_{label}.csv', index=False)

100%|███████████████████████████████████████████| 96/96 [01:05<00:00,  1.46it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:13<00:00,  1.30it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:23<00:00,  1.15it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:27<00:00,  1.10it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:38<00:00,  1.03s/it]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:37<00:00,  1.02s/it]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:43<00:00,  1.08s/it]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████

-----------------------------------------
-----------------------------------------


  variant_df.at[current_well, "frequency"] = frequency
  1%|▍                                           | 1/96 [00:01<01:53,  1.19s/it]

-----------------------------------------
-----------------------------------------


  2%|▉                                           | 2/96 [00:02<01:46,  1.14s/it]

-----------------------------------------
-----------------------------------------


  3%|█▍                                          | 3/96 [00:03<01:44,  1.12s/it]

-----------------------------------------
-----------------------------------------


  4%|█▊                                          | 4/96 [00:04<01:42,  1.12s/it]

-----------------------------------------
-----------------------------------------


  5%|██▎                                         | 5/96 [00:05<01:44,  1.15s/it]

-----------------------------------------
-----------------------------------------


  6%|██▊                                         | 6/96 [00:06<01:41,  1.13s/it]

-----------------------------------------
-----------------------------------------


  7%|███▏                                        | 7/96 [00:07<01:39,  1.12s/it]

-----------------------------------------
-----------------------------------------


  8%|███▋                                        | 8/96 [00:08<01:37,  1.11s/it]

-----------------------------------------
-----------------------------------------


  9%|████▏                                       | 9/96 [00:10<01:36,  1.11s/it]

-----------------------------------------
-----------------------------------------


 10%|████▍                                      | 10/96 [00:11<01:35,  1.10s/it]

-----------------------------------------
-----------------------------------------


 11%|████▉                                      | 11/96 [00:12<01:36,  1.14s/it]

-----------------------------------------
-----------------------------------------


 12%|█████▍                                     | 12/96 [00:13<01:34,  1.13s/it]

-----------------------------------------
-----------------------------------------


 14%|█████▊                                     | 13/96 [00:14<01:32,  1.12s/it]

-----------------------------------------
-----------------------------------------


 15%|██████▎                                    | 14/96 [00:15<01:31,  1.12s/it]

-----------------------------------------
-----------------------------------------


 16%|██████▋                                    | 15/96 [00:16<01:32,  1.14s/it]

-----------------------------------------
-----------------------------------------


 17%|███████▏                                   | 16/96 [00:18<01:30,  1.13s/it]

-----------------------------------------
-----------------------------------------


 18%|███████▌                                   | 17/96 [00:19<01:28,  1.12s/it]

-----------------------------------------
-----------------------------------------


 19%|████████                                   | 18/96 [00:20<01:26,  1.10s/it]

-----------------------------------------
-----------------------------------------


 20%|████████▌                                  | 19/96 [00:21<01:24,  1.10s/it]

-----------------------------------------
-----------------------------------------


 21%|████████▉                                  | 20/96 [00:22<01:25,  1.12s/it]

-----------------------------------------
-----------------------------------------


 22%|█████████▍                                 | 21/96 [00:23<01:23,  1.11s/it]

-----------------------------------------
-----------------------------------------


 23%|█████████▊                                 | 22/96 [00:24<01:20,  1.09s/it]

-----------------------------------------
-----------------------------------------


 24%|██████████▎                                | 23/96 [00:25<01:19,  1.09s/it]

-----------------------------------------
-----------------------------------------


 25%|██████████▊                                | 24/96 [00:26<01:18,  1.09s/it]

-----------------------------------------
-----------------------------------------


 26%|███████████▏                               | 25/96 [00:27<01:16,  1.08s/it]

-----------------------------------------
-----------------------------------------


 27%|███████████▋                               | 26/96 [00:28<01:17,  1.10s/it]

-----------------------------------------
-----------------------------------------


 28%|████████████                               | 27/96 [00:30<01:15,  1.09s/it]

-----------------------------------------
-----------------------------------------


 29%|████████████▌                              | 28/96 [00:31<01:13,  1.09s/it]

-----------------------------------------
-----------------------------------------


 30%|████████████▉                              | 29/96 [00:32<01:12,  1.08s/it]

-----------------------------------------
-----------------------------------------


 31%|█████████████▍                             | 30/96 [00:33<01:12,  1.10s/it]

-----------------------------------------
-----------------------------------------


 32%|█████████████▉                             | 31/96 [00:34<01:10,  1.08s/it]

-----------------------------------------
-----------------------------------------


 33%|██████████████▎                            | 32/96 [00:35<01:08,  1.06s/it]

-----------------------------------------
-----------------------------------------


 34%|██████████████▊                            | 33/96 [00:36<01:06,  1.06s/it]

-----------------------------------------
-----------------------------------------


 35%|███████████████▏                           | 34/96 [00:37<01:05,  1.05s/it]

-----------------------------------------
-----------------------------------------


 36%|███████████████▋                           | 35/96 [00:38<01:05,  1.07s/it]

-----------------------------------------
-----------------------------------------


 38%|████████████████▏                          | 36/96 [00:39<01:03,  1.06s/it]

-----------------------------------------
-----------------------------------------


 39%|████████████████▌                          | 37/96 [00:40<01:01,  1.05s/it]

-----------------------------------------
-----------------------------------------


 40%|█████████████████                          | 38/96 [00:41<01:04,  1.12s/it]

-----------------------------------------
-----------------------------------------


 41%|█████████████████▍                         | 39/96 [00:42<01:02,  1.10s/it]

-----------------------------------------
-----------------------------------------


 42%|█████████████████▉                         | 40/96 [00:44<01:02,  1.11s/it]

-----------------------------------------
-----------------------------------------


 43%|██████████████████▎                        | 41/96 [00:45<00:59,  1.09s/it]

-----------------------------------------
-----------------------------------------


 44%|██████████████████▊                        | 42/96 [00:46<00:57,  1.07s/it]

-----------------------------------------
-----------------------------------------


 45%|███████████████████▎                       | 43/96 [00:47<00:55,  1.06s/it]

-----------------------------------------
-----------------------------------------


 46%|███████████████████▋                       | 44/96 [00:48<00:58,  1.12s/it]

-----------------------------------------
-----------------------------------------


 47%|████████████████████▏                      | 45/96 [00:49<01:02,  1.22s/it]

-----------------------------------------
-----------------------------------------


 48%|████████████████████▌                      | 46/96 [00:51<01:08,  1.37s/it]

-----------------------------------------
-----------------------------------------


 49%|█████████████████████                      | 47/96 [00:53<01:08,  1.40s/it]

-----------------------------------------
-----------------------------------------


 50%|█████████████████████▌                     | 48/96 [00:54<01:08,  1.43s/it]

-----------------------------------------
-----------------------------------------


 51%|█████████████████████▉                     | 49/96 [00:56<01:08,  1.46s/it]

-----------------------------------------
-----------------------------------------


 52%|██████████████████████▍                    | 50/96 [00:57<01:09,  1.50s/it]

-----------------------------------------
-----------------------------------------


 53%|██████████████████████▊                    | 51/96 [00:59<01:07,  1.49s/it]

-----------------------------------------
-----------------------------------------


 54%|███████████████████████▎                   | 52/96 [01:00<01:05,  1.49s/it]

-----------------------------------------
-----------------------------------------


 55%|███████████████████████▋                   | 53/96 [01:02<01:03,  1.48s/it]

-----------------------------------------
-----------------------------------------


 56%|████████████████████████▏                  | 54/96 [01:03<01:01,  1.47s/it]

-----------------------------------------
-----------------------------------------


 57%|████████████████████████▋                  | 55/96 [01:05<01:00,  1.48s/it]

-----------------------------------------
-----------------------------------------


 58%|█████████████████████████                  | 56/96 [01:06<00:57,  1.44s/it]

-----------------------------------------
-----------------------------------------


 59%|█████████████████████████▌                 | 57/96 [01:07<00:55,  1.42s/it]

-----------------------------------------
-----------------------------------------


 60%|█████████████████████████▉                 | 58/96 [01:09<00:52,  1.38s/it]

-----------------------------------------
-----------------------------------------


 61%|██████████████████████████▍                | 59/96 [01:10<00:49,  1.35s/it]

-----------------------------------------
-----------------------------------------


 62%|██████████████████████████▉                | 60/96 [01:11<00:48,  1.35s/it]

-----------------------------------------
-----------------------------------------


 64%|███████████████████████████▎               | 61/96 [01:12<00:46,  1.32s/it]

-----------------------------------------
-----------------------------------------


 65%|███████████████████████████▊               | 62/96 [01:14<00:44,  1.30s/it]

-----------------------------------------
-----------------------------------------


 66%|████████████████████████████▏              | 63/96 [01:15<00:41,  1.27s/it]

-----------------------------------------
-----------------------------------------


 67%|████████████████████████████▋              | 64/96 [01:16<00:39,  1.25s/it]

-----------------------------------------
-----------------------------------------


 68%|█████████████████████████████              | 65/96 [01:17<00:38,  1.23s/it]

-----------------------------------------
-----------------------------------------


 69%|█████████████████████████████▌             | 66/96 [01:19<00:37,  1.24s/it]

-----------------------------------------
-----------------------------------------


 70%|██████████████████████████████             | 67/96 [01:20<00:35,  1.21s/it]

-----------------------------------------
-----------------------------------------


 71%|██████████████████████████████▍            | 68/96 [01:21<00:33,  1.19s/it]

-----------------------------------------
-----------------------------------------


 72%|██████████████████████████████▉            | 69/96 [01:22<00:31,  1.16s/it]

-----------------------------------------
-----------------------------------------


 73%|███████████████████████████████▎           | 70/96 [01:23<00:30,  1.17s/it]

-----------------------------------------
-----------------------------------------


 74%|███████████████████████████████▊           | 71/96 [01:24<00:28,  1.15s/it]

-----------------------------------------
-----------------------------------------


 75%|████████████████████████████████▎          | 72/96 [01:25<00:27,  1.13s/it]

-----------------------------------------
-----------------------------------------


 76%|████████████████████████████████▋          | 73/96 [01:26<00:25,  1.11s/it]

-----------------------------------------
-----------------------------------------


 77%|█████████████████████████████████▏         | 74/96 [01:27<00:24,  1.10s/it]

-----------------------------------------
-----------------------------------------


 78%|█████████████████████████████████▌         | 75/96 [01:29<00:23,  1.12s/it]

-----------------------------------------
-----------------------------------------


 79%|██████████████████████████████████         | 76/96 [01:30<00:21,  1.10s/it]

-----------------------------------------
-----------------------------------------


 80%|██████████████████████████████████▍        | 77/96 [01:31<00:20,  1.09s/it]

-----------------------------------------
-----------------------------------------


 81%|██████████████████████████████████▉        | 78/96 [01:32<00:19,  1.08s/it]

-----------------------------------------
-----------------------------------------


 82%|███████████████████████████████████▍       | 79/96 [01:33<00:18,  1.07s/it]

-----------------------------------------
-----------------------------------------


 83%|███████████████████████████████████▊       | 80/96 [01:34<00:17,  1.09s/it]

-----------------------------------------
-----------------------------------------


 84%|████████████████████████████████████▎      | 81/96 [01:35<00:16,  1.08s/it]

-----------------------------------------
-----------------------------------------


 85%|████████████████████████████████████▋      | 82/96 [01:36<00:14,  1.07s/it]

-----------------------------------------
-----------------------------------------


 86%|█████████████████████████████████████▏     | 83/96 [01:37<00:13,  1.06s/it]

-----------------------------------------
-----------------------------------------


 88%|█████████████████████████████████████▋     | 84/96 [01:38<00:12,  1.05s/it]

-----------------------------------------
-----------------------------------------


 89%|██████████████████████████████████████     | 85/96 [01:39<00:11,  1.05s/it]

-----------------------------------------
-----------------------------------------


 90%|██████████████████████████████████████▌    | 86/96 [01:40<00:10,  1.07s/it]

-----------------------------------------
-----------------------------------------


 91%|██████████████████████████████████████▉    | 87/96 [01:41<00:09,  1.05s/it]

-----------------------------------------
-----------------------------------------


 92%|███████████████████████████████████████▍   | 88/96 [01:42<00:08,  1.04s/it]

-----------------------------------------
-----------------------------------------


 93%|███████████████████████████████████████▊   | 89/96 [01:43<00:07,  1.03s/it]

-----------------------------------------
-----------------------------------------


 94%|████████████████████████████████████████▎  | 90/96 [01:44<00:06,  1.03s/it]

-----------------------------------------
-----------------------------------------


 95%|████████████████████████████████████████▊  | 91/96 [01:45<00:05,  1.05s/it]

-----------------------------------------
-----------------------------------------


 96%|█████████████████████████████████████████▏ | 92/96 [01:46<00:04,  1.04s/it]

-----------------------------------------
-----------------------------------------


 97%|█████████████████████████████████████████▋ | 93/96 [01:47<00:03,  1.03s/it]

-----------------------------------------
-----------------------------------------


 98%|██████████████████████████████████████████ | 94/96 [01:48<00:02,  1.02s/it]

-----------------------------------------
-----------------------------------------


 99%|██████████████████████████████████████████▌| 95/96 [01:49<00:01,  1.02s/it]

-----------------------------------------
-----------------------------------------


100%|███████████████████████████████████████████| 96/96 [01:51<00:00,  1.16s/it]
  0%|                                                    | 0/96 [00:00<?, ?it/s]

-----------------------------------------
-----------------------------------------


  variant_df.at[current_well, "frequency"] = frequency
  1%|▍                                           | 1/96 [00:01<01:36,  1.02s/it]

-----------------------------------------
-----------------------------------------


  2%|▉                                           | 2/96 [00:02<01:35,  1.01s/it]

-----------------------------------------
-----------------------------------------


  3%|█▍                                          | 3/96 [00:03<01:34,  1.02s/it]

-----------------------------------------
-----------------------------------------


  4%|█▊                                          | 4/96 [00:04<01:33,  1.02s/it]

-----------------------------------------
-----------------------------------------


  5%|██▎                                         | 5/96 [00:05<01:32,  1.02s/it]

-----------------------------------------
-----------------------------------------


  6%|██▊                                         | 6/96 [00:06<01:34,  1.05s/it]

-----------------------------------------
-----------------------------------------


  7%|███▏                                        | 7/96 [00:07<01:32,  1.04s/it]

-----------------------------------------
-----------------------------------------


  8%|███▋                                        | 8/96 [00:08<01:31,  1.04s/it]

-----------------------------------------
-----------------------------------------


  9%|████▏                                       | 9/96 [00:09<01:30,  1.05s/it]

-----------------------------------------
-----------------------------------------


 10%|████▍                                      | 10/96 [00:10<01:32,  1.08s/it]

-----------------------------------------
-----------------------------------------


 11%|████▉                                      | 11/96 [00:11<01:31,  1.08s/it]

-----------------------------------------
-----------------------------------------


 12%|█████▍                                     | 12/96 [00:12<01:36,  1.15s/it]

-----------------------------------------
-----------------------------------------


 14%|█████▊                                     | 13/96 [00:14<01:35,  1.15s/it]

-----------------------------------------
-----------------------------------------


 15%|██████▎                                    | 14/96 [00:15<01:34,  1.15s/it]

-----------------------------------------
-----------------------------------------


 16%|██████▋                                    | 15/96 [00:16<01:36,  1.19s/it]

-----------------------------------------
-----------------------------------------


 17%|███████▏                                   | 16/96 [00:17<01:35,  1.20s/it]

-----------------------------------------
-----------------------------------------


 18%|███████▌                                   | 17/96 [00:18<01:34,  1.20s/it]

-----------------------------------------
-----------------------------------------


 19%|████████                                   | 18/96 [00:20<01:33,  1.20s/it]

-----------------------------------------
-----------------------------------------


 20%|████████▌                                  | 19/96 [00:21<01:36,  1.26s/it]

-----------------------------------------
-----------------------------------------


 21%|████████▉                                  | 20/96 [00:22<01:39,  1.31s/it]

-----------------------------------------
-----------------------------------------


 22%|█████████▍                                 | 21/96 [00:24<01:36,  1.29s/it]

-----------------------------------------
-----------------------------------------


 23%|█████████▊                                 | 22/96 [00:25<01:33,  1.27s/it]

-----------------------------------------
-----------------------------------------


 24%|██████████▎                                | 23/96 [00:26<01:32,  1.26s/it]

-----------------------------------------
-----------------------------------------


 25%|██████████▊                                | 24/96 [00:27<01:30,  1.25s/it]

-----------------------------------------
-----------------------------------------


 26%|███████████▏                               | 25/96 [00:29<01:30,  1.27s/it]

-----------------------------------------
-----------------------------------------


 27%|███████████▋                               | 26/96 [00:30<01:28,  1.26s/it]

-----------------------------------------
-----------------------------------------


 28%|████████████                               | 27/96 [00:31<01:26,  1.25s/it]

-----------------------------------------
-----------------------------------------


 29%|████████████▌                              | 28/96 [00:32<01:24,  1.25s/it]

-----------------------------------------
-----------------------------------------


 30%|████████████▉                              | 29/96 [00:34<01:23,  1.24s/it]

-----------------------------------------
-----------------------------------------


 31%|█████████████▍                             | 30/96 [00:35<01:23,  1.27s/it]

-----------------------------------------
-----------------------------------------


 32%|█████████████▉                             | 31/96 [00:36<01:21,  1.26s/it]

-----------------------------------------
-----------------------------------------


 33%|██████████████▎                            | 32/96 [00:37<01:20,  1.25s/it]

-----------------------------------------
-----------------------------------------


 34%|██████████████▊                            | 33/96 [00:39<01:18,  1.25s/it]

-----------------------------------------
-----------------------------------------


 35%|███████████████▏                           | 34/96 [00:40<01:17,  1.25s/it]

-----------------------------------------
-----------------------------------------


 36%|███████████████▋                           | 35/96 [00:41<01:17,  1.28s/it]

-----------------------------------------
-----------------------------------------


 38%|████████████████▏                          | 36/96 [00:42<01:16,  1.27s/it]

-----------------------------------------
-----------------------------------------


 39%|████████████████▌                          | 37/96 [00:44<01:14,  1.25s/it]

-----------------------------------------
-----------------------------------------


 40%|█████████████████                          | 38/96 [00:45<01:12,  1.25s/it]

-----------------------------------------
-----------------------------------------


 41%|█████████████████▍                         | 39/96 [00:46<01:10,  1.24s/it]

-----------------------------------------
-----------------------------------------


 42%|█████████████████▉                         | 40/96 [00:47<01:11,  1.27s/it]

-----------------------------------------
-----------------------------------------


 43%|██████████████████▎                        | 41/96 [00:49<01:10,  1.28s/it]

-----------------------------------------
-----------------------------------------


 44%|██████████████████▊                        | 42/96 [00:50<01:08,  1.27s/it]

-----------------------------------------
-----------------------------------------


 45%|███████████████████▎                       | 43/96 [00:51<01:06,  1.26s/it]

-----------------------------------------
-----------------------------------------


 46%|███████████████████▋                       | 44/96 [00:53<01:05,  1.26s/it]

-----------------------------------------
-----------------------------------------


 47%|████████████████████▏                      | 45/96 [00:54<01:05,  1.29s/it]

-----------------------------------------
-----------------------------------------


 48%|████████████████████▌                      | 46/96 [00:55<01:03,  1.28s/it]

-----------------------------------------
-----------------------------------------


 49%|█████████████████████                      | 47/96 [00:56<01:02,  1.27s/it]

-----------------------------------------
-----------------------------------------


 50%|█████████████████████▌                     | 48/96 [00:58<01:00,  1.26s/it]

-----------------------------------------
-----------------------------------------


 51%|█████████████████████▉                     | 49/96 [00:59<00:58,  1.25s/it]

-----------------------------------------
-----------------------------------------


 52%|██████████████████████▍                    | 50/96 [01:00<00:59,  1.28s/it]

-----------------------------------------
-----------------------------------------


 53%|██████████████████████▊                    | 51/96 [01:01<00:57,  1.27s/it]

-----------------------------------------
-----------------------------------------


 54%|███████████████████████▎                   | 52/96 [01:03<00:55,  1.26s/it]

-----------------------------------------
-----------------------------------------


 55%|███████████████████████▋                   | 53/96 [01:04<00:53,  1.25s/it]

-----------------------------------------
-----------------------------------------


 56%|████████████████████████▏                  | 54/96 [01:05<00:52,  1.25s/it]

-----------------------------------------
-----------------------------------------


 57%|████████████████████████▋                  | 55/96 [01:06<00:52,  1.27s/it]

-----------------------------------------
-----------------------------------------


 58%|█████████████████████████                  | 56/96 [01:08<00:50,  1.26s/it]

-----------------------------------------
-----------------------------------------


 59%|█████████████████████████▌                 | 57/96 [01:09<00:48,  1.26s/it]

-----------------------------------------
-----------------------------------------


 60%|█████████████████████████▉                 | 58/96 [01:10<00:47,  1.25s/it]

-----------------------------------------
-----------------------------------------


 61%|██████████████████████████▍                | 59/96 [01:11<00:45,  1.24s/it]

-----------------------------------------
-----------------------------------------


 62%|██████████████████████████▉                | 60/96 [01:13<00:44,  1.24s/it]

-----------------------------------------
-----------------------------------------


 64%|███████████████████████████▎               | 61/96 [01:14<00:44,  1.27s/it]

-----------------------------------------
-----------------------------------------


 65%|███████████████████████████▊               | 62/96 [01:15<00:42,  1.25s/it]

-----------------------------------------
-----------------------------------------


 66%|████████████████████████████▏              | 63/96 [01:16<00:41,  1.25s/it]

-----------------------------------------
-----------------------------------------


 67%|████████████████████████████▋              | 64/96 [01:18<00:41,  1.28s/it]

-----------------------------------------
-----------------------------------------


 68%|█████████████████████████████              | 65/96 [01:19<00:40,  1.30s/it]

-----------------------------------------
-----------------------------------------


 69%|█████████████████████████████▌             | 66/96 [01:20<00:38,  1.28s/it]

-----------------------------------------
-----------------------------------------


 70%|██████████████████████████████             | 67/96 [01:22<00:36,  1.27s/it]

-----------------------------------------
-----------------------------------------


 71%|██████████████████████████████▍            | 68/96 [01:23<00:35,  1.26s/it]

-----------------------------------------
-----------------------------------------


 72%|██████████████████████████████▉            | 69/96 [01:24<00:33,  1.25s/it]

-----------------------------------------
-----------------------------------------


 73%|███████████████████████████████▎           | 70/96 [01:25<00:33,  1.28s/it]

-----------------------------------------
-----------------------------------------


 74%|███████████████████████████████▊           | 71/96 [01:27<00:31,  1.26s/it]

-----------------------------------------
-----------------------------------------


 75%|████████████████████████████████▎          | 72/96 [01:28<00:30,  1.25s/it]

-----------------------------------------
-----------------------------------------


 76%|████████████████████████████████▋          | 73/96 [01:29<00:28,  1.25s/it]

-----------------------------------------
-----------------------------------------


 77%|█████████████████████████████████▏         | 74/96 [01:30<00:27,  1.23s/it]

-----------------------------------------
-----------------------------------------


 78%|█████████████████████████████████▌         | 75/96 [01:32<00:26,  1.25s/it]

-----------------------------------------
-----------------------------------------


 79%|██████████████████████████████████         | 76/96 [01:33<00:24,  1.23s/it]

-----------------------------------------
-----------------------------------------


 80%|██████████████████████████████████▍        | 77/96 [01:34<00:23,  1.22s/it]

-----------------------------------------
-----------------------------------------


 81%|██████████████████████████████████▉        | 78/96 [01:35<00:21,  1.21s/it]

-----------------------------------------
-----------------------------------------


 82%|███████████████████████████████████▍       | 79/96 [01:36<00:20,  1.21s/it]

-----------------------------------------
-----------------------------------------


 83%|███████████████████████████████████▊       | 80/96 [01:38<00:19,  1.20s/it]

-----------------------------------------
-----------------------------------------


 84%|████████████████████████████████████▎      | 81/96 [01:39<00:18,  1.23s/it]

-----------------------------------------
-----------------------------------------


 85%|████████████████████████████████████▋      | 82/96 [01:40<00:17,  1.22s/it]

-----------------------------------------
-----------------------------------------


 86%|█████████████████████████████████████▏     | 83/96 [01:41<00:15,  1.21s/it]

-----------------------------------------
-----------------------------------------


 88%|█████████████████████████████████████▋     | 84/96 [01:42<00:14,  1.20s/it]

-----------------------------------------
-----------------------------------------


 89%|██████████████████████████████████████     | 85/96 [01:44<00:13,  1.22s/it]

-----------------------------------------
-----------------------------------------


 90%|██████████████████████████████████████▌    | 86/96 [01:45<00:12,  1.21s/it]

-----------------------------------------
-----------------------------------------


 91%|██████████████████████████████████████▉    | 87/96 [01:46<00:10,  1.19s/it]

-----------------------------------------
-----------------------------------------


 92%|███████████████████████████████████████▍   | 88/96 [01:47<00:09,  1.17s/it]

-----------------------------------------
-----------------------------------------


 93%|███████████████████████████████████████▊   | 89/96 [01:48<00:08,  1.16s/it]

-----------------------------------------
-----------------------------------------


 94%|████████████████████████████████████████▎  | 90/96 [01:50<00:07,  1.18s/it]

-----------------------------------------
-----------------------------------------


 95%|████████████████████████████████████████▊  | 91/96 [01:51<00:05,  1.16s/it]

-----------------------------------------
-----------------------------------------


 96%|█████████████████████████████████████████▏ | 92/96 [01:52<00:04,  1.15s/it]

-----------------------------------------
-----------------------------------------


 97%|█████████████████████████████████████████▋ | 93/96 [01:53<00:03,  1.14s/it]

-----------------------------------------
-----------------------------------------


 98%|██████████████████████████████████████████ | 94/96 [01:54<00:02,  1.13s/it]

-----------------------------------------
-----------------------------------------


 99%|██████████████████████████████████████████▌| 95/96 [01:55<00:01,  1.15s/it]

-----------------------------------------
-----------------------------------------


100%|███████████████████████████████████████████| 96/96 [01:56<00:00,  1.22s/it]


## Experiment 2: varying read depth and its effect on significance

In [3]:
# Experiment 2: keep every other simulation parameter fixed and sweep the read
# depth, producing one simulated plate (one make_experiment run) per depth.
read_depth = 25  # default value; overwritten by the sweep loop below
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1

# Steps of 1 for depths 1-9, then steps of 5 for depths 10-95.
read_depths = [*range(1, 10, 1), *range(10, 100, 5)]

# Collect the per-depth frames in a list and concatenate once at the end;
# calling pd.concat inside the loop re-copies all earlier rows on every pass.
run_dfs = []
for read_depth in read_depths:
    run_df = make_experiment(f'ReadDepth_{read_depth}', read_depth, sequencing_error, parent_sequence,
                             library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff).reset_index()
    run_dfs.append(run_df)
experiment_df = pd.concat(run_dfs)

# Save the combined results for all read depths to a single CSV.
experiment_df.to_csv(f'{data_dir}Experiment2_ReadDepth_{label}.csv', index=False)

100%|███████████████████████████████████████████| 96/96 [01:07<00:00,  1.42it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:09<00:00,  1.38it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:14<00:00,  1.30it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:13<00:00,  1.30it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:13<00:00,  1.30it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:21<00:00,  1.18it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████| 96/96 [01:18<00:00,  1.22it/s]
  variant_df.at[current_well, "frequency"] = frequency
100%|███████████████████████████████████████████

In [None]:
# Experiment 2 (coarse sweep): keep every other simulation parameter fixed and
# sweep the read depth from 5 to 95 in steps of 5, one simulated plate per depth.
# NOTE(review): this cell writes to the same CSV path as the other Experiment 2
# cells in this notebook, so (for an unchanged `label`) running it replaces
# their output file -- confirm this duplicate cell is still wanted.
read_depth = 25  # default value; overwritten by the sweep loop below
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1

# Collect the per-depth frames in a list and concatenate once at the end;
# calling pd.concat inside the loop re-copies all earlier rows on every pass.
run_dfs = []
for read_depth in range(5, 100, 5):
    run_df = make_experiment(f'ReadDepth_{read_depth}', read_depth, sequencing_error, parent_sequence,
                             library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff).reset_index()
    run_dfs.append(run_df)
experiment_df = pd.concat(run_dfs)

# Save the combined results for all read depths to a single CSV.
experiment_df.to_csv(f'{data_dir}Experiment2_ReadDepth_{label}.csv', index=False)

In [None]:
# Experiment 2 (coarse sweep, repeated cell): keep every other simulation
# parameter fixed and sweep the read depth from 5 to 95 in steps of 5.
# NOTE(review): this cell writes to the same CSV path as the other Experiment 2
# cells in this notebook, so (for an unchanged `label`) running it replaces
# their output file -- confirm this duplicate cell is still wanted.
read_depth = 25  # default value; overwritten by the sweep loop below
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1

# Gather each run's frame first and build the combined frame with a single
# pd.concat call, instead of re-concatenating the growing frame on every pass.
run_dfs = []
for read_depth in range(5, 100, 5):
    run_df = make_experiment(f'ReadDepth_{read_depth}', read_depth, sequencing_error, parent_sequence,
                             library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff).reset_index()
    run_dfs.append(run_df)
experiment_df = pd.concat(run_dfs)

# Save the combined results for all read depths to a single CSV.
experiment_df.to_csv(f'{data_dir}Experiment2_ReadDepth_{label}.csv', index=False)

## Experiment 3: effect of sequence length on significance


In [None]:
# Experiment 3: keep the sequencing parameters fixed and sweep the length of the
# parent sequence, producing one simulated plate per truncated parent.
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1


# Collect the per-length frames in a list and concatenate once at the end;
# calling pd.concat inside the loop re-copies all earlier rows on every pass.
run_dfs = []
for seq_len in range(5, 2000, 50):
    # The parent is sliced to seq_len*3 characters (presumably seq_len counts
    # codons -- TODO confirm), so the guard must compare the slice end, not
    # seq_len itself, with the parent length. Otherwise an over-long slice
    # silently returns the whole parent and several runs with different
    # SeqLen_ labels end up simulating the identical full-length sequence.
    if seq_len * 3 > len(parent_sequence):
        break
    run_df = make_experiment(f'SeqLen_{seq_len}', read_depth, sequencing_error, parent_sequence[:seq_len*3],
                             library_number, number_of_wells, epcr_mutation_rate, frequency_cutoff).reset_index()
    run_dfs.append(run_df)

# pd.concat raises on an empty list, which happens when the parent is shorter
# than the first slice; fall back to an empty frame in that case.
experiment_df = pd.concat(run_dfs) if run_dfs else pd.DataFrame()

# Save the combined results for all sequence lengths to a single CSV.
experiment_df.to_csv(f'{data_dir}Experiment3_SeqLen_{label}.csv', index=False)


## Experiment 4: effect of frequency cutoff

In [None]:
# Experiment 4: keep every other simulation parameter fixed and sweep the
# frequency cutoff, producing one simulated plate per cutoff.
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5  # default value; overwritten by the sweep loop below
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1

# Collect the per-cutoff frames in a list and concatenate once at the end;
# calling pd.concat inside the loop re-copies all earlier rows on every pass.
run_dfs = []
# The loop variable is an integer percentage (5, 15, ..., 95): it is used as-is
# in the run label and divided by 100 before being passed to make_experiment.
for frequency_cutoff in range(5, 100, 10):
    run_df = make_experiment(f'FreqCutoff_{frequency_cutoff}', read_depth, sequencing_error, parent_sequence,
                             library_number, number_of_wells, epcr_mutation_rate,
                             frequency_cutoff/100.0).reset_index()
    run_dfs.append(run_df)
experiment_df = pd.concat(run_dfs)

# Save the combined results for all frequency cutoffs to a single CSV.
experiment_df.to_csv(f'{data_dir}Experiment4_Freq_{label}.csv', index=False)

## Experiment 5: ePCR mutation rate

In [None]:
# Experiment 5: keep every other simulation parameter fixed and sweep the ePCR
# mutation rate, producing one simulated plate per rate.
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02  # default value; overwritten by the sweep loop below
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1


# Collect the per-rate frames in a list and concatenate once at the end;
# calling pd.concat inside the loop re-copies all earlier rows on every pass.
run_dfs = []
# The loop variable is an integer (1, 3, ..., 19): it is used as-is in the run
# label and divided by 1000 before being passed to make_experiment, so the
# simulated rates run from 0.001 to 0.019.
for epcr_mutation_rate in range(1, 20, 2):
    run_df = make_experiment(f'ePCR_{epcr_mutation_rate}', read_depth, sequencing_error, parent_sequence,
                             library_number, number_of_wells,
                             epcr_mutation_rate/1000.0, frequency_cutoff).reset_index()
    run_dfs.append(run_df)
experiment_df = pd.concat(run_dfs)

# Save the combined results for all mutation rates to a single CSV.
experiment_df.to_csv(f'{data_dir}Experiment5_mutationRate_{label}.csv', index=False)


## Experiment 6: mixed well rates


In [None]:
! pip install /Users/ariane/Documents/code/MinION/dist/minION-0.1.0.tar.gz

In [None]:
# Experiment 6: keep every other simulation parameter fixed and sweep the
# mixture rate used for mixed wells, producing one simulated plate per rate.
read_depth = 25
number_of_wells = 96
epcr_mutation_rate = 0.02
frequency_cutoff = 0.5
library_number = 96 # Usually do a 96 well plate
verbose = False
sequencing_error = 0.1
number_wells_to_mix = 20

# Collect the per-rate frames in a list and concatenate once at the end;
# calling pd.concat inside the loop re-copies all earlier rows on every pass.
run_dfs = []
# The loop variable is an integer percentage (10, 20, ..., 80): it is used
# as-is in the run label and divided by 100 before being passed on.
for mixture_rate in range(10, 90, 10):
    run_df = make_experiment(f'mixedWells_{mixture_rate}', read_depth, sequencing_error, parent_sequence,
                             library_number, number_of_wells,
                             epcr_mutation_rate, frequency_cutoff, number_wells_to_mix, mixture_rate/100.0,
                             qc_files_path='qc_data_BM3/').reset_index()
    run_dfs.append(run_df)
experiment_df = pd.concat(run_dfs)

# Save the combined results for all mixture rates to a single CSV.
experiment_df.to_csv(f'{data_dir}Experiment6_mixedWell_{label}.csv', index=False)
