## 1. Load Required Libraries
We'll begin by loading the necessary libraries and the custom functions provided in the `run_levseq.py` file.


In [1]:
# Load necessary libraries
import os
import sys
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import pandas as pd
from importlib import resources
import subprocess
from Bio import SeqIO
import tqdm
import re
import gzip
import shutil

# Add the path to the levseq directory to the system path.
# NOTE: this is a relative path, so it resolves against the kernel's working
# directory (the `example/` folder in the recorded run).
sys.path.append('../levseq')

# Import custom functions from the provided script.
# NOTE(review): the wildcard import hides where names come from. The cells below
# rely on at least filter_barcodes, demux_fastq, call_variant, create_df_v and
# generate_platemaps being exported by run_levseq — consider importing them explicitly.
from run_levseq import *
# Base output directory: the working directory at the time this cell runs.
result_folder = os.getcwd()

## 2. Define Run Location
We'll specify the location of the sequencing run data. This is also within the same directory structure.


In [2]:
# Show the kernel's working directory; the relative paths used in this notebook resolve against it.
os.getcwd()

'/Users/JLoong8/git/LevSeq/example'

In [3]:
# Define the path to the run data.
# The location is built from the working directory instead of a user-specific
# absolute path so the notebook runs on any checkout. This assumes the notebook
# is launched from the `example/` folder, the same assumption the relative
# '../levseq' paths elsewhere in this notebook make.
run_location = os.path.join(os.getcwd(), 'sample_data', 'parlq_ep1')

# Reference table for the run; the processing loop reads the `barcode_plate`,
# `name` and `refseq` columns from each row.
ref_df = pd.read_csv(os.path.join(run_location, '20240422-YL-ParLQ-ep1.csv'))

# Name of this run; it becomes the name of the run's output sub-folder.
name = 'Test-ep1'

## 3. Demultiplexing and variant calling
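Demultiplexing is the process of separating out individual samples from a multiplexed sequencing run. For each row of the reference table we'll use the `demux_fastq` function from the custom script to perform this step, and then call variants against that row's reference sequence with `call_variant`.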


In [4]:
# Run-level output folder: <base result folder>/<run name>.
# The guard keeps this cell safe to re-run; without it every execution would
# nest the output one level deeper.
if os.path.basename(result_folder) != name:
    result_folder = os.path.join(result_folder, name)
os.makedirs(result_folder, exist_ok=True)

# Destination of the combined variant table. This name was previously used
# without being defined anywhere in the visible notebook.
# NOTE(review): the file name "variants.csv" is a choice made here — confirm it
# matches what downstream tooling expects.
variant_csv_path = os.path.join(result_folder, "variants.csv")

# Barcode-filter settings shared by every plate (hoisted out of the loop).
f_min = 1
f_max = 96
front_prefix = "NB"
back_prefix = "RB"
barcode_path = "../levseq/barcoding/minion_barcodes.fasta"

# Columns that lead the combined table, followed by whatever call_variant returns.
leading_columns = ["barcode_plate", "name", "refseq", "variant"]
plate_results = []

for i, row in ref_df.iterrows():
    barcode_plate = row["barcode_plate"]
    # Use a dedicated variable so the run-level `name` is not overwritten.
    plate_name = row["name"]
    refseq = row["refseq"].upper()

    # Create a subfolder for the current plate using its name
    name_folder = os.path.join(result_folder, plate_name)
    os.makedirs(name_folder, exist_ok=True)

    # Write the refseq to a temporary fasta file
    temp_fasta_path = os.path.join(name_folder, f"temp_{plate_name}.fasta")
    with open(temp_fasta_path, "w") as f:
        f.write(f">{plate_name}\n{refseq}\n")

    # Create filtered barcode file for this plate.
    # NOTE(review): the reverse barcode is derived from the row index (i + 1),
    # not from the `barcode_plate` column — confirm this is intended; it is only
    # equivalent when ref_df has a default 0..n-1 index and plates are numbered 1..n.
    rbc = i + 1
    barcode_path_filter = os.path.join(name_folder, "minion_barcodes_filtered.fasta")
    filter_barcodes(
        barcode_path,
        barcode_path_filter,
        (f_min, f_max),
        rbc,
        front_prefix,
        back_prefix,
    )

    # Perform demultiplexing
    demux_fastq(run_location, name_folder, barcode_path_filter)

    # Call variants for this plate and tag the result with the plate metadata
    variant_result = call_variant(f"{plate_name}", name_folder, temp_fasta_path, barcode_path_filter)
    variant_result["barcode_plate"] = barcode_plate
    variant_result["name"] = plate_name
    variant_result["refseq"] = refseq
    plate_results.append(variant_result)

# Combine all plates once instead of growing a DataFrame inside the loop.
if plate_results:
    variant_df = pd.concat(plate_results)
    # Keep the metadata columns first; reindex also adds any that are missing.
    trailing_columns = [col for col in variant_df.columns if col not in leading_columns]
    variant_df = variant_df.reindex(columns=leading_columns + trailing_columns)
else:
    # No rows in the reference table: keep the empty, correctly-shaped frame.
    variant_df = pd.DataFrame(columns=leading_columns)

variant_df.to_csv(variant_csv_path, index=False)

/opt/anaconda3/envs/LevSeq/lib/python3.8/site-packages/levseq/barcoding/demultiplex
Processed argument: -f with value: /Users/JLoong8/git/LevSeq/example/sample_data/parlq_ep1
Processed argument: -d with value: /Users/JLoong8/git/LevSeq/example/Test-ep1/300-1
Processed argument: -b with value: /Users/JLoong8/git/LevSeq/example/Test-ep1/300-1/minion_barcodes_filtered.fasta
Processed argument: -w with value: 100
Processed argument: -r with value: 100
Processed argument: -m with value: 800
Processed argument: -x with value: 5000
Number of files: 2
Processing files: [#########################                         ] 50%


  0%|                                                                          | 0/9 [00:00<?, ?it/s]

  0%|                                                                          | 0/9 [00:00<?, ?it/s][A[A
  0%|                                                                          | 0/9 [00:00<?, ?it/s][A




  0%|                                                                          | 0/9 [00:00<?, ?it/s][A[A[A[A[A


  0%|                                                                          | 0/9 [00:00<?, ?it/s][A[A[A





Writing MSAWriting MSA

Writing MSA
Writing MSA
Writing MSA
Writing MSA


  0%|                                                                          | 0/9 [00:00<?, ?it/s][A[A[A[A






  0%|                                                                          | 0/9 [00:00<?, ?it/s][A[A[A[A[A[A[A







  0%|                                                                          | 0/9 [00:00<?, ?it/s][A[A[A[A[A[A[A[A







Writing MSA
Writing MSA
Writing MSA


  0%|                                                                          | 0/9 [00:00<?, ?it/s][A[A[A[A[A[A








  0%|                                                                          | 0/9 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Writing MSA
Writing MSA







 11%|███████▎                                                          | 1/9 [00:01<00:10,  1.25s/it][A[A[A[A[A

 11%|███████▎                                                          | 1/9 [00:01<00:10,  1.28s/it][A[A


 11%|███████▎                                                          | 1/9 [00:01<00:11,  1.43s/it][A[A[A








Writing MSA
Writing MSA
Writing MSA


 11%|███████▎                                                          | 1/9 [00:01<00:10,  1.30s/it][A[A[A[A[A[A[A



 11%|███████▎                                                          | 1/9 [00:01<00:11,  1.40s/it][A[A[A[A







 11%|███████▎                                                          | 1/9 [00:01<00:09,  1.21s/it][A[A[A[A[A[A[A[A
 33%|██████████████████████                                            | 3/9 [00:01<00:03,  1.81it/s][A

Writing MSA
Writing MSA
Writing MSA








 11%|███████▎                                                          | 1/9 [00:01<00:10,  1.33s/it][A[A[A[A[A[A








 11%|███████▎                                                          | 1/9 [00:01<00:10,  1.26s/it][A[A[A[A[A[A[A[A[A

Writing MSA
Writing MSA
Writing MSA
Writing MSA







 22%|██████████████▋                                                   | 2/9 [00:02<00:10,  1.45s/it][A[A[A[A[A


 22%|██████████████▋                                                   | 2/9 [00:02<00:10,  1.50s/it][A[A[A



 22%|██████████████▋                                                   | 2/9 [00:02<00:09,  1.40s/it][A[A[A[A

 22%|██████████████▋                                                   | 2/9 [00:03<00:10,  1.55s/it]

Writing MSA
Writing MSA


[A[A







 22%|██████████████▋                                                   | 2/9 [00:02<00:09,  1.41s/it][A[A[A[A[A[A[A[A






 44%|█████████████████████████████▎                                    | 4/9 [00:03<00:04,  1.12it/s][A[A[A[A[A[A[A
 22%|██████████████▋                                                   | 2/9 [00:03<00:11,  1.64s/it]

Writing MSA
Writing MSA
Writing MSA
Writing MSA
Writing MSA


[A





 22%|██████████████▋                                                   | 2/9 [00:02<00:09,  1.42s/it][A[A[A[A[A[A






 33%|██████████████████████                                            | 3/9 [00:03<00:05,  1.15it/s][A[A[A[A[A[A[A








 22%|██████████████▋                                                   | 2/9 [00:02<00:09,  1.39s/it][A[A[A[A[A[A[A[A[A

Writing MSA
Writing MSA
Writing MSA
Writing MSA







 33%|██████████████████████                                            | 3/9 [00:04<00:08,  1.38s/it][A[A[A[A[A




 44%|█████████████████████████████▎                                    | 4/9 [00:04<00:04,  1.10it/s][A[A[A[A[A


 33%|██████████████████████                                            | 3/9 [00:04<00:08,  1.48s/it]

Writing MSA
Writing MSA


[A[A[A







 33%|██████████████████████                                            | 3/9 [00:04<00:08,  1.40s/it][A[A[A[A[A[A[A[A

 33%|██████████████████████                                            | 3/9 [00:04<00:09,  1.54s/it][A[A





Writing MSA
Writing MSA
Writing MSA


 33%|██████████████████████                                            | 3/9 [00:04<00:09,  1.57s/it][A[A[A[A
 33%|██████████████████████                                            | 3/9 [00:04<00:09,  1.65s/it][A





 33%|██████████████████████                                            | 3/9 [00:04<00:09,  1.53s/it]

Writing MSA
Writing MSA


 56%|████████████████████████████████████▋                             | 5/9 [00:05<00:04,  1.17s/it]






 44%|█████████████████████████████▎                                    | 4/9 [00:04<00:06,  1.25s/it][A[A[A[A[A[A[A








 33%|██████████████████████                                            | 3/9 [00:04<00:09,  1.66s/it][A[A[A[A[A[A[A[A[A

Writing MSA
Writing MSA
Writing MSA
Writing MSA







 56%|████████████████████████████████████▋                             | 5/9 [00:05<00:04,  1.09s/it][A[A[A[A[A


 44%|█████████████████████████████▎                                    | 4/9 [00:05<00:07,  1.45s/it][A[A[A

 44%|█████████████████████████████▎                                    | 4/9 [00:06<00:07,  1.50s/it][A[A







 44%|█████████████████████████████▎                                    | 4/9 [00:05<00:07,  1.43s/it][A[A[A[A[A[A[A[A

Writing MSA
Writing MSA
Writing MSA
Writing MSA






 44%|█████████████████████████████▎                                    | 4/9 [00:06<00:08,  1.65s/it][A[A[A[A





 44%|█████████████████████████████▎                                    | 4/9 [00:06<00:07,  1.55s/it][A[A[A[A[A[A
 44%|█████████████████████████████▎                                    | 4/9 [00:06<00:08,  1.66s/it][A








 44%|█████████████████████████████▎                                    | 4/9 [00:06<00:07,  1.51s/it]

Writing MSA
Writing MSA
Writing MSA


 67%|████████████████████████████████████████████                      | 6/9 [00:06<00:04,  1.34s/it]






 56%|████████████████████████████████████▋                             | 5/9 [00:06<00:05,  1.42s/it][A[A[A[A[A[A[A



Writing MSA
Writing MSA
Writing MSA


 56%|████████████████████████████████████▋                             | 5/9 [00:07<00:05,  1.40s/it][A[A




 67%|████████████████████████████████████████████                      | 6/9 [00:07<00:03,  1.22s/it][A[A[A[A[A

Writing MSA
Writing MSA





 56%|████████████████████████████████████▋                             | 5/9 [00:07<00:06,  1.58s/it][A[A[A







 56%|████████████████████████████████████▋                             | 5/9 [00:07<00:06,  1.52s/it][A[A[A[A[A[A[A[A

Writing MSA
Writing MSA






 56%|████████████████████████████████████▋                             | 5/9 [00:07<00:06,  1.61s/it][A[A[A[A





 56%|████████████████████████████████████▋                             | 5/9 [00:07<00:06,  1.57s/it][A[A[A[A[A[A

Writing MSA
Writing MSA











 56%|████████████████████████████████████▋                             | 5/9 [00:07<00:06,  1.62s/it][A[A[A[A[A[A[A[A[A
 78%|███████████████████████████████████████████████████▎              | 7/9 [00:08<00:02,  1.49s/it][A






 67%|████████████████████████████████████████████                      | 6/9 [00:08<00:04,  1.54s/it]

Writing MSA
Writing MSA
Writing MSA


 89%|██████████████████████████████████████████████████████████▋       | 8/9 [00:08<00:01,  1.08s/it]

 67%|████████████████████████████████████████████                      | 6/9 [00:08<00:04,  1.50s/it][A[A






 78%|███████████████████████████████████████████████████▎              | 7/9 [00:08<00:02,  1.13s/it][A[A[A[A[A[A[A

Writing MSAWriting MSA

Writing MSA







 78%|███████████████████████████████████████████████████▎              | 7/9 [00:09<00:02,  1.43s/it][A[A[A[A[A







 67%|████████████████████████████████████████████                      | 6/9 [00:08<00:04,  1.52s/it][A[A[A[A[A[A[A[A

Writing MSA
Writing MSA






 67%|████████████████████████████████████████████                      | 6/9 [00:09<00:04,  1.51s/it][A[A[A[A


 67%|████████████████████████████████████████████                      | 6/9 [00:09<00:05,  1.67s/it][A[A[A

Writing MSA
Writing MSA








 67%|████████████████████████████████████████████                      | 6/9 [00:09<00:04,  1.58s/it][A[A[A[A[A[A

Writing MSA
Writing MSA











100%|██████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.20s/it][A[A[A[A[A[A[A[A[A
 67%|████████████████████████████████████████████                      | 6/9 [00:10<00:05,  1.74s/it][A

Writing MSA




100%|██████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.16s/it][A[A


Writing MSA
Writing MSA







  0%|                                                                          | 0/6 [00:00<?, ?it/s][A[A[A[A[A






 89%|██████████████████████████████████████████████████████████▋       | 8/9 [00:10<00:01,  1.28s/it][A[A[A[A[A[A[A







 78%|███████████████████████████████████████████████████▎              | 7/9 [00:10<00:03,  1.53s/it][A[A[A[A[A[A[A[A

Writing MSA
Writing MSA
Writing MSA






 78%|███████████████████████████████████████████████████▎              | 7/9 [00:10<00:03,  1.50s/it][A[A[A[A

Writing MSA
Writing MSA








 78%|███████████████████████████████████████████████████▎              | 7/9 [00:10<00:03,  1.53s/it][A[A[A[A[A[A








 78%|███████████████████████████████████████████████████▎              | 7/9 [00:10<00:03,  1.52s/it][A[A[A[A[A[A[A[A[A

Writing MSA
Writing MSA





 78%|███████████████████████████████████████████████████▎              | 7/9 [00:11<00:03,  1.80s/it][A[A[A
 78%|███████████████████████████████████████████████████▎              | 7/9 [00:11<00:03,  1.64s/it][A

Writing MSA
Writing MSA







100%|██████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.46s/it][A[A[A[A[A






100%|██████████████████████████████████████████████████████████████████| 9/9 [00:11<00:00,  1.32s/it][A[A[A[A[A[A[A
100%|██████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.36s/it]


 89%|██████████████████████████████████████████████████████████▋       | 8/9 [00:12<00:01,  1.62s/it][A[A



 89%|██████████████████████████████████████████████████████████▋       | 8/9 [00:12<00:01,  1.46s/it][A[A[A[A







 17%|███████████                                                       | 1/6 [00:01<00:09,  1.90s/it][A[A[A[A[A[A[A[A

Writing MSA
Writing MSA
Writing MSA


 33%|██████████████████████                                            | 2/6 [00:02<00:03,  1.16it/s]





 89%|██████████████████████████████████████████████████████████▋       | 8/9 [00:12<00:01,  1.52s/it][A[A[A[A[A[A








 89%|██████████████████████████████████████████████████████████▋       | 8/9 [00:12<00:01,  1.46s/it]

Writing MSA
Writing MSA
Writing MSA


[A[A[A[A[A[A[A[A[A
 89%|██████████████████████████████████████████████████████████▋       | 8/9 [00:12<00:01,  1.48s/it][A


 89%|██████████████████████████████████████████████████████████▋       | 8/9 [00:13<00:01,  1.68s/it][A[A[A

Writing MSA
Writing MSA
Writing MSA




100%|██████████████████████████████████████████████████████████████████| 9/9 [00:13<00:00,  1.52s/it][A[A



100%|██████████████████████████████████████████████████████████████████| 9/9 [00:13<00:00,  1.40s/it][A[A[A[A







100%|██████████████████████████████████████████████████████████████████| 9/9 [00:13<00:00,  1.52s/it][A[A[A[A[A[A[A[A
100%|██████████████████████████████████████████████████████████████████| 9/9 [00:13<00:00,  1.50s/it]
100%|██████████████████████████████████████████████████████████████████| 9/9 [00:13<00:00,  1.48s/it]






100%|██████████████████████████████████████████████████████████████████| 9/9 [00:13<00:00,  1.38s/it][A[A[A[A[A[A








100%|██████████████████████████████████████████████████████████████████| 9/9 [00:13<00:00,  1.48s/it][A[A[A[A[A[A[A[A[A



100%|██████████████████████████████████████████████████████████████████| 9/9 [00:13<00:00,  1.47s/it][A[A[A

100%|█████████████████████████████████████████████████████

Writing MSA


 83%|███████████████████████████████████████████████████████           | 5/6 [00:03<00:00,  1.98it/s]

Writing MSA
Writing MSA


100%|██████████████████████████████████████████████████████████████████| 6/6 [00:03<00:00,  1.60it/s]


KeyboardInterrupt: 

## 4. Create variant and visualization csv files

In [None]:
# Inspect the combined variant table produced by the demultiplexing / variant-calling loop.
variant_df

## 5. Visualization
Finally, we'll visualize the results using appropriate visualization functions. This step helps in understanding the distribution and impact of the identified variants.

In [None]:
# create_df_v returns two frames from the variant table; only the second
# (df_vis) is consumed in this cell.
df_variants, df_vis = create_df_v(variant_df)

# Build the plate maps from the visualization frame, handing the helper the
# run's result folder.
layout = generate_platemaps(max_combo_data=df_vis, result_folder=result_folder)

# Last expression of the cell, so the notebook renders the layout.
layout