# W6-CRISPR-Cas3-in-frame-deletion

In Workflow 6, CRISPR-Cas3 is paired with a PCR and Gibson protocol to achieve in-frame deletions22(Figure 7A). Just like in workflow 5, this workflow can be used to knock out specific genes in streptomycetes but with the crucial difference of using Cas3 which has been shown to have higher efficiencies for targeted gene deletions in for example BGCs and random-sized deletion in streptomyces22. Due to homologies, and high GC content, this protocol relies on two steps of Gibson cloning to incorporate the sgRNA and the upstream and downstream repair regions. To get started, users can download the pCRISPR-Cas3 file and the genome file (S. coelicolor A3). StreptoAIM will then generate the necessary primers, plasmid files, and checking primers to verify full gene deletions (S5). 


In [1]:
import sys
import os
from Bio.Restriction import * 
import pandas as pd
from datetime import datetime

# Ensure the src directory is in the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '../../'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)


from pydna.dseqrecord import Dseqrecord

from streptocad.sequence_loading.sequence_loading import (
    load_and_process_plasmid,
    load_and_process_genome_sequences, 
    check_and_convert_input, 
    annotate_dseqrecord)

from streptocad.utils import polymerase_dict, create_primer_df_from_dict,ProjectDirectory,extract_metadata_to_dataframe
from streptocad.primers.primer_generation import create_idt_order_dataframe, make_primer_records, primers_to_IDT, find_best_check_primers_from_genome
from streptocad.crispr.guideRNAcas3_9_12 import extract_sgRNAs, SgRNAargs
from streptocad.cloning.cas3_plasmid_cloning import generate_cas3_protospacer_primers, cas3_plasmid_pcrs, assemble_cas3_plasmids
from streptocad.cloning.gibson_cloning import (
    find_up_dw_repair_templates,
    assemble_multiple_plasmids_with_repair_templates_for_deletion,
    update_primer_names
)
from streptocad.cloning.plasmid_processing import determine_workflow_order_for_plasmids

In [2]:
from pydna.dseqrecord import Dseqrecord

def find_locus_tags(record: Dseqrecord, max_tags: int = None):
    """
    Iterate through a Dseqrecord and return a list of unique locus tags found in its features,
    stopping after `max_tags` tags if specified.

    :param record: pydna.dseqrecord.Dseqrecord object
    :param max_tags: maximum number of locus tags to collect (None for no limit)
    :return: list of locus_tag strings
    """
    tags = []
    seen = set()
    count = 0

    # Ensure the record has features
    if not hasattr(record, 'features'):
        return tags

    for feature in record.features:
        if max_tags is not None and count >= max_tags:
            break

        qualifiers = getattr(feature, 'qualifiers', {})

        for tag in qualifiers.get('locus_tag', []):
            if max_tags is not None and count >= max_tags:
                break
            # only append new tags
            if tag not in seen:
                seen.add(tag)
                tags.append(tag)
                count += 1

        if max_tags is not None and count >= max_tags:
            break

    return tags


## INPUT

In [3]:
# Inputs
# 1 Add genome of choice (genbank, fasta)
path_to_genome = '../../../../data/genomes/Streptomyces_coelicolor_A3_chromosome.gb'
genome = load_and_process_genome_sequences(path_to_genome)[0]

# 2 Add plasmid 
path_to_plasmid = '../../../../data/plasmids/pCRISPR_Cas3.gbk'
clean_plasmid = load_and_process_plasmid(path_to_plasmid)
print(clean_plasmid.name)

# 3 Choose genes to knock out (list)
locus_tags = find_locus_tags(genome, 150)
genes_to_KO = locus_tags


#### Advanced settings ####
# 4 Filtering metrics for sgRNAs
gc_upper = 0.8
gc_lower = 0.2
off_target_seed = 8
off_target_upper = 10
cas_type='cas3'
number_of_sgRNAs_per_group = 1

# 5 Choose polymerase and target melting temperature
chosen_polymerase = polymerase_dict['Phusion High-Fidelity DNA Polymerase (GC Buffer)']
melting_temperature = 65
primer_concentration = 0.4 
primer_number_increment = 23
flanking_region = 500
# 6 Choose overlapping sequences for our plasmid we can use the following
# the part of the primers that anneal to the generate the crRNA and protospacer
forward_protospacer_overhang = 'GTCGCCCGGCAAAACCGG'
reverse_protospacer_overhang = 'GTTTCAATCCACGCGCCCGT'

# The universal part of the 
forward_backbone_overhang = 'GAGCTCATAAGTTCCTATTCCGAAG'
reverse_backbone_overhang = 'AAGAAGTGGGTGTCGGACGC'

Enzyme_for_repair_template_integration = EcoRI
repair_length = 1000
overlap_length = 40 


pCRISPR-Cas3


# Computation

In [4]:
target_dict, genes_to_KO, annotation_input = check_and_convert_input(genes_to_KO)

print(annotation_input)
if annotation_input == True:
    genome = annotate_dseqrecord(genome, target_dict)

False


In [5]:
# Initialize SgRNAargs with desired parameters
args = SgRNAargs(genome, 
                genes_to_KO,
                step=['find', 'filter'],
                gc_upper = gc_upper,
                gc_lower = gc_lower,
                off_target_seed = off_target_seed,
                off_target_upper = off_target_upper,
                cas_type=cas_type
                )

sgrna_df = extract_sgRNAs(args)
sgrna_df

Unnamed: 0,strain_name,locus_tag,gene_loc,gene_strand,sgrna_strand,sgrna_loc,gc,pam,sgrna,sgrna_seed_sequence,off_target_count
1196,NC_003888.3,SCO0088,74917,-1,1,836,0.676471,TTC,ACATGGAGGCGGGAGTCGTCCGGTATCCCGACGG,ACATGGAG,0
2173,NC_003888.3,SCO0144,137197,1,-1,109,0.676471,TTC,AAAGGCAGCCTGACCATCCCGCTGGCCCACGTCC,AAAGGCAG,0
2176,NC_003888.3,SCO0144,137197,1,-1,282,0.588235,TTC,GAAGGCTGTCGTGATCGAACTCTCCGACGAGCAG,GAAGGCTG,0
764,NC_003888.3,SCO0062,47789,-1,-1,844,0.647059,TTC,CGGATTTGTACAGGGCCCGGATCGCTCCCTGGCT,CGGATTTG,0
754,NC_003888.3,SCO0062,47789,-1,1,325,0.676471,TTC,TACTCACGCCTCGCCCTCGGCGTCCAGGAAGTGG,TACTCACG,0
...,...,...,...,...,...,...,...,...,...,...,...
2144,NC_003888.3,SCO0142,134295,-1,1,601,0.705882,TTC,CTGGTCAGCGACACCACCCGGCACCAGCTGGTGG,CTGGTCAG,10
879,NC_003888.3,SCO0067,54097,1,-1,313,0.617647,TTC,GCCACGATCATCATGCCCGTCAAGAGGCAGGCGT,GCCACGAT,10
1794,NC_003888.3,SCO0126,106637,1,1,767,0.735294,TTC,CCGGTGTCCACGCCGCCGCTGATGACCAGGTCCG,CCGGTGTC,10
2011,NC_003888.3,SCO0135,127571,1,-1,82,0.676471,TTC,GCGGACTCGTCCAGCTGGAACGGCGTGGTGGAAC,GCGGACTC,10


In [6]:
# Filter the DataFrame to retain only up to 5 sgRNA sequences per locus_tag
filtered_df = sgrna_df.groupby('locus_tag').head(number_of_sgRNAs_per_group)[:100]
filtered_df

Unnamed: 0,strain_name,locus_tag,gene_loc,gene_strand,sgrna_strand,sgrna_loc,gc,pam,sgrna,sgrna_seed_sequence,off_target_count
1196,NC_003888.3,SCO0088,74917,-1,1,836,0.676471,TTC,ACATGGAGGCGGGAGTCGTCCGGTATCCCGACGG,ACATGGAG,0
2173,NC_003888.3,SCO0144,137197,1,-1,109,0.676471,TTC,AAAGGCAGCCTGACCATCCCGCTGGCCCACGTCC,AAAGGCAG,0
764,NC_003888.3,SCO0062,47789,-1,-1,844,0.647059,TTC,CGGATTTGTACAGGGCCCGGATCGCTCCCTGGCT,CGGATTTG,0
732,NC_003888.3,SCO0061,46430,-1,1,882,0.588235,TTC,GATATACGCGTACGAGGTGGACGGCAACGGCAAC,GATATACG,0
792,NC_003888.3,SCO0063,48889,-1,-1,288,0.735294,TTC,GAACTTGCCGACGTCGGCGAAGCGGCCGACGCCG,GAACTTGC,0
...,...,...,...,...,...,...,...,...,...,...,...
285,NC_003888.3,SCO0015,16070,-1,-1,596,0.676471,TTC,TGCAAGTAGCCGAACACGGCCAGGCCGACGGCAG,TGCAAGTA,1
1120,NC_003888.3,SCO0083,69799,1,1,69,0.676471,TTC,ATCCGACACGATCCACGGCGACGTCCCCACACCG,ATCCGACA,1
2140,NC_003888.3,SCO0142,134295,-1,1,157,0.647059,TTC,TGGACGTTCTGCTGCGTCAACTGCCTGCACGCCC,TGGACGTT,1
251,NC_003888.3,SCO0012,14044,-1,-1,114,0.764706,TTC,CAACGGAGCCTCCCGGCCCGGCTCCCGGTACACC,CAACGGAG,1


## Output

In [7]:
filtered_df_w_primers = generate_cas3_protospacer_primers(filtered_df,
                                                          fwd_overhang= forward_protospacer_overhang, 
                                                          rev_overhang=reverse_protospacer_overhang)
filtered_df_w_primers

Unnamed: 0,strain_name,locus_tag,gene_loc,gene_strand,sgrna_strand,sgrna_loc,gc,pam,sgrna,sgrna_seed_sequence,off_target_count,Fwd Primer,Rev Primer
1196,NC_003888.3,SCO0088,74917,-1,1,836,0.676471,TTC,ACATGGAGGCGGGAGTCGTCCGGTATCCCGACGG,ACATGGAG,0,ACATGGAGGCGGGAGTCGTCCGGTATCCCGACGGGTCGCCCGGCAA...,CCGTCGGGATACCGGACGACTCCCGCCTCCATGTGTTTCAATCCAC...
2173,NC_003888.3,SCO0144,137197,1,-1,109,0.676471,TTC,AAAGGCAGCCTGACCATCCCGCTGGCCCACGTCC,AAAGGCAG,0,AAAGGCAGCCTGACCATCCCGCTGGCCCACGTCCGTCGCCCGGCAA...,GGACGTGGGCCAGCGGGATGGTCAGGCTGCCTTTGTTTCAATCCAC...
764,NC_003888.3,SCO0062,47789,-1,-1,844,0.647059,TTC,CGGATTTGTACAGGGCCCGGATCGCTCCCTGGCT,CGGATTTG,0,CGGATTTGTACAGGGCCCGGATCGCTCCCTGGCTGTCGCCCGGCAA...,AGCCAGGGAGCGATCCGGGCCCTGTACAAATCCGGTTTCAATCCAC...
732,NC_003888.3,SCO0061,46430,-1,1,882,0.588235,TTC,GATATACGCGTACGAGGTGGACGGCAACGGCAAC,GATATACG,0,GATATACGCGTACGAGGTGGACGGCAACGGCAACGTCGCCCGGCAA...,GTTGCCGTTGCCGTCCACCTCGTACGCGTATATCGTTTCAATCCAC...
792,NC_003888.3,SCO0063,48889,-1,-1,288,0.735294,TTC,GAACTTGCCGACGTCGGCGAAGCGGCCGACGCCG,GAACTTGC,0,GAACTTGCCGACGTCGGCGAAGCGGCCGACGCCGGTCGCCCGGCAA...,CGGCGTCGGCCGCTTCGCCGACGTCGGCAAGTTCGTTTCAATCCAC...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
285,NC_003888.3,SCO0015,16070,-1,-1,596,0.676471,TTC,TGCAAGTAGCCGAACACGGCCAGGCCGACGGCAG,TGCAAGTA,1,TGCAAGTAGCCGAACACGGCCAGGCCGACGGCAGGTCGCCCGGCAA...,CTGCCGTCGGCCTGGCCGTGTTCGGCTACTTGCAGTTTCAATCCAC...
1120,NC_003888.3,SCO0083,69799,1,1,69,0.676471,TTC,ATCCGACACGATCCACGGCGACGTCCCCACACCG,ATCCGACA,1,ATCCGACACGATCCACGGCGACGTCCCCACACCGGTCGCCCGGCAA...,CGGTGTGGGGACGTCGCCGTGGATCGTGTCGGATGTTTCAATCCAC...
2140,NC_003888.3,SCO0142,134295,-1,1,157,0.647059,TTC,TGGACGTTCTGCTGCGTCAACTGCCTGCACGCCC,TGGACGTT,1,TGGACGTTCTGCTGCGTCAACTGCCTGCACGCCCGTCGCCCGGCAA...,GGGCGTGCAGGCAGTTGACGCAGCAGAACGTCCAGTTTCAATCCAC...
251,NC_003888.3,SCO0012,14044,-1,-1,114,0.764706,TTC,CAACGGAGCCTCCCGGCCCGGCTCCCGGTACACC,CAACGGAG,1,CAACGGAGCCTCCCGGCCCGGCTCCCGGTACACCGTCGCCCGGCAA...,GGTGTACCGGGAGCCGGGCCGGGAGGCTCCGTTGGTTTCAATCCAC...


In [8]:
amplicons = cas3_plasmid_pcrs(clean_plasmid, 
                              filtered_df, 
                              universal_fwd_seq=forward_backbone_overhang, 
                              universal_rev_seq=reverse_backbone_overhang
                              )

len(amplicons)

100

In [9]:
amplicons = cas3_plasmid_pcrs(clean_plasmid, filtered_df)

len(amplicons)

100

In [10]:
# cut plasmid and incorporate amplicons
assembled_cas3_plasmids = assemble_cas3_plasmids(clean_plasmid, amplicons)
len(assembled_cas3_plasmids)

100

In [11]:
# Constructing a meaningful name, ID, and description for the assembled plasmid using user input
targeting_info = []
for index, row in filtered_df.iterrows():
    formatted_str = f"pCas3_{row['locus_tag']}({row['sgrna_loc']})"
    targeting_info.append(formatted_str)

for i in range(len(assembled_cas3_plasmids)):
    assembled_cas3_plasmids[i].name = f'{targeting_info[i]}'
    assembled_cas3_plasmids[i].id = assembled_cas3_plasmids[i].name  
    assembled_cas3_plasmids[i].description = f'CRISPR-Cas3 targeting {", ".join(genes_to_KO)} for single gene knockout, assembled using StreptoCAD.'


In [12]:
assembled_cas3_plasmids[0].name

'pCas3_SCO0088(836)'

In [13]:
assembled_cas3_plasmids[0].description

'CRISPR-Cas3 targeting SCO0001, SCO0002, SCO0003, SCO0004, SCO0005, SCO0006, SCO0007, SCO0008, SCO0009, SCO0010, SCO0011, SCO0012, SCO0013, SCO0014, SCO0015, SCO0016, SCO0017, SCO0018, SCO0019, SCO0020, SCO0021, SCO0022, SCO0025, SCO0026, SCO0027, SCO0028, SCO0029, SCO0030, SCO0031, SCO0033, SCO0034, SCO0035, SCO0036, SCO0037, SCO0038, SCO0039, SCO0040, SCO0041, SCO0042, SCO0043, SCO0044, SCO0045, SCO0046, SCO0047, SCO0048, SCO0049, SCO0050, SCO0051, SCO0053, SCO0055, SCO0056, SCO0057, SCO0058, SCO0059, SCO0060, SCO0061, SCO0062, SCO0063, SCO0064, SCO0065, SCO0066, SCO0067, SCO0068, SCO0069, SCO0070, SCO0071, SCO0072, SCO0073, SCO0074, SCO0075, SCO0076, SCO0079, SCO0080, SCO0081, SCO0083, SCO0084, SCO0085, SCO0086, SCO0087, SCO0088, SCO0089, SCO0090, SCO0091, SCO0092, SCO0095, SCO0096, SCO0097, SCO0098, SCO0099, SCO0101, SCO0102, SCO0103, SCO0104, SCO0105, SCO0106, SCO0107, SCO0108, SCO0109, SCO0110, SCO0111, SCO0112, SCO0113, SCO0114, SCO0116, SCO0117, SCO0118, SCO0119, SCO0120, SCO01

In [14]:
assembled_cas3_plasmids[0].id

'pCas3_SCO0088(836)'

In [15]:
print_plasmids = False

if print_plasmids: 
    for vector in assembled_cas3_plasmids: 
        vector.write(f"../../data/plasmids/sgRNA_plasmids_pCRISPR–Cas3/{vector.id}.gb")

### IDT primers

In [16]:
primer_records = make_primer_records(filtered_df_w_primers)
primer_records

[Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecord(-54),
 Dseqrecord(-52),
 Dseqrecor

In [17]:
idt_primers_cas3=primers_to_IDT(primer_records)
idt_primers_cas3

Unnamed: 0,Name,Sequence,Concentration,Purification
0,SCO0088_loc_836_fwd,ACATGGAGGCGGGAGTCGTCCGGTATCCCGACGGGTCGCCCGGCAA...,25nm,STD
1,SCO0088_loc_836_rev,CCGTCGGGATACCGGACGACTCCCGCCTCCATGTGTTTCAATCCAC...,25nm,STD
2,SCO0144_loc_109_fwd,AAAGGCAGCCTGACCATCCCGCTGGCCCACGTCCGTCGCCCGGCAA...,25nm,STD
3,SCO0144_loc_109_rev,GGACGTGGGCCAGCGGGATGGTCAGGCTGCCTTTGTTTCAATCCAC...,25nm,STD
4,SCO0062_loc_844_fwd,CGGATTTGTACAGGGCCCGGATCGCTCCCTGGCTGTCGCCCGGCAA...,25nm,STD
...,...,...,...,...
195,SCO0142_loc_157_rev,GGGCGTGCAGGCAGTTGACGCAGCAGAACGTCCAGTTTCAATCCAC...,25nm,STD
196,SCO0012_loc_114_fwd,CAACGGAGCCTCCCGGCCCGGCTCCCGGTACACCGTCGCCCGGCAA...,25nm,STD
197,SCO0012_loc_114_rev,GGTGTACCGGGAGCCGGGCCGGGAGGCTCCGTTGGTTTCAATCCAC...,25nm,STD
198,SCO0006_loc_272_fwd,TTGGCAAGGGCGTCGATAGCGGGGGTGCGCACGGGTCGCCCGGCAA...,25nm,STD


# In frame deletion

In [18]:
in_frame_deletion = False 


In [19]:
Dseqrecord(assembled_cas3_plasmids[3], circular=True).cut(EcoRI)

(Dseqrecord(-12770),)

#### Getting repair templates

In [20]:
if in_frame_deletion:
    

    # Make repair templates
    repair_templates_data = find_up_dw_repair_templates(genome, 
                                                        genes_to_KO, 
                                                        target_tm=melting_temperature, 
                                                        primer_tm_kwargs={'conc':primer_concentration, 'prodcode':chosen_polymerase} , 
                                                        repair_length=repair_length)

    # Digest the plasmids
    processed_records = [sorted(Dseqrecord(record, circular=True).cut(Enzyme_for_repair_template_integration), key=lambda x: len(x), reverse=True)[0] for record in assembled_cas3_plasmids]

    # Rename them appropriately
    for i in range(len(processed_records)):
        processed_records[i].name = assembled_cas3_plasmids[i].name
        print(processed_records[i].name)

    # Assembly 
    assembly_data = assemble_multiple_plasmids_with_repair_templates_for_deletion(genes_to_KO, 
                                                                                processed_records, 
                                                                                repair_templates_data, 
                                                                                overlap=overlap_length)
    # updating the primer names to something systematic.
    update_primer_names(assembly_data)

    # Parse through the primer df
    primer_df = create_primer_df_from_dict(assembly_data)

    #Find_unique primers
    unique_df = primer_df.drop_duplicates(keep='first')
    idt_df = create_idt_order_dataframe(unique_df, concentration="25nm", purification="STD")

    # Contigs
    assembled_contigs = []
    for data in assembly_data: 
        contig_record = data['contig']
        contig_record.id = f"{data['name']}_w_rep"
        contig_record.name = f"{data['name']}_w_rep"
        assembled_contigs.append(contig_record)


    # print our df that contains primers for the adding repair templates to the vecors.         
    print(unique_df)

Check for restriction enzyme sites

In [21]:
if in_frame_deletion:
    integration_names = filtered_df.apply(lambda row: f"sgRNA_{row['locus_tag']}({row['sgrna_loc']})", axis=1).tolist()
    plasmid_metadata_df = extract_metadata_to_dataframe(assembled_contigs,
                                                        clean_plasmid,
                                                        integration_names)
    workflow_df = determine_workflow_order_for_plasmids(assembled_cas3_plasmids, 
                                                        assembled_contigs,
                                                        [Enzyme_for_repair_template_integration,], [ "NcoI","BstBI" ]) 
    
    plasmid_metadata_df = pd.merge(plasmid_metadata_df, workflow_df, on='plasmid_name', how='inner')  # or 'left'/'right' depending on what you need

else: 
    integration_names = filtered_df.apply(lambda row: f"sgRNA_{row['locus_tag']}({row['sgrna_loc']})", axis=1).tolist()
    plasmid_metadata_df = extract_metadata_to_dataframe(assembled_cas3_plasmids,
                                                        clean_plasmid,
                                                        integration_names)

plasmid_metadata_df

Unnamed: 0,plasmid_name,date,original_plasmid,integration,size
0,pCas3_SCO0088(836),2025-06-10,pCRISPR-Cas3,sgRNA_SCO0088(836),12766
1,pCas3_SCO0144(109),2025-06-10,pCRISPR-Cas3,sgRNA_SCO0144(109),12766
2,pCas3_SCO0062(844),2025-06-10,pCRISPR-Cas3,sgRNA_SCO0062(844),12766
3,pCas3_SCO0061(882),2025-06-10,pCRISPR-Cas3,sgRNA_SCO0061(882),12766
4,pCas3_SCO0063(288),2025-06-10,pCRISPR-Cas3,sgRNA_SCO0063(288),12766
...,...,...,...,...,...
95,pCas3_SCO0015(596),2025-06-10,pCRISPR-Cas3,sgRNA_SCO0015(596),12766
96,pCas3_SCO0083(69),2025-06-10,pCRISPR-Cas3,sgRNA_SCO0083(69),12766
97,pCas3_SCO0142(157),2025-06-10,pCRISPR-Cas3,sgRNA_SCO0142(157),12766
98,pCas3_SCO0012(114),2025-06-10,pCRISPR-Cas3,sgRNA_SCO0012(114),12766


In [22]:

full_idt = idt_primers_cas3
full_idt

Unnamed: 0,Name,Sequence,Concentration,Purification
0,SCO0088_loc_836_fwd,ACATGGAGGCGGGAGTCGTCCGGTATCCCGACGGGTCGCCCGGCAA...,25nm,STD
1,SCO0088_loc_836_rev,CCGTCGGGATACCGGACGACTCCCGCCTCCATGTGTTTCAATCCAC...,25nm,STD
2,SCO0144_loc_109_fwd,AAAGGCAGCCTGACCATCCCGCTGGCCCACGTCCGTCGCCCGGCAA...,25nm,STD
3,SCO0144_loc_109_rev,GGACGTGGGCCAGCGGGATGGTCAGGCTGCCTTTGTTTCAATCCAC...,25nm,STD
4,SCO0062_loc_844_fwd,CGGATTTGTACAGGGCCCGGATCGCTCCCTGGCTGTCGCCCGGCAA...,25nm,STD
...,...,...,...,...
195,SCO0142_loc_157_rev,GGGCGTGCAGGCAGTTGACGCAGCAGAACGTCCAGTTTCAATCCAC...,25nm,STD
196,SCO0012_loc_114_fwd,CAACGGAGCCTCCCGGCCCGGCTCCCGGTACACCGTCGCCCGGCAA...,25nm,STD
197,SCO0012_loc_114_rev,GGTGTACCGGGAGCCGGGCCGGGAGGCTCCGTTGGTTTCAATCCAC...,25nm,STD
198,SCO0006_loc_272_fwd,TTGGCAAGGGCGTCGATAGCGGGGGTGCGCACGGGTCGCCCGGCAA...,25nm,STD


## Folder with all the generated I/O

In [23]:
input_files = [
    {"name": "input_genome.gb", "content": genome},
    {"name": "input_plasmid.gb", "content": clean_plasmid}
]


if in_frame_deletion: 
    output_files = [
        {"name": "BEST_w_sgRNAs.gb", "content": assembled_contigs}, # LIST OF Dseqrecords
        {"name": "primer_df.csv", "content": primer_df},
        {"name": "full_idt.csv", "content": full_idt},
        {"name": "sgrna_df.csv", "content": sgrna_df},
        {"name": "filtered_sgrna_df.csv", "content": filtered_df},
        {"name": "plasmid_metadata_df.csv", "content": plasmid_metadata_df},

    ]
else: 
    output_files = [
        {"name": "cBEST_w_sgRNAs.gb", "content": assembled_cas3_plasmids}, # LIST OF Dseqrecords
        {"name": "full_idt.csv", "content": full_idt},
        {"name": "sgrna_df.csv", "content": sgrna_df},
        {"name": "filtered_df.csv", "content": filtered_df},
        {"name": "plasmid_metadata_df.csv", "content": plasmid_metadata_df},

    ]
         
input_values = {
    "genes_to_knockout": genes_to_KO,

    "filtering_metrics": {
        "gc_upper": gc_upper,
        "gc_lower": gc_lower,
        "off_target_seed": off_target_seed,
        "off_target_upper": off_target_upper,
        "cas_type": cas_type,
        "number_of_sgRNAs_per_group": number_of_sgRNAs_per_group,

    },  
    "polymerase_settings": {
        "chosen_polymerase": chosen_polymerase,
        "melting_temperature": melting_temperature,
        "primer_concentration": primer_concentration,
        "flanking_region": flanking_region
    },
    "overlapping_sequences": {
        "up_homology": str(forward_backbone_overhang),
        "dw_homology": str(reverse_backbone_overhang)
    }
}

# Paths to Markdown files
markdown_file_paths = [
    "../../protocols/conjugation_protcol.md",
    "../../protocols/single_target_crispr_plasmid_protcol.md"

]

timestamp = datetime.utcnow().isoformat()

# Create project directory structure
project_directory = ProjectDirectory(
    project_name=f"CRISPR_cas3_inframe_deletion_workflow_{timestamp}",
    input_files=input_files,
    output_files=output_files,
    input_values=input_values,
    markdown_file_paths=markdown_file_paths
)

# DO You want to save the folder? 
save_zip_folder = False 

if save_zip_folder: 
    # Generate the project directory structure and get the zip content
    zip_content = project_directory.create_directory_structure(create_directories=False)

    # Save the zip file to disk (optional)
    with open("project_structure.zip", "wb") as f:
        f.write(zip_content)

NameError: name 'primer_number_increment' is not defined