In [1]:
import pandas as pd
from IPython.display import Image
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from pydna.dseqrecord import Dseqrecord

In [None]:
a_oryzae ={
    'F': {'TTT': 0.38, 'TTC': 0.62},
    'S': {'TCT': 0.18, 'TCC': 0.20, 'TCA': 0.14, 'TCG': 0.16, 'AGT': 0.13, 'AGC': 0.18},
    'Y': {'TAT': 0.47, 'TAC': 0.53},
    'C': {'TGT': 0.46, 'TGC': 0.54},
    '*': {'TAA': 0.33, 'TAG': 0.29, 'TGA': 0.38},
    'L': {'TTA': 0.07, 'TTG': 0.18, 'CTT': 0.19, 'CTC': 0.23, 'CTA': 0.11, 'CTG': 0.22},
    'W': {'TGG': 1.00},
    'P': {'CCT': 0.27, 'CCC': 0.26, 'CCA': 0.25, 'CCG': 0.22},
    'H': {'CAT': 0.53, 'CAC': 0.47},
    'Q': {'CAA': 0.43, 'CAG': 0.57},
    'R': {'CGT': 0.18, 'CGC': 0.23, 'CGA': 0.17, 'CGG': 0.18, 'AGA': 0.13, 'AGG': 0.12},
    'I': {'ATT': 0.36, 'ATC': 0.50, 'ATA': 0.14},
    'M': {'ATG': 1.00},
    'T': {'ACT': 0.24, 'ACC': 0.32, 'ACA': 0.23, 'ACG': 0.20},
    'N': {'AAT': 0.45, 'AAC': 0.55},
    'K': {'AAA': 0.36, 'AAG': 0.64},
    'V': {'GTT': 0.27, 'GTC': 0.33, 'GTA': 0.13, 'GTG': 0.27},
    'A': {'GCT': 0.27, 'GCC': 0.30, 'GCA': 0.23, 'GCG': 0.20},
    'D': {'GAT': 0.53, 'GAC': 0.47},
    'E': {'GAA': 0.44, 'GAG': 0.56},
    'G': {'GGT': 0.28, 'GGC': 0.31, 'GGA': 0.24, 'GGG': 0.17}
}

### SPs

In [2]:
sp_df = pd.read_csv('../data/15_SecretoGen/Best_signal_peptides_for_A_oryzae_RFP_his_tag_sorted.csv')

# Rename the 'Unnamed: 0' column and add prefix to the numbers
#sp_df = sp_df.rename(columns={'Unnamed: 0': 'SP_ID'})
#sp_df['SP_ID'] = sp_df['SP_ID'].apply(lambda x: f'SP_{x}')
#sp_df = sp_df.drop(columns=['Unnamed: 0'])
sp_df

Unnamed: 0,SP_ID,Sequence,Perplexity
0,SP_997,MRPTLLALGVISFALTLHS,7.515804
1,SP_944,MKFTLIILIIVILTIIFSPGALA,7.421489
2,SP_948,MKVTLSLLAVFLAALSASAIPANG,7.179219
3,SP_993,MKAIIILLLIILTLALTVQG,7.140379
4,SP_204,MRVFSATAILALSPLLIASASP,6.925975
...,...,...,...
995,SP_656,MVLGILTITAIPTVTGLANA,1.597785
996,SP_696,MVLGLLTIAAIPTVTGVAQA,1.563614
997,SP_757,MVLGLLTIAAIPTVTGVGNAVSA,1.554127
998,SP_253,MVLGILTITAIPTVTGVAQA,1.535361


In [3]:
#sp_df.to_csv('../data/15_SecretoGen/Best_signal_peptides_for_A_oryzae_RFP_his_tag_sorted.csv', index= False)

In [4]:
sp_df

Unnamed: 0,SP_ID,Sequence,Perplexity
0,SP_997,MRPTLLALGVISFALTLHS,7.515804
1,SP_944,MKFTLIILIIVILTIIFSPGALA,7.421489
2,SP_948,MKVTLSLLAVFLAALSASAIPANG,7.179219
3,SP_993,MKAIIILLLIILTLALTVQG,7.140379
4,SP_204,MRVFSATAILALSPLLIASASP,6.925975
...,...,...,...
995,SP_656,MVLGILTITAIPTVTGLANA,1.597785
996,SP_696,MVLGLLTIAAIPTVTGVAQA,1.563614
997,SP_757,MVLGLLTIAAIPTVTGVGNAVSA,1.554127
998,SP_253,MVLGILTITAIPTVTGVAQA,1.535361


In [5]:
# Adding the length of sequences to the dataframe
sp_df['Length'] = sp_df['Sequence'].apply(len)

# Removing sequences with length over 26
filtered_df = sp_df[sp_df['Length'] <= 26]
filtered_df

Unnamed: 0,SP_ID,Sequence,Perplexity,Length
0,SP_997,MRPTLLALGVISFALTLHS,7.515804,19
1,SP_944,MKFTLIILIIVILTIIFSPGALA,7.421489,23
2,SP_948,MKVTLSLLAVFLAALSASAIPANG,7.179219,24
3,SP_993,MKAIIILLLIILTLALTVQG,7.140379,20
4,SP_204,MRVFSATAILALSPLLIASASP,6.925975,22
...,...,...,...,...
995,SP_656,MVLGILTITAIPTVTGLANA,1.597785,20
996,SP_696,MVLGLLTIAAIPTVTGVAQA,1.563614,20
997,SP_757,MVLGLLTIAAIPTVTGVGNAVSA,1.554127,23
998,SP_253,MVLGILTITAIPTVTGVAQA,1.535361,20


In [6]:

# Getting the top 30
top_30 = sp_df.head(30)

# Sampling the remaining dataframe for a good mix
remaining_sample_size = 84 - 30
remaining_sample = sp_df.iloc[30:].sample(n=remaining_sample_size, random_state=1)

# Combining the top 30 with the remaining sample
final_sample = pd.concat([top_30, remaining_sample]).reset_index(drop=True)
final_sample

Unnamed: 0,SP_ID,Sequence,Perplexity,Length
0,SP_997,MRPTLLALGVISFALTLHS,7.515804,19
1,SP_944,MKFTLIILIIVILTIIFSPGALA,7.421489,23
2,SP_948,MKVTLSLLAVFLAALSASAIPANG,7.179219,24
3,SP_993,MKAIIILLLIILTLALTVQG,7.140379,20
4,SP_204,MRVFSATAILALSPLLIASASP,6.925975,22
...,...,...,...,...
79,SP_952,MRVSTIPTALLGLSLSAVNA,3.271387,20
80,SP_659,MVRLNSITALASLLAVSLTAA,3.640410,21
81,SP_576,MILSYRSLVVTTVALLGLSTPATA,3.873739,24
82,SP_560,MVLLKPLLATALLASLTPAVA,3.045672,21


In [7]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
from pydna.dseq import Dseq

# Simplified back-translation codon table
amino_acid_to_codon = {
    'A': 'GCT', 'R': 'CGT', 'N': 'AAT', 'D': 'GAT',
    'C': 'TGT', 'Q': 'CAA', 'E': 'GAA', 'G': 'GGT',
    'H': 'CAT', 'I': 'ATT', 'L': 'CTT', 'K': 'AAA',
    'M': 'ATG', 'F': 'TTT', 'P': 'CCT', 'S': 'TCT',
    'T': 'ACT', 'W': 'TGG', 'Y': 'TAT', 'V': 'GTT',
    'X': 'NNN'  # 'X' represents any amino acid
}

def back_translate(protein_sequence):
    return ''.join([amino_acid_to_codon[aa] for aa in protein_sequence])


In [8]:

seq_records = []
for index, row in final_sample.iterrows():
    dna_sequence = back_translate(row['Sequence'])
    dseq_obj = Dseq(dna_sequence)
    dseqrecord = Dseqrecord(dseq_obj, name=row['SP_ID'], description=f"Perplexity: {row['Perplexity']}, Length: {row['Length']}")
    seq_records.append(dseqrecord)

seq_records

[Dseqrecord(-57),
 Dseqrecord(-69),
 Dseqrecord(-72),
 Dseqrecord(-60),
 Dseqrecord(-66),
 Dseqrecord(-57),
 Dseqrecord(-69),
 Dseqrecord(-60),
 Dseqrecord(-60),
 Dseqrecord(-75),
 Dseqrecord(-66),
 Dseqrecord(-63),
 Dseqrecord(-54),
 Dseqrecord(-51),
 Dseqrecord(-66),
 Dseqrecord(-57),
 Dseqrecord(-63),
 Dseqrecord(-63),
 Dseqrecord(-57),
 Dseqrecord(-51),
 Dseqrecord(-57),
 Dseqrecord(-84),
 Dseqrecord(-66),
 Dseqrecord(-54),
 Dseqrecord(-60),
 Dseqrecord(-63),
 Dseqrecord(-57),
 Dseqrecord(-54),
 Dseqrecord(-60),
 Dseqrecord(-60),
 Dseqrecord(-66),
 Dseqrecord(-57),
 Dseqrecord(-57),
 Dseqrecord(-63),
 Dseqrecord(-72),
 Dseqrecord(-66),
 Dseqrecord(-51),
 Dseqrecord(-63),
 Dseqrecord(-63),
 Dseqrecord(-63),
 Dseqrecord(-54),
 Dseqrecord(-60),
 Dseqrecord(-66),
 Dseqrecord(-60),
 Dseqrecord(-72),
 Dseqrecord(-60),
 Dseqrecord(-63),
 Dseqrecord(-66),
 Dseqrecord(-57),
 Dseqrecord(-60),
 Dseqrecord(-60),
 Dseqrecord(-63),
 Dseqrecord(-60),
 Dseqrecord(-63),
 Dseqrecord(-72),
 Dseqrecor

### Codon optimize the sequences: 

A. oryzae tax id : 5062

https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=5062&lvl=3&lin=f&keep=1&srchmode=1&unlock

In [18]:
from dnachisel import DnaOptimizationProblem, CodonOptimize

# Codon optimization for Aspergillus oryzae
codon_optimized_records = []
codon_optimized_records_str = []
taxonomy_id = 5062 # a oryzae

for seq in seq_records:
    problem = DnaOptimizationProblem(
        sequence=str(seq.seq),
        constraints=[],
        #objectives=[CodonOptimize(codon_usage_table=a_oryzae, method='match_codon_usage')], 
        objectives=[CodonOptimize(species=taxonomy_id, method='match_codon_usage')], 
    )
    problem.optimize()
    optimized_dna_sequence = str(problem.sequence)
    
    dseq_obj = Dseq(optimized_dna_sequence)
    dseqrecord = Dseqrecord(dseq_obj, name=row['SP_ID'], description=f"Perplexity: {row['Perplexity']}, Length: {row['Length']}")
    codon_optimized_records.append(dseqrecord)
    codon_optimized_records_str.append(str(dseqrecord.seq))
    
final_sample['codon_optimized_sequences']= codon_optimized_records_str

                                                                                  

In [19]:
final_sample

Unnamed: 0,SP_ID,Sequence,Perplexity,Length,codon_optimized_sequences
0,SP_997,MRPTLLALGVISFALTLHS,7.515804,19,ATGCACTTCACGTTCATGGCTATCGCCTGGATTACCTTTGCGGCAA...
1,SP_944,MKFTLIILIIVILTIIFSPGALA,7.421489,23,ATGAAATTTATCATCATTTTCTTCATAAAGATCATTATGATGATTA...
2,SP_948,MKVTLSLLAVFLAALSASAIPANG,7.179219,24,ATGAAAGCGAACATGTTCATGGCAGCTGCAATCGCCGCTGCTAAGA...
3,SP_993,MKAIIILLLIILTLALTVQG,7.140379,20,ATGAAAGCTCAGACAATTATAACCAAGATCATCGCCACTAAGGCAG...
4,SP_204,MRVFSATAILALSPLLIASASP,6.925975,22,ATGATGTGGTTTTCAGCCATCTTCATTATGGCTATCAGCTTCGCGG...
...,...,...,...,...,...
79,SP_952,MRVSTIPTALLGLSLSAVNA,3.271387,20,ATGATGGTTGTCATCATTATGATGTGGCTTCTATGGATGTTGATGC...
80,SP_659,MVRLNSITALASLLAVSLTAA,3.640410,21,ATGGCCTTGCTAAATTGGATTGCAGCTGCGGCTGCAATGCTTCTGA...
81,SP_576,MILSYRSLVVTTVALLGLSTPATA,3.873739,24,ATGATTACCTGGTATATGATCATGGTTACGACCACTTACGCTGCAG...
82,SP_560,MVLLKPLLATALLASLTPAVA,3.045672,21,ATGGCAGCAGCCGTGAAGTTGCTCGCTAAACTGCTACTTGCCTGGA...


# GEnerating primers

In [21]:
RFP = Dseqrecord(SeqIO.read("../data/10_genetic_parts/parts/RFP_w_B_homolgy.fasta", "fasta"))
pTef = Dseqrecord(SeqIO.read("../data/10_genetic_parts/parts/pTEF_w_A_homology.fasta", "fasta"))
print(f'The lenght of RFP is {len(RFP)}')
print(f'The lenght of pTef is {len(pTef)}')

The lenght of RFP is 1977
The lenght of pTef is 1384


In [23]:
from pydna.amplify import pcr
from pydna.primer import Primer

In [68]:
generic_pTef_fwd = Primer('CGAGACAGCAGAATCACCG') # generic 
generic_pTef_rev = Primer('GGTGAAGGTTGTGTTATGTTTTGTGG') # martis

generic_rfp_fwd = Primer('GCCTCCTCCGAGGACG') # martis
generic_rfp_rev = Primer('GAGGAGAGTGGATGGATAGTCTGG') # generic



## generating the overhangs

In [80]:
reverse_ovh_list = []
forward_ovh_list = []

for index, row in final_sample.iterrows():
    # get the sequence
    seq = str(row['codon_optimized_sequences'])

    # make the overhangs like marti propose: 
    # Forward
    fwd_ovh = seq[-44:] + generic_rfp_fwd
    fwd_ovh.name= f"{str(row['SP_ID'])}_fwd"
    fwd_ovh.id = f"{str(row['SP_ID'])}_fwd"

    # reverse
    rev_ovh = Seq(seq[:-14]).reverse_complement()+ generic_pTef_rev 
    rev_ovh.name = f"{str(row['SP_ID'])}_rev"
    rev_ovh.id = f"{str(row['SP_ID'])}_rev"


    forward_ovh_list.append(fwd_ovh)
    reverse_ovh_list.append(rev_ovh)

In [81]:
forward_ovh_list

[SP_997_fwd 60-mer:5'-TCATGGCTATCGCCT..ACG-3',
 SP_944_fwd 60-mer:5'-TAAAGATCATTATGA..ACG-3',
 SP_948_fwd 60-mer:5'-CAATCGCCGCTGCTA..ACG-3',
 SP_993_fwd 60-mer:5'-TTATAACCAAGATCA..ACG-3',
 SP_204_fwd 60-mer:5'-TCATTATGGCTATCA..ACG-3',
 SP_281_fwd 60-mer:5'-TCACGGCAGCCATTA..ACG-3',
 SP_945_fwd 60-mer:5'-TCACCGCTGCAACTA..ACG-3',
 SP_914_fwd 60-mer:5'-TTTGGACAATTATTA..ACG-3',
 SP_662_fwd 60-mer:5'-TAGCTTTTGCAGCGA..ACG-3',
 SP_849_fwd 60-mer:5'-TCCTTTTCCTATTGC..ACG-3',
 SP_337_fwd 60-mer:5'-ATGCCTACACAGCGT..ACG-3',
 SP_249_fwd 60-mer:5'-CAATCATGTGGAAGG..ACG-3',
 SP_989_fwd 60-mer:5'-TTCATATGATTGTTA..ACG-3',
 SP_847_fwd 60-mer:5'-TCTGGGCCGCATTTA..ACG-3',
 SP_549_fwd 60-mer:5'-TATTTCTGGTGCTTT..ACG-3',
 SP_117_fwd 60-mer:5'-CCATGGCTGTTTTTT..ACG-3',
 SP_745_fwd 60-mer:5'-TGATATTCGTCACAT..ACG-3',
 SP_906_fwd 60-mer:5'-CCTTCGTGGTTGTTT..ACG-3',
 SP_457_fwd 60-mer:5'-GGGCGATTATTATTG..ACG-3',
 SP_219_fwd 60-mer:5'-CGGGCTTTGGTACAA..ACG-3',
 SP_882_fwd 60-mer:5'-TATTTCAGCTTATTA..ACG-3',
 SP_410_fwd 6

In [82]:
reverse_ovh_list

[SP_997_rev 69-mer:5'-CCGCAAAGGTAATCC..TGG-3',
 SP_944_rev 81-mer:5'-CCTTAAAGATAATCA..TGG-3',
 SP_948_rev 84-mer:5'-TAAAGAAGGCCTTCT..TGG-3',
 SP_993_rev 72-mer:5'-CTGCCTTAGTGGCGA..TGG-3',
 SP_204_rev 78-mer:5'-AACTTGCCGCGAAGC..TGG-3',
 SP_281_rev 69-mer:5'-CTGTAGCCGCTGCGG..TGG-3',
 SP_945_rev 81-mer:5'-TTATTGGCGGGATAA..TGG-3',
 SP_914_rev 72-mer:5'-AGGTCATGGTAGTCA..TGG-3',
 SP_662_rev 72-mer:5'-CCGCGATGGCGGCAA..TGG-3',
 SP_849_rev 87-mer:5'-TAGTGGTAAATGTGA..TGG-3',
 SP_337_rev 78-mer:5'-CGGTGGTGATGAAAA..TGG-3',
 SP_249_rev 75-mer:5'-CAGCGAAAGCGAACG..TGG-3',
 SP_989_rev 66-mer:5'-AGAACACTGTCGTGA..TGG-3',
 SP_847_rev 63-mer:5'-AGATCATGATCATCA..TGG-3',
 SP_549_rev 78-mer:5'-TAACGATGACAATGA..TGG-3',
 SP_117_rev 69-mer:5'-ACCACATAGCCTGGA..TGG-3',
 SP_745_rev 75-mer:5'-TGGTCTGAACCACGC..TGG-3',
 SP_906_rev 75-mer:5'-CTACTGCCCAGACGA..TGG-3',
 SP_457_rev 69-mer:5'-TGAATATGGCAGCTG..TGG-3',
 SP_219_rev 63-mer:5'-TGAACCCTCCGGTAA..TGG-3',
 SP_882_rev 69-mer:5'-GAAACAACTTGAAGA..TGG-3',
 SP_410_rev 9

Amplicons 

In [83]:
from pydna.amplify import pcr
from pydna.primer import Primer

pTef_amplicons = []
for i in range(len(reverse_ovh_list)): 
    amplicon = pcr(generic_pTef_fwd, reverse_ovh_list[i], pTef)
    pTef_amplicons.append(amplicon)

In [84]:

RFP_amplicons = []
for i in range(len(forward_ovh_list)): 
    amplicon = pcr(forward_ovh_list[i], generic_rfp_rev, RFP)
    RFP_amplicons.append(amplicon)

# Testing the overlap

In [85]:
id_names= final_sample['SP_ID'].to_list()

In [86]:
# changing the names of the amplicons
for i in range(len(pTef_amplicons)):
    pTef_amplicons[i].name =  f'pTef_{pTef_amplicons[i].name }_{id_names[i]}'
    
for i in range(len(RFP_amplicons)):
    RFP_amplicons[i].name =  f'RFP_{RFP_amplicons[i].name }_{id_names[i]}'

In [87]:
from pydna.assembly import Assembly

In [88]:
for i in range(len(RFP_amplicons)): 
    assemblyobj = Assembly([pTef_amplicons[i],RFP_amplicons[i]], limit = 18)
    contig = assemblyobj.assemble_linear()[0]

    print(contig.figure(), '\n')


pTef_929bp_PCR_prod_SP_997|30
                           \/
                           /\
                           30|RFP_2021bp_PCR_prod_SP_997 

pTef_941bp_PCR_prod_SP_944|30
                           \/
                           /\
                           30|RFP_2021bp_PCR_prod_SP_944 

pTef_944bp_PCR_prod_SP_948|30
                           \/
                           /\
                           30|RFP_2021bp_PCR_prod_SP_948 

pTef_932bp_PCR_prod_SP_993|30
                           \/
                           /\
                           30|RFP_2021bp_PCR_prod_SP_993 

pTef_938bp_PCR_prod_SP_204|30
                           \/
                           /\
                           30|RFP_2021bp_PCR_prod_SP_204 

pTef_929bp_PCR_prod_SP_281|30
                           \/
                           /\
                           30|RFP_2021bp_PCR_prod_SP_281 

pTef_941bp_PCR_prod_SP_945|30
                           \/
                           /\
                

In [97]:
# changing the names of the amplicons
forward_primers = []
reverse_primers = []

for i in range(len(RFP_amplicons)):
    forward_primers.append(str(RFP_amplicons[i].forward_primer.seq))

for i in range(len(pTef_amplicons)):
    
    reverse_primers.append(str(pTef_amplicons[i].reverse_primer.seq))
    


In [99]:
final_sample['fwd_primers(5-3)']= forward_primers
final_sample['reverse_primers(5-3)']= reverse_primers
final_sample

Unnamed: 0,SP_ID,Sequence,Perplexity,Length,codon_optimized_sequences,fwd_primers(5-3),reverse_primers(5-3)
0,SP_997,MRPTLLALGVISFALTLHS,7.515804,19,ATGCACTTCACGTTCATGGCTATCGCCTGGATTACCTTTGCGGCAA...,TCATGGCTATCGCCTGGATTACCTTTGCGGCAACTACACATTGGGC...,CCGCAAAGGTAATCCAGGCGATAGCCATGAACGTGAAGTGCATGGT...
1,SP_944,MKFTLIILIIVILTIIFSPGALA,7.421489,23,ATGAAATTTATCATCATTTTCTTCATAAAGATCATTATGATGATTA...,TAAAGATCATTATGATGATTATCTTTAAGGCATTCGCTGCCGCGGC...,CCTTAAAGATAATCATCATAATGATCTTTATGAAGAAAATGATGAT...
2,SP_948,MKVTLSLLAVFLAALSASAIPANG,7.179219,24,ATGAAAGCGAACATGTTCATGGCAGCTGCAATCGCCGCTGCTAAGA...,CAATCGCCGCTGCTAAGAAGGCCTTCTTTATTGCGGCCAATTGGGC...,TAAAGAAGGCCTTCTTAGCAGCGGCGATTGCAGCTGCCATGAACAT...
3,SP_993,MKAIIILLLIILTLALTVQG,7.140379,20,ATGAAAGCTCAGACAATTATAACCAAGATCATCGCCACTAAGGCAG...,TTATAACCAAGATCATCGCCACTAAGGCAGCGATGACGCAAACCGC...,CTGCCTTAGTGGCGATGATCTTGGTTATAATTGTCTGAGCTTTCAT...
4,SP_204,MRVFSATAILALSPLLIASASP,6.925975,22,ATGATGTGGTTTTCAGCCATCTTCATTATGGCTATCAGCTTCGCGG...,TCATTATGGCTATCAGCTTCGCGGCAAGTTCCTCTATATCGATGGC...,AACTTGCCGCGAAGCTGATAGCCATAATGAAGATGGCTGAAAACCA...
...,...,...,...,...,...,...,...
79,SP_952,MRVSTIPTALLGLSLSAVNA,3.271387,20,ATGATGGTTGTCATCATTATGATGTGGCTTCTATGGATGTTGATGC...,TTATGATGTGGCTTCTATGGATGTTGATGCTGGTGCTCAATAACGC...,GCATCAACATCCATAGAAGCCACATCATAATGATGACAACCATCAT...
80,SP_659,MVRLNSITALASLLAVSLTAA,3.640410,21,ATGGCCTTGCTAAATTGGATTGCAGCTGCGGCTGCAATGCTTCTGA...,TTGCAGCTGCGGCTGCAATGCTTCTGATCTGGATGAACCTCGCCGC...,AGATCAGAAGCATTGCAGCCGCAGCTGCAATCCAATTTAGCAAGGC...
81,SP_576,MILSYRSLVVTTVALLGLSTPATA,3.873739,24,ATGATTACCTGGTATATGATCATGGTTACGACCACTTACGCTGCAG...,CGACCACTTACGCTGCAGTGTGGGCGGTCGCCGCCACAGCAGCTGC...,CGACCGCCCACACTGCAGCGTAAGTGGTCGTAACCATGATCATATA...
82,SP_560,MVLLKPLLATALLASLTPAVA,3.045672,21,ATGGCAGCAGCCGTGAAGTTGCTCGCTAAACTGCTACTTGCCTGGA...,TGCTCGCTAAACTGCTACTTGCCTGGAAGGTCATGGCGGTTGCTGC...,CCTTCCAGGCAAGTAGCAGTTTAGCGAGCAACTTCACGGCTGCTGC...


For IDT:

In [101]:
!pip install xlrd

Collecting xlrd
  Using cached xlrd-2.0.1-py2.py3-none-any.whl (96 kB)
Installing collected packages: xlrd
Successfully installed xlrd-2.0.1


In [115]:
fwd_name = [seq.name for seq in forward_ovh_list]
fwd_seq = [str(seq.seq) for seq in forward_ovh_list]

rev_name = [seq.name for seq in reverse_ovh_list]
rev_seq = [str(seq.seq) for seq in reverse_ovh_list]


In [113]:
forward_primers_well_plate = pd.read_excel('../data/example_plate-file-upload (1).xls')[:84]
forward_primers_well_plate['Name'] = fwd_name
forward_primers_well_plate['Sequence'] = fwd_seq

forward_primers_well_plate


Unnamed: 0,Well Position,Name,Sequence
0,A1,SP_997_fwd,TCATGGCTATCGCCTGGATTACCTTTGCGGCAACTACACATTGGGC...
1,A2,SP_944_fwd,TAAAGATCATTATGATGATTATCTTTAAGGCATTCGCTGCCGCGGC...
2,A3,SP_948_fwd,CAATCGCCGCTGCTAAGAAGGCCTTCTTTATTGCGGCCAATTGGGC...
3,A4,SP_993_fwd,TTATAACCAAGATCATCGCCACTAAGGCAGCGATGACGCAAACCGC...
4,A5,SP_204_fwd,TCATTATGGCTATCAGCTTCGCGGCAAGTTCCTCTATATCGATGGC...
...,...,...,...
79,G8,SP_952_fwd,TTATGATGTGGCTTCTATGGATGTTGATGCTGGTGCTCAATAACGC...
80,G9,SP_659_fwd,TTGCAGCTGCGGCTGCAATGCTTCTGATCTGGATGAACCTCGCCGC...
81,G10,SP_576_fwd,CGACCACTTACGCTGCAGTGTGGGCGGTCGCCGCCACAGCAGCTGC...
82,G11,SP_560_fwd,TGCTCGCTAAACTGCTACTTGCCTGGAAGGTCATGGCGGTTGCTGC...


In [116]:
reverse_primers_well_plate = pd.read_excel('../data/example_plate-file-upload (1).xls')[:84]
reverse_primers_well_plate['Name'] = rev_name
reverse_primers_well_plate['Sequence'] = rev_seq

reverse_primers_well_plate

Unnamed: 0,Well Position,Name,Sequence
0,A1,SP_997_rev,CCGCAAAGGTAATCCAGGCGATAGCCATGAACGTGAAGTGCATGGT...
1,A2,SP_944_rev,CCTTAAAGATAATCATCATAATGATCTTTATGAAGAAAATGATGAT...
2,A3,SP_948_rev,TAAAGAAGGCCTTCTTAGCAGCGGCGATTGCAGCTGCCATGAACAT...
3,A4,SP_993_rev,CTGCCTTAGTGGCGATGATCTTGGTTATAATTGTCTGAGCTTTCAT...
4,A5,SP_204_rev,AACTTGCCGCGAAGCTGATAGCCATAATGAAGATGGCTGAAAACCA...
...,...,...,...
79,G8,SP_952_rev,GCATCAACATCCATAGAAGCCACATCATAATGATGACAACCATCAT...
80,G9,SP_659_rev,AGATCAGAAGCATTGCAGCCGCAGCTGCAATCCAATTTAGCAAGGC...
81,G10,SP_576_rev,CGACCGCCCACACTGCAGCGTAAGTGGTCGTAACCATGATCATATA...
82,G11,SP_560_rev,CCTTCCAGGCAAGTAGCAGTTTAGCGAGCAACTTCACGGCTGCTGC...
