# Mutagenesis primer design

The goal here is to design primers for mutagenesis on PB2.

The specific PB2 sequence that we are going to use is `A/Victoria/361/2011`.

In [24]:
%load_ext autoreload
%autoreload 2

from Bio import SeqIO

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
def report(seqrecord):
    """Print a sequence record, then the length of its sequence in nucleotides.

    :param seqrecord: object with a ``seq`` attribute that supports ``len()``.
    """
    print(seqrecord)
    length_line = 'Length: {0} n.t.'.format(len(seqrecord.seq))
    print(length_line)

Load the Victoria PB2 sequence:

In [26]:
# SeqIO.read expects the FASTA file to hold exactly one record.
vicpb2 = SeqIO.read('../../data/victoria-pb2.fasta', 'fasta')
report(vicpb2)

ID: A/Victoria/361/2011|KJ942687
Name: A/Victoria/361/2011|KJ942687
Description: A/Victoria/361/2011|KJ942687
Number of features: 0
Seq('AGCAAAAGCAGGTCAATTATATTCAGTATGGAAAGAATAAAAGAACTACGGAAT...TAC', SingleLetterAlphabet())
Length: 2340 n.t.


Load the pCI sequence:

In [27]:
# pCI backbone sequence, read as a single FASTA record.
pCI = SeqIO.read('../../data/pCI.fasta', 'fasta')
report(pCI)

ID: pCI
Name: pCI
Description: pCI
Number of features: 0
Seq('ACGCGTGGTACCTCTAGAGTCGACCCGGGCGGCCGCTTCGAGCAGACATGATAA...GAG', SingleLetterAlphabet())
Length: 4001 n.t.


Define split points at the 1/3 and 2/3 marks of the Victoria PB2 and pCI sequences. This is a backup measure in case full-length amplification does not work.

In [28]:
def splits(seqrecord, frac1):
    """Return the index lying ``frac1`` of the way along a record's sequence.

    The sequence length is multiplied by ``frac1`` and the product is passed
    through the built-in ``round()``.

    :param seqrecord: object with a ``seq`` attribute that supports ``len()``.
    :param frac1: fraction of the sequence length at which to split.
    """
    total_length = len(seqrecord.seq)
    return round(total_length * frac1)
# Split index one third of the way along PB2.
vicpb2_split1 = splits(vicpb2, 1/3)
# This bare expression is not displayed: only a cell's final expression is shown.
vicpb2_split1

# Split index two thirds of the way along PB2; this is the value the cell displays.
vicpb2_split2 = splits(vicpb2, 2/3)
vicpb2_split2

1560

In [29]:
# Split index one third of the way along pCI.
pCI_split1 = splits(pCI, 1/3)
# This bare expression is not displayed: only a cell's final expression is shown.
pCI_split1

# Split index two thirds of the way along pCI; this is the value the cell displays.
pCI_split2 = splits(pCI, 2/3)
pCI_split2

2667

In [30]:
from Bio.SeqRecord import SeqRecord

def split_seqrecord(seqrecord, split_location):
    """Cut a record's sequence in two at ``split_location``.

    :param seqrecord: record whose ``seq`` is sliced and whose ``id`` is reused.
    :param split_location: slice index; the first part is
        ``seq[0:split_location]`` and the second is ``seq[split_location:]``.
    :returns: ``(part1, part2)`` tuple of new SeqRecord objects whose ids are
        the original id with ``_part1`` / ``_part2`` appended. Only the sliced
        sequence and the id are passed to the new records.
    """
    head_seq = seqrecord.seq[0:split_location]
    tail_seq = seqrecord.seq[split_location:]

    head = SeqRecord(head_seq, id='{0}_part1'.format(seqrecord.id))
    tail = SeqRecord(tail_seq, id='{0}_part2'.format(seqrecord.id))
    return head, tail
    
    
# PB2 cut at its 1/3 split index; the trailing tuple displays both fragments.
vicpb2_split1_part1, vicpb2_split1_part2 = split_seqrecord(vicpb2, vicpb2_split1)
vicpb2_split1_part1, vicpb2_split1_part2

(SeqRecord(seq=Seq('AGCAAAAGCAGGTCAATTATATTCAGTATGGAAAGAATAAAAGAACTACGGAAT...AGG', SingleLetterAlphabet()), id='A/Victoria/361/2011|KJ942687_part1', name='<unknown name>', description='<unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('AATGACGATGTTGACCAAAGCCTAATTATTGCGGCCAGGAACATAGTAAGAAGA...TAC', SingleLetterAlphabet()), id='A/Victoria/361/2011|KJ942687_part2', name='<unknown name>', description='<unknown description>', dbxrefs=[]))

In [31]:
# PB2 cut at its 2/3 split index. The fragments get the same '_part1'/'_part2'
# ids as the 1/3 split, so only the variable names tell the two splits apart.
vicpb2_split2_part1, vicpb2_split2_part2 = split_seqrecord(vicpb2, vicpb2_split2)
vicpb2_split2_part1, vicpb2_split2_part2

(SeqRecord(seq=Seq('AGCAAAAGCAGGTCAATTATATTCAGTATGGAAAGAATAAAAGAACTACGGAAT...GTA', SingleLetterAlphabet()), id='A/Victoria/361/2011|KJ942687_part1', name='<unknown name>', description='<unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('TTATTATCTCCTGAGGAGGTCAGTGAAACACAGGGAACTGAGAGACTGACAATA...TAC', SingleLetterAlphabet()), id='A/Victoria/361/2011|KJ942687_part2', name='<unknown name>', description='<unknown description>', dbxrefs=[]))

In [32]:
# pCI cut at its 1/3 split index; the trailing tuple displays both fragments.
pCI_split1_part1, pCI_split1_part2 = split_seqrecord(pCI, pCI_split1)
pCI_split1_part1, pCI_split1_part2

(SeqRecord(seq=Seq('ACGCGTGGTACCTCTAGAGTCGACCCGGGCGGCCGCTTCGAGCAGACATGATAA...GAA', SingleLetterAlphabet()), id='pCI_part1', name='<unknown name>', description='<unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('AGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGA...GAG', SingleLetterAlphabet()), id='pCI_part2', name='<unknown name>', description='<unknown description>', dbxrefs=[]))

In [33]:
# pCI cut at its 2/3 split index. The fragments get the same 'pCI_part1'/'pCI_part2'
# ids as the 1/3 split, so only the variable names tell the two splits apart.
pCI_split2_part1, pCI_split2_part2 = split_seqrecord(pCI, pCI_split2)
pCI_split2_part1, pCI_split2_part2

(SeqRecord(seq=Seq('ACGCGTGGTACCTCTAGAGTCGACCCGGGCGGCCGCTTCGAGCAGACATGATAA...GAG', SingleLetterAlphabet()), id='pCI_part1', name='<unknown name>', description='<unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('CTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTA...GAG', SingleLetterAlphabet()), id='pCI_part2', name='<unknown name>', description='<unknown description>', dbxrefs=[]))

Steps:
1. Compute primers for full amplification.
2. Compute primers for amplification using split PB2 gene.
3. Compute primers for amplification using split pCI backbone.

In [34]:
from mbtools.assembly import GibsonAssembler

In [35]:
# Assembly 1: VicPB2 and pCI are both amplified full-length.
# Print each part's identifier alongside the length of its PCR product.
assembly1 = GibsonAssembler([vicpb2, pCI])
assembly1_products = assembly1.pcr_products()
for part_id, amplicon in assembly1_products.items():
    print(part_id, len(amplicon))

pCI 4027
A/Victoria/361/2011|KJ942687 2366




In [36]:
# Assembly 2: VicPB2 is split into two parts at its 1/3 point; pCI stays full-length.
# Print each part's identifier alongside the length of its PCR product.
assembly2 = GibsonAssembler([vicpb2_split1_part1, vicpb2_split1_part2, pCI])
assembly2_products = assembly2.pcr_products()
for part_id, amplicon in assembly2_products.items():
    print(part_id, len(amplicon))

pCI 4027
A/Victoria/361/2011|KJ942687_part1 806
A/Victoria/361/2011|KJ942687_part2 1586




In [37]:
# Assembly 3: VicPB2 and pCI are each split at their 1/3 points.
# Print each part's identifier alongside the length of its PCR product.
assembly3 = GibsonAssembler([
    vicpb2_split1_part1,
    vicpb2_split1_part2,
    pCI_split1_part1,
    pCI_split1_part2,
])
assembly3_products = assembly3.pcr_products()
for part_id, amplicon in assembly3_products.items():
    print(part_id, len(amplicon))

A/Victoria/361/2011|KJ942687_part2 1586
pCI_part2 2693
A/Victoria/361/2011|KJ942687_part1 806
pCI_part1 1360




In [38]:
# Assembly 4: VicPB2 and pCI are each split at their 2/3 points.
# Print each part's identifier alongside the length of its PCR product.
assembly4 = GibsonAssembler([
    vicpb2_split2_part1,
    vicpb2_split2_part2,
    pCI_split2_part1,
    pCI_split2_part2,
])
assembly4_products = assembly4.pcr_products()
for part_id, amplicon in assembly4_products.items():
    print(part_id, len(amplicon))

A/Victoria/361/2011|KJ942687_part2 806
pCI_part2 1360
A/Victoria/361/2011|KJ942687_part1 1586
pCI_part1 2693




In [39]:
# Flatten assembly 4's primers into one dict per primer (part, primer name,
# sequence as a plain string) so they can be stored row-wise.
assembly4_primers = [
    {'part': part, 'primer': primer, 'sequence': str(sequence)}
    for part, primers in assembly4.primers().items()
    for primer, sequence in primers.items()
]

In [40]:
from tinydb import TinyDB, Query

# Primer database file that the following cells query, insert into and export from.
primerdb = TinyDB('../../data/primers.db.json')

In [41]:
# Store each assembly 4 primer unless a record with the same sequence is
# already in the database, so re-running this cell adds nothing new.
Primer = Query()
for record in assembly4_primers:
    already_stored = primerdb.contains(Primer.sequence == record['sequence'])
    if not already_stored:
        primerdb.insert(record)

In [42]:
# Number of primer records in the database after the insert step.
len(primerdb)

16

In [43]:
import pandas as pd
# Give every stored primer an order ID of the form 'EM-<database id>' and
# export the whole table to primers.csv.
primers_to_order = primerdb.all()
for p in primers_to_order:
    # Look the record up by sequence to obtain its database identifier.
    doc = primerdb.get(Query().sequence == p['sequence'])
    # TinyDB renamed the identifier attribute from `eid` to `doc_id` and later
    # removed `eid`; prefer `doc_id` and fall back to `eid` so this cell runs
    # on either side of the rename.
    eid = getattr(doc, 'doc_id', None)
    if eid is None:
        eid = doc.eid
    p['primer_id'] = 'EM-{0}'.format(eid)
pd.DataFrame(primers_to_order).to_csv('primers.csv')