# Mutagenesis primer design

The goal here is to design primers for mutagenesis on PB2.

The specific PB2 sequence that we are going to use is `A/Victoria/361/2011`.

In [1]:
%load_ext autoreload
%autoreload 2

from Bio import SeqIO

In [2]:
def report(seqrecord):
    """Print a record followed by the length of its sequence.

    Parameters
    ----------
    seqrecord : object
        Any printable object exposing a sized ``seq`` attribute; in this
        notebook, the records returned by ``SeqIO.read``.

    Returns
    -------
    None
        The summary is written to standard output only.
    """
    # Print the record first so it appears even if the length lookup fails.
    print(seqrecord)
    seq_length = len(seqrecord.seq)
    print('Length: {0} n.t.'.format(seq_length))

Load the Victoria PB2 sequence:

In [3]:
# SeqIO.read expects the FASTA file to contain exactly one record.
vicpb2 = SeqIO.read('../../data/victoria-pb2.fasta', 'fasta')
# Print the record and its length in nucleotides.
report(vicpb2)

ID: A/Victoria/361/2011|KJ942687
Name: A/Victoria/361/2011|KJ942687
Description: A/Victoria/361/2011|KJ942687
Number of features: 0
Seq('AGCAAAAGCAGGTCAATTATATTCAGTATGGAAAGAATAAAAGAACTACGGAAT...TAC', SingleLetterAlphabet())
Length: 2340 n.t.


Load the pCI sequence:

In [4]:
# SeqIO.read expects the FASTA file to contain exactly one record.
pCI = SeqIO.read('../../data/pCI.fasta', 'fasta')
# Print the record and its length in nucleotides.
report(pCI)

ID: pCI
Name: pCI
Description: pCI
Number of features: 0
Seq('ACGCGTGGTACCTCTAGAGTCGACCCGGGCGGCCGCTTCGAGCAGACATGATAA...GAG', SingleLetterAlphabet())
Length: 4001 n.t.


Define splits at the 1/3 and 2/3 marks of the Victoria PB2 and pCI sequences. This is a backup measure in case full-length amplification does not work.

In [5]:
def splits(seqrecord, frac1):
    """Return the position lying ``frac1`` of the way along a record.

    Parameters
    ----------
    seqrecord : object
        Object exposing a sized ``seq`` attribute.
    frac1 : float
        Fraction of the sequence length at which to split (e.g. ``1/3``).

    Returns
    -------
    int
        ``len(seqrecord.seq) * frac1`` rounded with the built-in ``round``.
    """
    total_length = len(seqrecord.seq)
    position = total_length * frac1
    return round(position)
# Position one third of the way along Victoria PB2.
vicpb2_split1 = splits(vicpb2, 1/3)
# NOTE: a bare name that is not the cell's last expression is not displayed.
vicpb2_split1

# Position two thirds of the way along; this is the value the cell displays.
vicpb2_split2 = splits(vicpb2, 2/3)
vicpb2_split2

1560

In [6]:
# Position one third of the way along the pCI backbone.
pCI_split1 = splits(pCI, 1/3)
# NOTE: a bare name that is not the cell's last expression is not displayed.
pCI_split1

# Position two thirds of the way along; this is the value the cell displays.
pCI_split2 = splits(pCI, 2/3)
pCI_split2

2667

In [7]:
from Bio.SeqRecord import SeqRecord

def split_seqrecord(seqrecord, split_location):
    """Cut a record into two new records at ``split_location``.

    Parameters
    ----------
    seqrecord : SeqRecord
        Record whose ``seq`` is sliced and whose ``id`` seeds the new ids.
    split_location : int
        Slice index; the first record holds ``seq[:split_location]`` and the
        second holds ``seq[split_location:]``.

    Returns
    -------
    tuple of SeqRecord
        ``(part1, part2)`` with ids ``'<id>_part1'`` and ``'<id>_part2'``.
        The ids do not encode ``split_location``, so splitting the same
        record at different positions yields identically named parts.
    """
    head_seq = seqrecord.seq[:split_location]
    tail_seq = seqrecord.seq[split_location:]

    head = SeqRecord(head_seq, id='{0}_part1'.format(seqrecord.id))
    tail = SeqRecord(tail_seq, id='{0}_part2'.format(seqrecord.id))
    return head, tail
    
    
# Split Victoria PB2 at its 1/3 position and display both halves.
vicpb2_split1_part1, vicpb2_split1_part2 = split_seqrecord(vicpb2, vicpb2_split1)
vicpb2_split1_part1, vicpb2_split1_part2

(SeqRecord(seq=Seq('AGCAAAAGCAGGTCAATTATATTCAGTATGGAAAGAATAAAAGAACTACGGAAT...AGG', SingleLetterAlphabet()), id='A/Victoria/361/2011|KJ942687_part1', name='<unknown name>', description='<unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('AATGACGATGTTGACCAAAGCCTAATTATTGCGGCCAGGAACATAGTAAGAAGA...TAC', SingleLetterAlphabet()), id='A/Victoria/361/2011|KJ942687_part2', name='<unknown name>', description='<unknown description>', dbxrefs=[]))

In [8]:
# Split Victoria PB2 at its 2/3 position and display both halves.
# NOTE: split_seqrecord names the halves '<id>_part1'/'<id>_part2' for any
# split position, so these records share ids with the 1/3-split records.
vicpb2_split2_part1, vicpb2_split2_part2 = split_seqrecord(vicpb2, vicpb2_split2)
vicpb2_split2_part1, vicpb2_split2_part2

(SeqRecord(seq=Seq('AGCAAAAGCAGGTCAATTATATTCAGTATGGAAAGAATAAAAGAACTACGGAAT...GTA', SingleLetterAlphabet()), id='A/Victoria/361/2011|KJ942687_part1', name='<unknown name>', description='<unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('TTATTATCTCCTGAGGAGGTCAGTGAAACACAGGGAACTGAGAGACTGACAATA...TAC', SingleLetterAlphabet()), id='A/Victoria/361/2011|KJ942687_part2', name='<unknown name>', description='<unknown description>', dbxrefs=[]))

In [9]:
# Split the pCI backbone at its 1/3 position and display both halves.
pCI_split1_part1, pCI_split1_part2 = split_seqrecord(pCI, pCI_split1)
pCI_split1_part1, pCI_split1_part2

(SeqRecord(seq=Seq('ACGCGTGGTACCTCTAGAGTCGACCCGGGCGGCCGCTTCGAGCAGACATGATAA...GAA', SingleLetterAlphabet()), id='pCI_part1', name='<unknown name>', description='<unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('AGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGA...GAG', SingleLetterAlphabet()), id='pCI_part2', name='<unknown name>', description='<unknown description>', dbxrefs=[]))

In [10]:
# Split the pCI backbone at its 2/3 position and display both halves.
# NOTE: split_seqrecord names the halves '<id>_part1'/'<id>_part2' for any
# split position, so these records share ids with the 1/3-split records.
pCI_split2_part1, pCI_split2_part2 = split_seqrecord(pCI, pCI_split2)
pCI_split2_part1, pCI_split2_part2

(SeqRecord(seq=Seq('ACGCGTGGTACCTCTAGAGTCGACCCGGGCGGCCGCTTCGAGCAGACATGATAA...GAG', SingleLetterAlphabet()), id='pCI_part1', name='<unknown name>', description='<unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('CTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTA...GAG', SingleLetterAlphabet()), id='pCI_part2', name='<unknown name>', description='<unknown description>', dbxrefs=[]))

Steps:
1. Compute primers for full amplification.
2. Compute primers for amplification using split PB2 gene.
3. Compute primers for amplification using split pCI backbone.

In [11]:
from mbtools.assembly import GibsonAssembler

In [12]:
# Assembly 1: Scenario where VicPB2 and pCI are amplified full-length.
assembly1 = GibsonAssembler([vicpb2, pCI])
# Print each part's id alongside the length of its PCR product.
for part, product in assembly1.pcr_products().items():
    print(part, len(product))

A/Victoria/361/2011|KJ942687 2366
pCI 4027




In [13]:
# Assembly 2: Scenario where VicPB2 is split into two parts at the 1/3 point.
assembly2 = GibsonAssembler([vicpb2_split1_part1, vicpb2_split1_part2, pCI])
# Print each part's id alongside the length of its PCR product.
for part, product in assembly2.pcr_products().items():
    print(part, len(product))

A/Victoria/361/2011|KJ942687_part1 806
pCI 4027
A/Victoria/361/2011|KJ942687_part2 1586




In [14]:
# Assembly 3: Scenario where VicPB2 and pCI are split each at their 1/3 points.
assembly3 = GibsonAssembler([vicpb2_split1_part1, vicpb2_split1_part2, pCI_split1_part1, pCI_split1_part2])
# Print each part's id alongside the length of its PCR product.
for part, product in assembly3.pcr_products().items():
    print(part, len(product))



A/Victoria/361/2011|KJ942687_part1 806
pCI_part2 2693
pCI_part1 1360
A/Victoria/361/2011|KJ942687_part2 1586


In [15]:
# Assembly 4: Scenario where VicPB2 and pCI are split each at their 2/3 points.
assembly4 = GibsonAssembler([vicpb2_split2_part1, vicpb2_split2_part2, pCI_split2_part1, pCI_split2_part2])
# Print each part's id alongside the length of its PCR product.
for part, product in assembly4.pcr_products().items():
    print(part, len(product))

pCI_part2 1360
A/Victoria/361/2011|KJ942687_part1 1586
pCI_part1 2693
A/Victoria/361/2011|KJ942687_part2 806




In [16]:
# Flatten the nested {part: {primer name: sequence}} mapping returned by
# assembly4.primers() into one plain-dict record per primer, with the
# sequence stored as a string.
assembly4_primers = [
    {'part': part, 'primer': primer, 'sequence': str(sequence)}
    for part, primers in assembly4.primers().items()
    for primer, sequence in primers.items()
]

Check the assembly 4 primers into the primer database, skipping any sequence that is already stored.

In [17]:
from tinydb import TinyDB, Query

# Open the TinyDB primer database kept in the shared data directory.
primerdb = TinyDB('../../data/primers.db.json')

In [18]:
# Insert each assembly 4 primer unless a record with the same sequence is
# already stored, so that re-running this cell does not create duplicates.
for primer in assembly4_primers:
    p = Query()
    # Records are matched on sequence only; 'part' and 'primer' are ignored.
    if not primerdb.contains(p.sequence == primer['sequence']):
        primerdb.insert(primer)
        print('Inserting ', primer)

In [19]:
len(primerdb)

16

Export the primers to be ordered.

In [20]:
import pandas as pd
# Fetch every primer record stored in the database.
primers_to_order = primerdb.all()
for p in primers_to_order:
    # Look the record up again by sequence to obtain its database element id.
    # NOTE(review): `p` presumably already carries this id (p.eid) -- confirm
    # before simplifying.
    eid = primerdb.get(Query().sequence == p['sequence']).eid
    # Ordering label: 'EM-' followed by the element id.
    p['primer_id'] = 'EM-{0}'.format(eid)
# Write the table to primers.csv in the current working directory.
pd.DataFrame(primers_to_order).to_csv('primers.csv')

In this cloning reaction, there will be a backbone that needs to be amplified using Phusion (not mutated), and an insert that needs to be amplified using the Agilent GeneMorph II enzyme. I will compute separate PCR protocols here.

In [21]:
# First, I will set up the Agilent amplification conditions.

# The cycle-count helper is imported under an alias so that the integer
# result stored in `num_cycles` below does not shadow the function.
from mbtools.pcr import input_plasmid_mass, input_volume
from mbtools.pcr import num_cycles as compute_num_cycles

import math

# Desired mutation frequency; selects an entry from both lookup tables below.
mut_freq = 'low'

# Per-frequency settings: target DNA mass (presumably in ng, matching
# `plasmid_conc` below -- confirm) and fold amplification.
mut_freq_mass = dict(low=500, med=100, high=50)
mut_freq_fold = dict(low=5, med=100, high=5000)

target_length = len(vicpb2)
plasmid_length = len(vicpb2) + len(pCI)
# Look the mass up with `mut_freq` (not a hard-coded key) so that it always
# agrees with `fold_amp` when the mutation frequency is changed.
target_mass = mut_freq_mass[mut_freq]

plasmid_mass = input_plasmid_mass(target_length, plasmid_length, target_mass)

plasmid_conc = 416  # in units ng/µL
volume = input_volume(plasmid_mass, plasmid_conc)

fold_amp = mut_freq_fold[mut_freq]
num_cycles = compute_num_cycles(fold_amp)
# Whole minutes: one minute per 1000 nt of target, rounded up.
cycle_time = math.ceil(target_length / 1000)

In [22]:
cycle_time  # in minutes

3

In [23]:
num_cycles  # an integer

3

In [24]:
fold_amp 

5

In [25]:
volume

3.2570060815253123

In [26]:
# Get primers out from each of the reactions.
# The result for assembly 1 is keyed by part id, then by primer name
# ('fw_gibson', 're_gibson', '5p_sequencing', '3p_sequencing').
a1_primers = assembly1.primers()
a1_primers

defaultdict(dict,
            {'A/Victoria/361/2011|KJ942687': {'3p_sequencing': Seq('GTGTTGGTAATGAAACGAAA', SingleLetterAlphabet()),
              '5p_sequencing': Seq('AATTATGGCCATATGGTCCA', SingleLetterAlphabet()),
              'fw_gibson': Seq('GGCTAGCCTCGAGAGCAAAAGCAGGTCAATTATATTCAGT', SingleLetterAlphabet()),
              're_gibson': Seq('AGGTACCACGCGTGTAGAAACAAGGTCGTTTTTAAACTAT', SingleLetterAlphabet())},
             'pCI': {'3p_sequencing': Seq('GCACCTATTGGTCTTACTGA', SingleLetterAlphabet()),
              '5p_sequencing': Seq('TTTCACAAATAAAGCATTTT', SingleLetterAlphabet()),
              'fw_gibson': Seq('ACCTTGTTTCTACACGCGTGGTACCTCTAGAGTCGACCCG', SingleLetterAlphabet()),
              're_gibson': Seq('ACCTGCTTTTGCTCTCGAGGCTAGCCTATAGTGAGTCGTA', SingleLetterAlphabet())}})

In [27]:
# Match up the Fw_gibson and Re_gibson primers for each template.
# NOTE(review): the expression below only reports the length (in nucleotides)
# of the pCI forward Gibson primer; it does not pair any primers.
len(a1_primers['pCI']['fw_gibson'])

40

In [34]:
def pprint_primers(primer_set):
    """Print the name, database ID and melting temperature of Gibson primers.

    Relies on the notebook-level names ``Query``, ``primerdb`` and ``Tm_NN``
    being defined by the time the function is called.

    Parameters
    ----------
    primer_set : mapping
        ``{template id: {primer name: sequence}}``, as returned by an
        assembly's ``primers()`` method. Only primers whose name contains
        ``'gibson'`` are printed.

    Raises
    ------
    LookupError
        If a Gibson primer's sequence has no record in ``primerdb`` (it has
        not been checked into the database yet).
    """
    primer_query = Query()
    for tmplt, prmrs in primer_set.items():
        print(tmplt)
        for prmr, sqnc in prmrs.items():
            if 'gibson' not in prmr:
                continue
            # `get` returns None when no stored record has this sequence.
            record = primerdb.get(primer_query.sequence == str(sqnc))
            if record is None:
                raise LookupError(
                    'Primer {0!r} for {1!r} ({2}) is not in the primer '
                    'database.'.format(prmr, tmplt, sqnc)
                )
            print('  Primer: {0}'.format(prmr))
            print('  Primer ID:  EM-{0}'.format(record.eid))
            print('  Tm:         {0:.1f}ºC'.format(Tm_NN(sqnc)))

In [35]:
from Bio.SeqUtils.MeltingTemp import Tm_Wallace, Tm_staluc, Tm_NN
pprint_primers(a1_primers)

A/Victoria/361/2011|KJ942687
  Primer: re_gibson
  Primer ID:  EM-2
  Tm:         63.8ºC
  Primer: fw_gibson
  Primer ID:  EM-11
  Tm:         64.9ºC
pCI
  Primer: re_gibson
  Primer ID:  EM-6
  Tm:         67.1ºC
  Primer: fw_gibson
  Primer ID:  EM-15
  Tm:         68.7ºC


We will amplify Victoria PB2 with primers EM-2 and EM-11 using the mutagenesis kit, and amplify pCI with primers EM-6 and EM-15 using Phusion GC master mix.

In [36]:
# The PCR protocol for pCI is dependent on the length of the PCR product.

# Assumed polymerase extension rate, in nucleotides per minute.
# NOTE(review): 2000 nt/min presumably reflects the Phusion protocol in use -- confirm.
bases_per_min = 2000

# Extension time in minutes for the full-length pCI product of assembly 1.
len(assembly1.pcr_products()['pCI']) / bases_per_min

2.0135

In [37]:
# Get primers out from each of the reactions.
# The result for assembly 4 (both PB2 and pCI split at their 2/3 points) is
# keyed by part id, then by primer name.
a4_primers = assembly4.primers()
a4_primers

defaultdict(dict,
            {'A/Victoria/361/2011|KJ942687_part1': {'3p_sequencing': Seq('GAGATGTCAATGAGAGGAAT', SingleLetterAlphabet()),
              '5p_sequencing': Seq('AATTATGGCCATATGGTCCA', SingleLetterAlphabet()),
              'fw_gibson': Seq('GGCTAGCCTCGAGAGCAAAAGCAGGTCAATTATATTCAGT', SingleLetterAlphabet()),
              're_gibson': Seq('CAGGAGATAATAATACATTCCCACGTTGGTCTCGAACTCT', SingleLetterAlphabet())},
             'A/Victoria/361/2011|KJ942687_part2': {'3p_sequencing': Seq('GTGTTGGTAATGAAACGAAA', SingleLetterAlphabet()),
              '5p_sequencing': Seq('TTGATAGGTATTGACCAAAA', SingleLetterAlphabet()),
              'fw_gibson': Seq('ACGTGGGAATGTATTATTATCTCCTGAGGAGGTCAGTGAA', SingleLetterAlphabet()),
              're_gibson': Seq('AGGTACCACGCGTGTAGAAACAAGGTCGTTTTTAAACTAT', SingleLetterAlphabet())},
             'pCI_part1': {'3p_sequencing': Seq('TTGGACTCAAGACGATAGTT', SingleLetterAlphabet()),
              '5p_sequencing': Seq('TTTCACAAATAAAGCATTTT', SingleLetter

In [38]:
pprint_primers(a4_primers)

pCI_part2
  Primer: re_gibson
  Primer ID:  EM-6
  Tm:         67.1ºC
  Primer: fw_gibson
  Primer ID:  EM-7
  Tm:         71.0ºC
A/Victoria/361/2011|KJ942687_part1
  Primer: re_gibson
  Primer ID:  EM-10
  Tm:         63.0ºC
  Primer: fw_gibson
  Primer ID:  EM-11
  Tm:         64.9ºC
pCI_part1
  Primer: re_gibson
  Primer ID:  EM-14
  Tm:         66.7ºC
  Primer: fw_gibson
  Primer ID:  EM-15
  Tm:         68.7ºC
A/Victoria/361/2011|KJ942687_part2
  Primer: re_gibson
  Primer ID:  EM-2
  Tm:         63.8ºC
  Primer: fw_gibson
  Primer ID:  EM-3
  Tm:         63.4ºC


In [39]:
# Print each assembly 4 part id alongside the length of its PCR product.
for part, seq in assembly4.pcr_products().items():
    print(part, len(seq))

pCI_part2 1360
A/Victoria/361/2011|KJ942687_part1 1586
pCI_part1 2693
A/Victoria/361/2011|KJ942687_part2 806
