# July 25th, 2018
## Benjamin Adam Catching
## Andino Lab, Summer Rotation
## Thermostable Zika Virus

First look at the Zika virus genome and the previously created mutants that are heat tolerant.

In [62]:
# Import packages
from Bio import SeqIO
from Bio import Seq
import numpy as np

In [34]:
# Read in the genome
zika_genome = next(SeqIO.parse('data/sequences/zika_genome.fa', 'fasta'))
# Get the sequence of the genome
zika_genome = zika_genome.seq.upper()

In [35]:
# Save the WT of the polyprotein RNA sequence
zika_WT_polyp = zika_genome.transcribe()[106:10378]

In [54]:
# Create a list of potential mutants as lists
thermo_mutants = [list(str(x)) for x in [zika_WT_polyp] * 5]

In [55]:
# Turn mutants to a dictionary (locations are not index-0)
mut_dict = {2177 : 'U',
            885 : 'U',
            1740 : 'C',
            2229 : 'U',
            1200 : 'U'}

In [56]:
mut_dict_keys = mut_dict.__iter__()
for mutant in thermo_mutants:
    loc = next(mut_dict_keys)
    print('Previous nt: %s' % mutant[loc-107])
    mutant[loc-107] = mut_dict[loc]
    print('New nt: %s: ' % mutant[loc-107])

Previous nt: C
New nt: U: 
Previous nt: C
New nt: U: 
Previous nt: U
New nt: C: 
Previous nt: C
New nt: U: 
Previous nt: C
New nt: U: 


In [57]:
# Change list of nucleotides to seq
thermo_mutant_seqs = [Seq.Seq(''.join(x)) for x in thermo_mutants]

In [63]:
# Translate the polyprotein to amino acid sequence
thermo_mutant_polyproteins = [x.translate() for x in thermo_mutant_seqs]
# Translate the polyprotein of the WT
thermo_WT_polyprotein = zika_WT_polyp.translate()

In [73]:
# See where the different amino acids are
for i, mutant in enumerate(thermo_mutant_polyproteins):
    for j, residue in enumerate(thermo_WT_polyprotein):
        mut_residue = mutant[j]
        #print(residue, mut_residue)
        if mut_residue != residue:
            print('Mutant %d: %s%d%s' % (i, residue, j, mut_residue))

Mutant 0: H690Y
Mutant 1: A259V
Mutant 2: V544A
Mutant 3: A707V
Mutant 4: P364L


In [91]:
# Determine the region of the E Protein
end_stop = [977, 2489]
pp_end_stop = [x - 107 for x in end_stop]
end_stop = [int(x / 3)  for x in pp_end_stop]
print(end_stop)

[290, 794]


In [100]:
e_protein_WT = str(thermo_WT_polyprotein)[end_stop[0]:end_stop[1]]
e_protein_WT[:10]

'IRCIGVSNRD'

In [108]:
# Get the mutant version of each E-protein
e_protein_mutants = []
for mut in thermo_mutant_polyproteins:
    if thermo_mutant_polyproteins.index(mut) != 1:
        temp_pro = str(mut)[end_stop[0]:end_stop[1]]
        e_protein_mutants.append(temp_pro)
        print(temp_pro[:10])
print(e_protein_mutants[0][:10])

IRCIGVSNRD
IRCIGVSNRD
IRCIGVSNRD
IRCIGVSNRD
IRCIGVSNRD


In [113]:
# Find the localized mutants in the e protein
for i, mutant in enumerate(e_protein_mutants):
    for j, residue in enumerate(e_protein_WT):
        #print(residue, mutant[j])
        if residue != mutant[j]:
            print('Mutant %d: %s%d%s' % (i+1, residue, j, mutant[j]))

Mutant 1: H400Y
Mutant 2: V254A
Mutant 3: A417V
Mutant 4: P74L


From looking at the locations of the mutations, the positively charged Histidine to hydrophobic Tyrosine may make increase the binding energy between dimers.