# Sample MERS viral sequence

In [1]:
# Load the sample MERS viral sequence that the cells below pass to Codons.
# Sourced from https://www.ncbi.nlm.nih.gov/nuccore/KF745068.1
# The file handle is deliberately NOT named `input`: that would rebind the
# builtin input() to a closed file object for the rest of the kernel session.
with open('MERS_sequence.txt', 'r') as sequence_file:
    mers_sequence = sequence_file.read()

# Echo the raw text exactly as read from the file.
print(mers_sequence)

ttgtggcatt aatttgcctg ctcatctagg cagtggacat atgctcaaca ctgggtataa
       61 ttctaattga atactatttt tcagttagag cgtcgtgtct cttgtacgtc tcggtcacaa
      121 tacacggttt cgtccggtgc gtggcaattc ggggcacatc atgtctttcg tggctggtgt
      181 gaccgcgcaa ggtgcgcgcg gtacgtatcg agcagcgctc aactctgaaa aacatcaaga
      241 ccatgtgtct ctaactgtgc cactctgtgg ttcaggaaac ctggttgaaa aactttcacc
      301 atggttcatg gatggcgaaa atgcctatga agtggtgaag gccatgttac ttaaaaagga
      361 gccacttctc tatgtgccca tccggctggc tggacacact agacacctcc caggtcctcg
      421 tgtatacctg gttgagaggc tcattgcttg tgaaaatcca ttcatggtta accaattggc
      481 ttatagctct agtgcaaatg gcagcttggt tggcacaact ttgcagggca agcctattgg
      541 tatgttcttc ccttatgaca tcgaacttgt cacaggaaag caaaatattc tcctgcgcaa
      601 gtatggccgt ggtggttatc actacacccc attccactat gagcgagaca acacctcttg
      661 ccctgagtgg atggacgatt ttgaggcgga tcctaaaggc aaatatgccc agaatctgct
      721 taagaagttg attggcggtg atgtcactcc agttgaccaa tacatgtgtg gcgttgatgg
      781 aaaacccatt a

# Investigating a Sequence String

## Transcription 

In [7]:
# Execute genes.py and load its top-level names into the notebook namespace.
# `Codons` is not defined in any visible cell, so it presumably comes from there.
%run ../../codons/genes.py

# NOTE(review): `cd` is also the name of IPython's `%cd` magic; while this
# variable exists the bare `cd <dir>` automagic is unavailable — confirm no cell relies on it.
cd = Codons()
# Time only the transcription of the sequence text loaded into `mers_sequence`.
%time cd.transcribe(mers_sequence)
# presumably writes the transcription results out — TODO confirm against genes.py
cd.export()

The sequence is transcribed.
Wall time: 1 ms


## Translation

In [24]:
# Re-execute genes.py so that `Codons` is (re)loaded into the notebook namespace.
%run ../../codons/genes.py
from pprint import pprint

# Start from a fresh Codons instance rather than reusing one from another cell.
cd = Codons()
# Time only the translation of the sequence text loaded into `mers_sequence`.
%time cd.translate(mers_sequence)
# Optional follow-ups, currently disabled. Within this cell, `pprint` is only
# referenced by the second of these lines.
# cd.export()
# pprint(cd.genes)

{'protein': {'sequence': 'MLNTGYNSN', 'mass': 1157.2049}, 'codons': ['atg', 'ctc', 'aac', 'act', 'ggg', 'tat', 'aat', 'tct', 'aat', 'tga']}
{'protein': {'sequence': 'MSFVAGVTAQGARGTYRAALNSEKHQDHVSLTVPLCGSGNLVEKLSPWFMDGENAYEVVKAMLLKKEPLLYVPIRLAGHTRHLPGPRVYLVERLIACENPFMVNQLAYSSSANGSLVGTTLQGKPIGMFFPYDIELVTGKQNILLRKYGRGGYHYTPFHYERDNTSCPEWMDDFEADPKGKYAQNLLKKLIGGDVTPVDQYMCGVDGKPISAYAFLMAKDGITKLADVEADVAARADDEGFITLKNNLYRLVWHVERKDVPYPKQSIFTINSVVQKDGVENTPPHYFTLGCKILTLTPRNKWSGVSDLSLKQKLLYTFYGKESLENPTYIYHSAFIECGSCGNDSWLTGNAIQGFACGCGASYTANDVEVQSSGMIKPNALLCATCPFAKGDSCSSNCKHSVAQLVSYLSERCNVIADSKSFTLIFGGVAYAYFGCEEGTMYFVPRAKSVVSRIGDSIFTGCTGSWNKVTQIANMFLEQTQHSLNFVGEFVVNDVVLAILSGTTTNVDKIRQLLKGVTLDKLRDYLADYDVAVTAGPFMDNAINVGGTGLQYAAITAPYVVLTGLGESFKKVATIPYKVCNSVKDTLTYYAHSVLYRVFPYDMDSGVSSFSELLFDCVDLSVASTYFLVRLLQDKTGDFMSTIITSCQTAVSKLLDTCFEATEATFNFLLDLAGLFRIFLRNAYVYTSQGFVVVNGKVSTLVKQVLDLLNKGMQLLHTKVSWAGSNISAVIYSGRESLIFPSGTYYCVTTKAKSVQQDLDVILPGEFSKKQLGLLQPTDNSTTVSVTVSSNMVETVVGQLEQTNMHSPDVIVGDYVIISEKLFVRSKEEDGFAF

## BLASTp interpretation

In [8]:
# Re-execute genes.py so that `Codons` is (re)loaded into the notebook namespace.
%run ../../codons/genes.py

# BLAST parsing of each created protein
cd = Codons()
# translate() runs first and is not timed. blast_protein() takes no arguments,
# so it presumably works on what translate() stored on `cd` — TODO confirm.
cd.translate(mers_sequence)
# Only the BLAST step is timed.
%time cd.blast_protein() 

>Protein - 8residues - 1124.3123amu - 
VHGWRKCL
>Protein - 15residues - 2154.24672amu - 
QNFNAHPTQQVEWRF
>Protein - 6residues - 827.91874amu - 
WGIIYS
>Protein - 8residues - 1104.23148amu - 
GRYYVFCA
>Protein - 1residues - 165.18914amu - 
F
>Protein - 3residues - 393.51876amu - 
LLL
>Protein - 3residues - 352.38402amu - 
LQG
>Protein - 25residues - 3326.59382amu - 
SCVFGFTHDTAVNNRSLSDCRWCKF
>Protein - 3residues - 470.56302amu - 
RFL
>Protein - 35residues - 4661.39614amu - 
LFCTHGLERVVQCLWHKRCCSTRLKSLLLRGCANC
>Protein - 4residues - 640.72716amu - 
PVWW
>Protein - 3residues - 386.4433amu - 
GFK
>Protein - 21residues - 2653.0669amu - 
HNYGIVVFCVSLVCLQSGLIK
>Protein - 4residues - 482.61194amu - 
LALL
>Protein - 8residues - 1007.19758amu - 
CLLFSSLV
>Protein - 1residues - 115.13046amu - 
P
>Protein - 1residues - 117.14634amu - 
V
>Protein - 2residues - 222.23892amu - 
VS
>Protein - 38residues - 5036.4143amu - 
PIDSCSNCWSCWCSHSRRTYYIGLGEQSDNFLCFSSLC
>Protein - 11residues - 1490.62864amu - 
D

## BLASTn interpretation

In [None]:
# Re-execute genes.py so that `Codons` is (re)loaded into the notebook namespace.
%run ../../codons/genes.py

# BLAST parsing of the genetic sequence
# NOTE: long-running. The recorded output of this cell estimates about 5.26 hours
# for the search over 38 sections, and the logged section timestamps run past
# that estimate. The cell has no execution count (In [None]).
cd = Codons()
# The raw sequence text is passed directly; no translate()/transcribe() call precedes it here.
%time cd.blast_nucleotide(mers_sequence) 

The database search for the parameterized genetic sequence will complete circa 2022-02-09 05:50:34.836081, in 5.260277777777778 hours.
Section 1/38 is completed: 2022-02-09 00:37:59.367702
Section 2/38 is completed: 2022-02-09 00:43:19.409100
Section 3/38 is completed: 2022-02-09 00:48:39.383461
Section 4/38 is completed: 2022-02-09 00:55:59.374128
Section 5/38 is completed: 2022-02-09 02:17:20.556796
Section 6/38 is completed: 2022-02-09 02:20:39.818836
Section 7/38 is completed: 2022-02-09 04:07:00.636201
Section 8/38 is completed: 2022-02-09 04:11:20.515762
Section 9/38 is completed: 2022-02-09 05:56:39.826531
Section 10/38 is completed: 2022-02-09 08:15:00.756007
Section 11/38 is completed: 2022-02-09 08:26:20.733457
Section 12/38 is completed: 2022-02-09 11:40:40.720158
Section 13/38 is completed: 2022-02-09 11:44:00.549436
Section 14/38 is completed: 2022-02-09 13:18:20.724362
Section 15/38 is completed: 2022-02-09 13:21:39.830273
Section 16/38 is completed: 2022-02-09 13:26:00.6