## Retrieve the sequence record from the nucleotide database
<hr>

In [1]:
from Bio import Entrez, SeqIO
# Contact e-mail set on the Entrez module before any request is issued below.
Entrez.email = 'varunsendil2003@gmail.com'

In [2]:
# Fetch the FASTA record for accession NM_002299 from the nucleotide database
# and parse it into a single SeqRecord.
# No version suffix is given in the id, so whichever version the service
# returns is used (the displayed record reports NM_002299.4).
hdl = Entrez.efetch(db='nucleotide', id=['NM_002299'], rettype='fasta')
try:
    seq = SeqIO.read(hdl, 'fasta')
finally:
    # Release the network handle even if parsing fails.
    hdl.close()

In [3]:
# Show the repr of the fetched record.
seq

SeqRecord(seq=Seq('AACAGTTCCTAGAAAATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTA...GTC', SingleLetterAlphabet()), id='NM_002299.4', name='NM_002299.4', description='NM_002299.4 Homo sapiens lactase (LCT), mRNA', dbxrefs=[])

### Save the FASTA file that we retrieved

In [4]:
# Write the sub-record covering positions 11..5794 (Python slice 11:5795) of
# the fetched record to example.fasta.
# NOTE(review): the slice bounds are hard-coded; the fetched record starts
# 'AACAGTTCCTAGAAAATGG...', which puts the first ATG at index 15, not 11 --
# confirm these coordinates against the record version actually returned.
w_seq = seq[11:5795]
# The context manager closes the file even if SeqIO.write raises.
with open('example.fasta', 'w') as w_hdl:
    SeqIO.write([w_seq], w_hdl, 'fasta')


In [5]:
# Open the file written above for parsing. The value displayed for `recs` is a
# generator object, so records are only produced once it is iterated.
recs = SeqIO.parse('example.fasta', 'fasta')
recs

<generator object parse at 0x7fb5e0290620>

In [6]:
# For every record in the file, report its description line, its first ten
# residues and the alphabet attached to the sequence (one item per line).
# `seq` is rebound at notebook level on each pass, so once the loop ends it
# holds the Seq of the last record; later cells read it from there.
for rec in recs:
    seq = rec.seq
    print(rec.description, seq[:10], seq.alphabet, sep='\n')

NM_002299.4 Homo sapiens lactase (LCT), mRNA
GAAAATGGAG
SingleLetterAlphabet()


In [7]:
# Inspect the alphabet of `seq`, the Seq left bound by the loop over `recs`.
seq.alphabet

SingleLetterAlphabet()

#### Since we already know this is a DNA sequence, it is better to redefine it with an explicit DNA alphabet

In [13]:
from Bio import Seq
from Bio.Alphabet import IUPAC
# Rebuild the sequence from its plain string, this time tagged with the
# unambiguous-DNA alphabet instead of the generic one reported earlier.
# NOTE(review): Bio.Alphabet is reported as removed from Biopython 1.78
# onwards, so this cell presumably needs an older release -- confirm the
# version this notebook is pinned to.
seq = Seq.Seq(str(seq), IUPAC.unambiguous_dna)

In [14]:
# Display the re-typed sequence. It is bound to `seq`; no cell shown here
# assigns a `dna` name, so referencing `dna` fails on a clean top-to-bottom run.
seq

Seq('GAAAATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATG...ATT', IUPACUnambiguousDNA())

#### Transcribe the DNA to RNA

In [15]:
# Transcribe the DNA sequence to RNA and print it; the printed sequence shows
# U where the DNA had T.
rna = seq.transcribe()
print(rna)

GAAAAUGGAGCUGUCUUGGCAUGUAGUCUUUAUUGCCCUGCUAAGUUUUUCAUGCUGGGGGUCAGACUGGGAGUCUGAUAGAAAUUUCAUUUCCACCGCUGGUCCUCUAACCAAUGACUUGCUGCACAACCUGAGUGGUCUCCUGGGAGACCAGAGUUCUAACUUUGUAGCAGGGGACAAAGACAUGUAUGUUUGUCACCAGCCACUGCCCACUUUCCUGCCAGAAUACUUCAGCAGUCUCCAUGCCAGUCAGAUCACCCAUUAUAAGGUAUUUCUGUCAUGGGCACAGCUCCUCCCAGCAGGAAGCACCCAGAAUCCAGACGAGAAAACAGUGCAGUGCUACCGGCGACUCCUCAAGGCCCUCAAGACUGCACGGCUUCAGCCCAUGGUCAUCCUGCACCACCAGACCCUCCCUGCCAGCACCCUCCGGAGAACCGAAGCCUUUGCUGACCUCUUCGCCGACUAUGCCACAUUCGCCUUCCACUCCUUCGGGGACCUAGUUGGGAUCUGGUUCACCUUCAGUGACUUGGAGGAAGUGAUCAAGGAGCUUCCCCACCAGGAAUCAAGAGCGUCACAACUCCAGACCCUCAGUGAUGCCCACAGAAAAGCCUAUGAGAUUUACCACGAAAGCUAUGCUUUUCAGGGCGGAAAACUCUCUGUUGUCCUGCGAGCUGAAGAUAUCCCGGAGCUCCUGCUAGAACCACCCAUAUCUGCGCUUGCCCAGGACACGGUCGAUUUCCUCUCUCUUGAUUUGUCUUAUGAAUGCCAAAAUGAGGCAAGUCUGCGGCAGAAGCUGAGUAAAUUGCAGACCAUUGAGCCAAAAGUGAAAGUUUUCAUCUUCAACCUAAAACUCCCAGACUGCCCCUCCACCAUGAAGAACCCAGCCAGUCUGCUCUUCAGCCUUUUUGAAGCCAUAAAUAAAGACCAAGUGCUCACCAUUGGGUUUGAUAUUAAUGAGUUUCUGAGUUGUUCAUCAAGUUCCAAGAAAAGCAUGUCUUGU

In [19]:
# Translate the DNA sequence to protein, writing stop codons as "*".
# NOTE(review): the printed protein is full of internal "*", and the saved
# slice begins 'GAAAATGG...' with the ATG four bases in, so the slice start
# looks out of frame with the start codon -- confirm the CDS coordinates for
# the record version that was fetched.
prot = seq.translate(stop_symbol="*")
print(prot)


ENGAVLACSLYCPAKFFMLGVRLGV**KFHFHRWSSNQ*LAAQPEWSPGRPEF*LCSRGQRHVCLSPATAHFPARILQQSPCQSDHPL*GISVMGTAPPSRKHPESRRENSAVLPATPQGPQDCTASAHGHPAPPDPPCQHPPENRSLC*PLRRLCHIRLPLLRGPSWDLVHLQ*LGGSDQGASPPGIKSVTTPDPQ*CPQKSL*DLPRKLCFSGRKTLCCPAS*RYPGAPARTTHICACPGHGRFPLS*FVL*MPK*GKSAAEAE*IADH*AKSESFHLQPKTPRLPLHHEEPSQSALQPF*SHK*RPSAHHWV*Y**VSELFIKFQEKHVLFSDWQPGPSA*PAAGPRDHGLLSCLCLSENLGSICQSVQGGKGCLPAGYFP*RLPLGCLHRSL*RGRRLGRGWERGEHLGSTQAPEHH*GPSDAGGGQRQLPQGSL*RRPALRPPGSGVQVLHLLVPDLPHGAREQPQPPRRCLLQQAD*QATGCGHRAHGHAVPLGPASGPAGSWWMAE*ERGGCLPGLCGLLLLHIWGPCEAVGDLP*AVGDELRRLWHRPAPSRHL*PRSGLF*GGSLGPQGSCQNLAPLQQPSSPTAAGARGHCAELRLGRTPVSREA*GPESL*ALLALHAGLVCTPRLCGWRLPSHPEDPDPTDEQTVLPSCGSTPRVHRGREAAPERLC*FSGSVALHLPPHQQRPTKHLHP*L*YHWRLLPTREPCVAPDLILLDSCGALGDKEAVAVCIPGIHKRKSSNIPCREWHAHRGK*KSL**FLKSRLLQSIYQ*GAQGYQGRLCGCSFLHCSFPH*WLRRPFWLQPAVWPAPRQLQRQQQVKDSQEICLLFH*HHRKERFPHQGGKKTATT*YSKPPLQSQSLHFSI*GALQG*SRLGKVLQPTQVRKRFVLPRDVSG*LSVGRVLFRLSD*RRVGCRWQRPQHLG*LYPHTREQCERQCHWRHRL*QLSPAGCRSEYAPSFEGEGLPLLYLLVSDFPNWEKQLYQQSWG*L