In [77]:
from Bio import Entrez, SeqIO, SearchIO

In [78]:
# NCBI asks every Entrez client to identify itself with a contact e-mail address.
Entrez.email ="isadogan03@gmail.com"
# Fetch GenBank accession MN908947 from the nucleotide database as a flat file.
# rettype selects the record format ("gb") and retmode the transfer encoding
# ("text") -- the two must not be swapped.
# The with-block closes the network handle even if parsing raises.
with Entrez.efetch(db="nucleotide", id="MN908947", rettype="gb", retmode="text") as hand:
    cov_recs = list(SeqIO.parse(hand, "gb"))
cov_recs

[SeqRecord(seq=Seq('ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGT...AAA'), id='MN908947.3', name='MN908947', description='Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome', dbxrefs=[])]

In [79]:
# Keep only the nucleotide sequence of the first fetched record.
first_record = cov_recs[0]
cov_dna = first_record.seq
cov_dna

Seq('ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGT...AAA')

In [80]:
# Report how many bases the genome sequence contains.
genome_length = len(cov_dna)
print(f"Genome consist of {genome_length} nuclotides.")

Genome consist of 29903 nuclotides.


In [81]:
# Transcribe the DNA sequence into its RNA form (T becomes U).
cov_mrna = cov_dna.transcribe()
cov_mrna

Seq('AUUAAAGGUUUAUACCUUCCCAGGUAACAAACCAACCAACUUUCGAUCUCUUGU...AAA')

In [82]:
# Translate the whole transcript in one pass with Biopython's default settings;
# stop codons are rendered as "*" in the resulting amino-acid sequence.
cov_aa = cov_mrna.translate()
cov_aa

Seq('IKGLYLPR*QTNQLSISCRSVL*TNFKICVAVTRLHA*CTHAV*LITNYCR*QD...KKK')

In [127]:
# Cut the translation at every stop symbol to obtain candidate peptide
# fragments, then preview the first ten of them.
stop_symbol = "*"
cov_proteins = cov_aa.split(stop_symbol)
cov_proteins[:10]

[Seq('IKGLYLPR'),
 Seq('QTNQLSISCRSVL'),
 Seq('TNFKICVAVTRLHA'),
 Seq('CTHAV'),
 Seq('LITNYCR'),
 Seq('QDTSNSSIFCRLLTVSSVLQPIISTSRFRPGVTER'),
 Seq('DGEPCPWFQRENTRPTQFACFTGSRRARTWLWRLRGGGLIRGTSTS'),
 Seq('RWHLWLSRS'),
 Seq('KRRFAST'),
 Seq('TALCVHQTFGCSNCTSWSCYG')]

In [84]:
# Order the fragments from shortest to longest (stable sort on length),
# working on a copy so cov_proteins keeps its original order.
sorted_proteins = list(cov_proteins)
sorted_proteins.sort(key=len)

In [85]:
# sorted_proteins is in ascending length order, so the last element is the longest fragment.
sorted_proteins[-1]

Seq('CTIVFKRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQ...VNN')

In [86]:
from Bio.Blast import NCBIWWW

In [115]:
# Remote BLASTP search of the longest fragment against the PDB database.
# This is a network call to NCBI.
longest_fragment = sorted_proteins[-1]
result = NCBIWWW.qblast(program="blastp", database="pdb", sequence=longest_fragment)

In [116]:
# Parse the BLAST XML held in the result handle into a single query result.
blast = SearchIO.read(result, format="blast-xml")

In [128]:
# Print the summary table for the first five hits of the BLAST result.
print(blast[:5])

Program: blastp (2.16.0+)
  Query: unnamed (2701)
         protein product
 Target: pdb
   Hits: ----  -----  ----------------------------------------------------------
            #  # HSP  ID + description
         ----  -----  ----------------------------------------------------------
            0      1  pdb|7D4F|A  Chain A, RNA-directed RNA polymerase [Sever...
            1      1  pdb|6YYT|A  Chain A, nsp12 [Severe acute respiratory sy...
            2      1  pdb|6XEZ|A  Chain A, RNA-directed RNA polymerase [Sever...
            3      1  pdb|7BW4|A  Chain A, RNA-directed RNA polymerase [Sever...
            4      1  pdb|6XQB|A  Chain A, RNA-directed RNA polymerase [Sever...


In [129]:
# For each of the first five hits, show its identifier and description
# together with the statistics and alignment of its first HSP.
for hit in blast[:5]:
    first_hsp = hit[0]
    print(f"ID: {hit.id}")
    print(f"Descrition: {hit.description}")
    print(f"Evalue: {first_hsp.evalue}")
    print(f"Bit score: {first_hsp.bitscore}")
    print(f"Aligment: {first_hsp.aln}")
    print()

ID: pdb|7D4F|A
Descrition: Chain A, RNA-directed RNA polymerase [Severe acute respiratory syndrome coronavirus 2]
Evalue: 0.0
Bit score: 1938.7
Aligment: Alignment with 2 rows and 926 columns
FKRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKT...LQA unnamed
LNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKT...LQG pdb|7D4F|A

ID: pdb|6YYT|A
Descrition: Chain A, nsp12 [Severe acute respiratory syndrome coronavirus 2]
Evalue: 0.0
Bit score: 1938.31
Aligment: Alignment with 2 rows and 925 columns
FKRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKT...VLQ unnamed
LNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKT...VLQ pdb|6YYT|A

ID: pdb|6XEZ|A
Descrition: Chain A, RNA-directed RNA polymerase [Severe acute respiratory syndrome coronavirus 2]
Evalue: 0.0
Bit score: 1937.92
Aligment: Alignment with 2 rows and 925 columns
FKRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKT...VLQ unnamed
LNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKT...VLQ pdb|6XEZ|A

ID: pdb|7BW4|A
Descrition: Chain A, RNA-directed RNA polymerase [Severe acute 

In [109]:
!wget https://files.rcsb.org/download/7D4F.pdb

--2024-07-11 18:07:53--  https://files.rcsb.org/download/7CI3.pdb
Resolving files.rcsb.org (files.rcsb.org)... 128.6.159.157
Connecting to files.rcsb.org (files.rcsb.org)|128.6.159.157|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘7CI3.pdb’

7CI3.pdb                [  <=>               ]  65.89K   229KB/s    in 0.3s    

2024-07-11 18:07:54 (229 KB/s) - ‘7CI3.pdb’ saved [67473]



In [110]:
from Bio.PDB import PDBParser

In [120]:
# Parse the local file 7D4F.pdb into a Biopython Structure labelled "7D4F".
parser = PDBParser()
structure = parser.get_structure("7D4F", "7D4F.pdb")
structure

<Structure id=7D4F>

In [124]:
# List the identifier of every chain in the first model of the structure.
first_model = structure[0]
for chain in first_model:
    print(f"chain id: {chain.id}")

chain id: B
chain id: C
chain id: G
chain id: A


In [125]:
import nglview as nv

In [126]:
# Display the parsed structure in an nglview widget with its GUI panel enabled.
nv.show_biopython(structure, gui=True) 

NGLWidget()

Tab(children=(Box(children=(Box(children=(Box(children=(Label(value='step'), IntSlider(value=1, min=-100)), la…