In [2]:
from Bio import Entrez, SeqIO, SearchIO

In [3]:
# NCBI asks every Entrez (E-utilities) client to identify itself with a
# contact e-mail address.
Entrez.email = "isadogan03@gmail.com"

# Fetch the record MN908947 as a plain-text GenBank flat file.
# `rettype` selects the record format ("gb") and `retmode` the encoding
# ("text"); the previous call had the two values swapped.
# The `with` block closes the network handle even if parsing fails.
with Entrez.efetch(
    db="nucleotide", id="MN908947", rettype="gb", retmode="text"
) as hand:
    cov_recs = list(SeqIO.parse(hand, "gb"))
cov_recs

[SeqRecord(seq=Seq('ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGT...AAA'), id='MN908947.3', name='MN908947', description='Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome', dbxrefs=[])]

In [4]:
# Keep only the nucleotide sequence of the first fetched record.
cov_dna= cov_recs[0].seq
cov_dna

Seq('ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGT...AAA')

In [5]:
# Build the one-line summary of the genome length, then emit it.
genome_summary = f"Genome consist of {len(cov_dna)} nuclotides."
print(genome_summary)

Genome consist of 29903 nuclotides.


In [6]:
# Transcribe the DNA sequence into RNA (the displayed result has U where the
# DNA had T).
cov_mrna = cov_dna.transcribe()
cov_mrna

Seq('AUUAAAGGUUUAUACCUUCCCAGGUAACAAACCAACCAACUUUCGAUCUCUUGU...AAA')

In [7]:
# Translate the whole genome in its first reading frame; stop codons show up
# as "*" in the resulting amino-acid sequence.
# The genome length (29903) is not a multiple of three, so the trailing
# partial codon is dropped explicitly rather than relying on translate() to
# ignore it (Biopython warns about partial codons and notes this may become
# an error in a future release).
codon_end = len(cov_mrna) - len(cov_mrna) % 3
cov_aa = cov_mrna[:codon_end].translate()
cov_aa



Seq('IKGLYLPR*QTNQLSISCRSVL*TNFKICVAVTRLHA*CTHAV*LITNYCR*QD...KKK')

In [8]:
# Cut the translation at every stop-codon marker ("*") to get the candidate
# peptide fragments, then preview the first ten of them.
cov_proteins =  cov_aa.split("*")
cov_proteins[:10]

[Seq('IKGLYLPR'),
 Seq('QTNQLSISCRSVL'),
 Seq('TNFKICVAVTRLHA'),
 Seq('CTHAV'),
 Seq('LITNYCR'),
 Seq('QDTSNSSIFCRLLTVSSVLQPIISTSRFRPGVTER'),
 Seq('DGEPCPWFQRENTRPTQFACFTGSRRARTWLWRLRGGGLIRGTSTS'),
 Seq('RWHLWLSRS'),
 Seq('KRRFAST'),
 Seq('TALCVHQTFGCSNCTSWSCYG')]

In [9]:
# Order the fragments from shortest to longest. list.sort is stable, just
# like sorted(), so fragments of equal length keep their original order.
sorted_proteins = list(cov_proteins)
sorted_proteins.sort(key=len)

In [10]:
# The list is sorted by ascending length, so the last element is the longest
# fragment.
sorted_proteins[-1]

Seq('CTIVFKRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQ...VNN')

In [11]:
from Bio.Blast import NCBIWWW

In [13]:
# Run a blastp search against the "pdb" database through NCBI's web BLAST.
# The query is index -3 of the length-sorted list, i.e. the third-longest
# fragment, not the longest one.
# NOTE(review): confirm that querying index -3 (rather than -1) is intended.
result = NCBIWWW.qblast("blastp", "pdb", sorted_proteins[-3])

In [14]:
# Parse the BLAST XML response into a single query-result object
# (SearchIO.read expects exactly one query in the input).
blast = SearchIO.read(result, "blast-xml")

In [15]:
# Show the plain-text summary of the search: program, query, target database
# and the table of hits.
print(blast)

Program: blastp (2.16.0+)
  Query: unnamed (123)
         protein product
 Target: pdb
   Hits: ----  -----  ----------------------------------------------------------
            #  # HSP  ID + description
         ----  -----  ----------------------------------------------------------
            0      1  pdb|7CI3|A  Chain A, Orf7a protein [Severe acute respir...
            1      1  pdb|1XAK|A  Chain A, SARS ORF7A ACCESSORY PROTEIN [Seve...
            2      1  pdb|1YO4|A  Chain A, Hypothetical protein X4 [Severe ac...
            3      1  pdb|6W37|A  Chain A, ORF7a protein [Severe acute respir...


In [16]:
# Report every hit of the BLAST search. Iterating the query result yields one
# hit per matched PDB chain; index 0 of a hit is its first HSP (high-scoring
# segment pair), which carries the score statistics and the alignment.
# Each value gets its own label (previously everything was printed as "ID:").
for record in blast:
    top_hsp = record[0]
    print(f"ID: {record.id}")
    print(f"Description: {record.description}")
    print(f"E-value: {top_hsp.evalue}")
    print(f"Bit score: {top_hsp.bitscore}")
    print(f"Alignment: {top_hsp.aln}")
    print()

ID: pdb|7CI3|A
Descrition: Chain A, Orf7a protein [Severe acute respiratory syndrome coronavirus 2]
ID: 4.90126e-59
ID: 177.948
ID: Alignment with 2 rows and 83 columns
TCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALT...QEL unnamed
TCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALT...QEL pdb|7CI3|A

ID: pdb|1XAK|A
Descrition: Chain A, SARS ORF7A ACCESSORY PROTEIN [Severe acute respiratory syndrome coronavirus]
ID: 1.79052e-51
ID: 158.688
ID: Alignment with 2 rows and 82 columns
TCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALT...VQE unnamed
SCELYHYQECVRGTTVILKEPCPSGTYEGNSPFHPLADNKFALT...VQQ pdb|1XAK|A

ID: pdb|1YO4|A
Descrition: Chain A, Hypothetical protein X4 [Severe acute respiratory syndrome-related coronavirus]
ID: 2.62563e-51
ID: 158.303
ID: Alignment with 2 rows and 84 columns
ELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALTCF...LYS unnamed
ELYHYQECVRGTTVLLKEPCPSGTYEGNSPFHPLADNKFALTCT...LYS pdb|1YO4|A

ID: pdb|6W37|A
Descrition: Chain A, ORF7a protein [Severe acute respiratory syndrome coronavirus 2

In [109]:
!wget https://files.rcsb.org/download/7CI3.pdb

--2024-07-11 18:07:53--  https://files.rcsb.org/download/7CI3.pdb
Resolving files.rcsb.org (files.rcsb.org)... 128.6.159.157
Connecting to files.rcsb.org (files.rcsb.org)|128.6.159.157|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘7CI3.pdb’

7CI3.pdb                [  <=>               ]  65.89K   229KB/s    in 0.3s    

2024-07-11 18:07:54 (229 KB/s) - ‘7CI3.pdb’ saved [67473]



In [17]:
from Bio.PDB import PDBParser

In [18]:
# Parse the local file 7CI3.pdb into a Bio.PDB structure object whose id is
# set to "7CI3".
parser = PDBParser()
structure = parser.get_structure("7CI3", "7CI3.pdb")
structure

<Structure id=7CI3>

In [19]:
# Index 0 selects the first model of the structure; iterating a model yields
# its chains, whose ids are printed one per line.
first_model = structure[0]
for chain in first_model:
    print(f"chain id: {chain.id}")

chain id: A


In [20]:
import nglview as nv



In [21]:
# Display the parsed structure in an nglview widget with its GUI controls
# enabled.
nv.show_biopython(structure, gui=True)

NGLWidget()

Tab(children=(Box(children=(Box(children=(Box(children=(Label(value='step'), IntSlider(value=1, min=-100)), la…