Following the protocols in the ProteoFAV example...

In [3]:
import os
from proteofav.structures import mmCIF, PDB

# Entry identifier used for every download in this notebook.
pdb_id = "6vw1"

# Working directory for downloaded files, created under the current directory if missing.
out_dir = os.path.join(os.getcwd(), "proteofav_db")
os.makedirs(out_dir, exist_ok=True)

# Destination paths, in order: mmCIF, mmCIF requested with bio_unit=True, PDB.
out_mmcif, out_mmcif_bio, out_pdb = (
    os.path.join(out_dir, template.format(pdb_id))
    for template in ("{}.cif", "{}_bio.cif", "{}.pdb")
)

# Fetch each structure file to its destination path.
mmCIF.download(identifier=pdb_id, filename=out_mmcif)
mmCIF.download(
    identifier=pdb_id,
    filename=out_mmcif_bio,
    bio_unit=True,
    bio_unit_preferred=True,
)
PDB.download(identifier=pdb_id, filename=out_pdb)

# All three files must exist before the reading cells run.
assert all(os.path.exists(path) for path in (out_mmcif, out_mmcif_bio, out_pdb))

In [4]:
# Parse the downloaded mmCIF file, then print its head() preview followed by its column labels.
# NOTE(review): the saved output for this cell is a KeyError ("... not found in axis") —
# presumably the reader tries to drop columns this file does not contain; confirm against
# the installed ProteoFAV/pandas versions.
mmcif = mmCIF.read(filename=out_mmcif)
print(mmcif.head(), mmcif.columns, sep="\n")

KeyError: "['Cartn_x_esd' 'Cartn_y_esd' 'Cartn_z_esd' 'occupancy_esd'\n 'B_iso_or_equiv_esd'] not found in axis"

In [5]:
# Parse the mmCIF file that was downloaded with bio_unit=True, then print its head()
# preview followed by its column labels.
# NOTE(review): the saved output for this cell is a KeyError ("... not found in axis") —
# presumably the reader tries to drop columns this file does not contain; confirm against
# the installed ProteoFAV/pandas versions.
mmcif_bio = mmCIF.read(filename=out_mmcif_bio)
print(mmcif_bio.head(), mmcif_bio.columns, sep="\n")

KeyError: "['Cartn_x_esd' 'Cartn_y_esd' 'Cartn_z_esd' 'occupancy_esd'\n 'B_iso_or_equiv_esd'] not found in axis"

In [9]:
# Parse the downloaded PDB-format file, then print its head() preview followed by its
# column labels.
# NOTE(review): the saved output for this cell is a KeyError ("... not found in axis",
# here also listing 'pdbx_formal_charge') — presumably the reader tries to drop columns
# this file does not contain; confirm against the installed ProteoFAV/pandas versions.
pdb = PDB.read(filename=out_pdb)
print(pdb.head(), pdb.columns, sep="\n")

KeyError: "['Cartn_x_esd' 'Cartn_y_esd' 'Cartn_z_esd' 'occupancy_esd'\n 'B_iso_or_equiv_esd' 'pdbx_formal_charge'] not found in axis"

In [10]:
from proteofav.sifts import SIFTS

# Destination path for the SIFTS XML of this entry, inside out_dir.
out_sifts = os.path.join(out_dir, "{}.xml".format(pdb_id))

# Fetch the SIFTS record for pdb_id and write it to out_sifts.
SIFTS.download(identifier=pdb_id, filename=out_sifts)

# Stop here if the download did not leave a file behind.
assert os.path.exists(out_sifts)

In [11]:
# Parse the downloaded SIFTS XML and print its head() preview.
# NOTE(review): the saved output for this cell is a KeyError
# ("['InterPro' 'GO' 'EC' 'NCBI'] not found in axis") — presumably the reader tries to
# drop columns this file does not contain; confirm against the installed
# ProteoFAV/pandas versions.
sifts = SIFTS.read(filename=out_sifts)
print(sifts.head())

KeyError: "['InterPro' 'GO' 'EC' 'NCBI'] not found in axis"

In [12]:
from proteofav.dssp import DSSP

# Destination path for the DSSP file of this entry, inside out_dir.
out_dssp = os.path.join(out_dir, "{}.dssp".format(pdb_id))

# Fetch the DSSP record for pdb_id and write it to out_dssp.
DSSP.download(identifier=pdb_id, filename=out_dssp)

# Fetching from the DSSP FTP server at ftp://ftp.cmbi.ru.nl/pub/molbio/data/dssp/
# sometimes times out, so the result is reported (True/False) here — presumably this is
# why the cell prints instead of asserting like the other download cells.
print(os.path.exists(out_dssp))

True


In [13]:
# Parse the downloaded DSSP file and print its head() preview.
# Requires out_dssp to exist, i.e. the download in the previous cell printed True.
dssp = DSSP.read(filename=out_dssp)
print(dssp.head())

  RES RES_FULL INSCODE CHAIN AA SS  ACC    TCO  KAPPA  ALPHA    PHI    PSI
0  19       19             A  S     105  0.000  360.0  360.0  360.0  152.3
1  20       20             A  T      78 -0.454  360.0 -120.8  -81.0  156.9
2  21       21             A  I  H   43  0.907  116.5   51.8  -61.8  -39.8
3  22       22             A  E  H   26  0.904  110.2   47.8  -64.2  -40.3
4  23       23             A  E  H   97  0.901  110.4   51.3  -69.2  -40.7


In [14]:
from proteofav.validation import Validation

# Destination path for the validation XML of this entry, inside out_dir.
out_validation = os.path.join(out_dir, "{}_validation.xml".format(pdb_id))

# Fetch the validation record for pdb_id and write it to out_validation.
Validation.download(identifier=pdb_id, filename=out_validation)

# Stop here if the download did not leave a file behind.
assert os.path.exists(out_validation)

In [15]:
# Parse the downloaded validation XML and print its head() preview.
validation = Validation.read(filename=out_validation)
print(validation.head())

  validation_flippable-sidechain validation_ent  validation_rscc  \
0                            NaN              1            0.857   
1                            NaN              1            0.893   
2                            NaN              1            0.925   
3                            NaN              1            0.923   
4                            NaN              1            0.973   

  validation_mogul_bonds_rmsz validation_altcode validation_num-H-reduce  \
0                         NaN                  .                       4   
1                         NaN                  .                       7   
2                         NaN                  .                      11   
3                         NaN                  .                       6   
4                         NaN                  .                       6   

  validation_rota  validation_phi  validation_avgoccu  validation_NatomsEDS  \
0               m             NaN                 1.0  

UniProt annotations

In [7]:
# Accession passed as the identifier to the annotation download below.
params = {'ace2_acc': 'Q9BYF1'}

In [17]:
from proteofav.annotation import Annotation

# Destination path for the GFF file, named after the accession in params['ace2_acc'].
out_annotation = os.path.join(out_dir, "{}.gff".format(params['ace2_acc']))

# Fetch the annotation record for that accession and write it to out_annotation.
Annotation.download(identifier=params['ace2_acc'], filename=out_annotation)

# Stop here if the download did not leave a file behind.
assert os.path.exists(out_annotation)

In [18]:
# Parse the downloaded GFF annotation file and print its head() preview.
annotation = Annotation.read(filename=out_annotation)
print(annotation.head())

     NAME     SOURCE                TYPE START  END SCORE STRAND FRAME  \
0  Q9BYF1  UniProtKB      Signal peptide     1   17     .      .     .   
1  Q9BYF1  UniProtKB               Chain    18  805     .      .     .   
2  Q9BYF1  UniProtKB               Chain    18  708     .      .     .   
3  Q9BYF1  UniProtKB  Topological domain    18  740     .      .     .   
4  Q9BYF1  UniProtKB       Transmembrane   741  761     .      .     .   

                                               GROUP  Ontology_term  \
0     Ontology_term=ECO:0000255;evidence=ECO:0000255  [ECO:0000255]   
1  ID=PRO_0000028570;Note=Angiotensin-converting ...            NaN   
2  ID=PRO_0000292268;Note=Processed angiotensin-c...            NaN   
3  Note=Extracellular;Ontology_term=ECO:0000255;e...  [ECO:0000255]   
4  Note=Helical;Ontology_term=ECO:0000255;evidenc...  [ECO:0000255]   

        evidence                ID  \
0  [ECO:0000255]               NaN   
1            NaN  [PRO_0000028570]   
2            N