## Fetching PDB files from the Protein Data Bank

+ pdb file
+ cif file
+ fetching file
+ working with file

In [1]:
# fetching our 3D structure file
import biotite.database.rcsb as rcsb

In [2]:
# List every name defined in the rcsb module (query classes plus the
# count/fetch/search helpers) to see what is available.
dir(rcsb)

['BasicQuery',
 'CompositeQuery',
 'DepositGrouping',
 'FieldQuery',
 'Grouping',
 'IdentityGrouping',
 'MotifQuery',
 'Query',
 'SequenceQuery',
 'SingleQuery',
 'Sorting',
 'StructureQuery',
 'UniprotGrouping',
 '__author__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'count',
 'download',
 'fetch',
 'query',
 'search']

In [9]:
# Download entry 4ZS6 in PDB format into the "mypdb_dir" directory;
# fetch() returns the path of the saved file.
pdb_file = rcsb.fetch("4ZS6", format="pdb", target_path="mypdb_dir")

In [10]:
# Show what fetch() returned for a single ID: the path of the downloaded file.
print(pdb_file)

mypdb_dir\4ZS6.pdb


In [11]:
# Fetching multiple files together: pass a list of PDB IDs and
# fetch() returns a list of file paths instead of a single path.
pdb_file = rcsb.fetch(
    ["4ZS6", "6LU7"],
    format="pdb",
    target_path="mypdb_dir",
)

In [12]:
# With a list of IDs, fetch() returned a list of paths (one per requested ID).
print(pdb_file)

['mypdb_dir\\4ZS6.pdb', 'mypdb_dir\\6LU7.pdb']


#### Working with 3D Structure

+ pdb
+ cif
+ npz
+ xtc
+ mmtf

In [13]:
# Reading 3d pdb files
import biotite.structure.io.pdb as pdb

In [14]:
# List every name defined in the pdb I/O module (the PDBFile class plus
# module-level helpers such as get_structure and set_structure).
dir(pdb)

['PDBFile',
 '__author__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'convert',
 'file',
 'get_assembly',
 'get_model_count',
 'get_structure',
 'get_symmetry_mates',
 'hybrid36',
 'list_assemblies',
 'set_structure']

In [15]:
# Reading
# PDBFile.read() is a class method that parses the file and returns the
# populated PDBFile object, so keep its return value. Calling read() on an
# empty PDBFile() instance only works through the deprecated instance shim
# (see `_deprecated_read` in dir(file_reader)); without that shim the parsed
# file would be discarded and file_reader would stay empty.
file_reader = pdb.PDBFile.read("mypdb_dir/4ZS6.pdb")

In [16]:
# A bare expression shows the object's repr (class and memory address),
# not the contents of the file.
file_reader

<biotite.structure.io.pdb.PDBFile at 0x28657257e50>

In [17]:
# print() renders the file as text, i.e. the raw PDB records
# (HEADER, TITLE, COMPND, ...).
print(file_reader)

HEADER    IMMUNE SYSTEM                           13-MAY-15   4ZS6              
TITLE     RECEPTOR BINDING DOMAIN AND FAB COMPLEX                               
COMPND    MOL_ID: 1;                                                            
COMPND   2 MOLECULE: FAB LIGHT CHAIN;                                           
COMPND   3 CHAIN: L, D;                                                         
COMPND   4 ENGINEERED: YES;                                                     
COMPND   5 MOL_ID: 2;                                                           
COMPND   6 MOLECULE: FAB HEAVY CHAIN;                                           
COMPND   7 CHAIN: H, C;                                                         
COMPND   8 ENGINEERED: YES;                                                     
COMPND   9 MOL_ID: 3;                                                           
COMPND  10 MOLECULE: S PROTEIN;                                                 
COMPND  11 CHAIN: A, B;     

In [18]:
# List the attributes and methods available on the PDBFile object
# (e.g. get_structure, get_coord, get_model_count, write).
dir(file_reader)

['__abstractmethods__',
 '__class__',
 '__copy_create__',
 '__copy_fill__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_atom_line_i',
 '_deprecated_read',
 '_get_atom_record_indices_for_model',
 '_get_bonds',
 '_get_model_length',
 '_index_models_and_atoms',
 '_model_start_i',
 '_set_bonds',
 'copy',
 'get_assembly',
 'get_b_factor',
 'get_coord',
 'get_model_count',
 'get_remark',
 'get_structure',
 'get_symmetry_mates',
 'lines',
 'list_assemblies',
 'read',
 'read_iter',
 'set_structure',
 'write',
 'write_iter']

In [19]:
# Convert the parsed PDB records into a structure object. model=None is the
# default and keeps all models, so the result has a leading model axis
# even though this file contains a single model.
protein_structure = file_reader.get_structure(model=None)

In [21]:
# Printing the structure lists one atom per row under a "Model N" header.
# Columns appear to be: chain, residue number, residue name, atom name,
# element, then x/y/z coordinates.
print(protein_structure)

Model 1
    L       1  ALA N      N       -18.110   24.359  641.863
    L       1  ALA CA     C       -17.678   23.037  641.408
    L       1  ALA C      C       -16.857   22.298  642.484
    L       1  ALA O      O       -16.397   22.914  643.454
    L       1  ALA CB     C       -16.869   23.156  640.091
    L       2  ILE N      N       -16.718   20.979  642.335
    L       2  ILE CA     C       -15.815   20.205  643.188
    L       2  ILE C      C       -14.421   20.285  642.610
    L       2  ILE O      O       -14.262   19.993  641.438
    L       2  ILE CB     C       -16.229   18.710  643.282
    L       2  ILE CG1    C       -17.509   18.537  644.117
    L       2  ILE CG2    C       -15.076   17.892  643.873
    L       2  ILE CD1    C       -17.949   17.121  644.276
    L       3  ARG N      N       -13.416   20.645  643.412
    L       3  ARG CA     C       -12.055   20.807  642.878
    L       3  ARG C      C       -11.141   19.631  643.136
    L       3  ARG O      O     

In [23]:
# Shape and Coord
# shape is reported per model and atom (here 1 model); coord holds the
# x/y/z triple of every atom for every model.
for label, value in (
    ("Shape", protein_structure.shape),
    ("Coordinate", protein_structure.coord),
):
    print(label, value)

Shape (1, 9842)
Coordinate [[[-18.11   24.359 641.863]
  [-17.678  23.037 641.408]
  [-16.857  22.298 642.484]
  ...
  [-28.943  39.391 574.093]
  [-31.744  37.804 575.741]
  [-26.276  43.09  573.911]]]
