# 0. Download a structural ensemble from RCSB PDB

In [1]:
import numpy as np
from PDBClean import pdbclean_io, pdbutils

## Let's first organize the project
We will store everything under the directory PROJDIR, that we need to create if it does not exists already.

In [4]:
PROJDIR='../examples/AKproject'
pdbclean_io.check_project(projdir=PROJDIR)

../examples/AKproject already exists, with content:
['info.txt', 'raw_bank', 'simple_bank', 'clean_bank', 'standard_MolID_bank']


## Retrieve reference sequence(s) from keyword

In [5]:
ref_sequences, ref_metadata = pdbutils.retrieve_sequence_from_PDB('adenylate kinase', mode='metadata', update=False, seqfile='../data/pdb_seqres.txt')

In [6]:
print('{0} sequences were identified as potential hits! \n'.format(len(ref_sequences)))
for iseq in np.arange(len(ref_sequences)):
    print('{0} {1}'.format(ref_metadata[iseq], ref_sequences[iseq]))

18 sequences were identified as potential hits! 

>1kht_A mol:protein length:192  adenylate kinase
 MKNKVVVVTGVPGVGSTTSSQLAMDNLRKEGVNYKMVSFGSVMFEVAKEENLVSDRDQMRKMDPETQKRIQKMAGRKIAEMAKESPVAVDTHSTVSTPKGYLPGLPSWVLNELNPDLIIVVETTGDEILMRRMSDETRVRDLDTASTIEQHQFMNRCAAMSYGVLTGATVKIVQNRNGLLDQAVEELTNVLR

>1kht_B mol:protein length:192  adenylate kinase
 MKNKVVVVTGVPGVGSTTSSQLAMDNLRKEGVNYKMVSFGSVMFEVAKEENLVSDRDQMRKMDPETQKRIQKMAGRKIAEMAKESPVAVDTHSTVSTPKGYLPGLPSWVLNELNPDLIIVVETTGDEILMRRMSDETRVRDLDTASTIEQHQFMNRCAAMSYGVLTGATVKIVQNRNGLLDQAVEELTNVLR

>1kht_C mol:protein length:192  adenylate kinase
 MKNKVVVVTGVPGVGSTTSSQLAMDNLRKEGVNYKMVSFGSVMFEVAKEENLVSDRDQMRKMDPETQKRIQKMAGRKIAEMAKESPVAVDTHSTVSTPKGYLPGLPSWVLNELNPDLIIVVETTGDEILMRRMSDETRVRDLDTASTIEQHQFMNRCAAMSYGVLTGATVKIVQNRNGLLDQAVEELTNVLR

>1ki9_A mol:protein length:192  adenylate kinase
 MKNKLVVVTGVPGVGGTTITQKAMEKLSEEGINYKMVNFGTVMFEVAQEENLVEDRDQMRKLDPDTQKRIQKLAGRKIAEMVKESPVVVDTHSTIKTPKGYLPGLPVWVLNELNPDIIIVVETSGDEILIRRLNDETRNRDLETTAGIEEHQIMNRAAAMTYGVLTGA

## Retrieve all sequences that match the reference sequence(s)

In [7]:
sequences, metadata = pdbutils.retrieve_sequence_from_PDB(ref_sequences[0], mode='sequence', update=False, seqfile='../data/pdb_seqres.txt')
for seq in ref_sequences[1:]:
    newseq, newmet = pdbutils.retrieve_sequence_from_PDB(seq, mode='sequence', update=False, seqfile='../data/pdb_seqres.txt')
    sequences = np.append(sequences, newseq)
    metadata  = np.append(metadata, newmet)

In [8]:
print('{0} sequences were retrieved! \n'.format(len(sequences)))
for iseq in np.arange(len(sequences)):
    print('{0} {1}'.format(metadata[iseq], sequences[iseq]))

55 sequences were retrieved! 

>1kht_A mol:protein length:192  adenylate kinase
 MKNKVVVVTGVPGVGSTTSSQLAMDNLRKEGVNYKMVSFGSVMFEVAKEENLVSDRDQMRKMDPETQKRIQKMAGRKIAEMAKESPVAVDTHSTVSTPKGYLPGLPSWVLNELNPDLIIVVETTGDEILMRRMSDETRVRDLDTASTIEQHQFMNRCAAMSYGVLTGATVKIVQNRNGLLDQAVEELTNVLR

>1kht_B mol:protein length:192  adenylate kinase
 MKNKVVVVTGVPGVGSTTSSQLAMDNLRKEGVNYKMVSFGSVMFEVAKEENLVSDRDQMRKMDPETQKRIQKMAGRKIAEMAKESPVAVDTHSTVSTPKGYLPGLPSWVLNELNPDLIIVVETTGDEILMRRMSDETRVRDLDTASTIEQHQFMNRCAAMSYGVLTGATVKIVQNRNGLLDQAVEELTNVLR

>1kht_C mol:protein length:192  adenylate kinase
 MKNKVVVVTGVPGVGSTTSSQLAMDNLRKEGVNYKMVSFGSVMFEVAKEENLVSDRDQMRKMDPETQKRIQKMAGRKIAEMAKESPVAVDTHSTVSTPKGYLPGLPSWVLNELNPDLIIVVETTGDEILMRRMSDETRVRDLDTASTIEQHQFMNRCAAMSYGVLTGATVKIVQNRNGLLDQAVEELTNVLR

>1kht_A mol:protein length:192  adenylate kinase
 MKNKVVVVTGVPGVGSTTSSQLAMDNLRKEGVNYKMVSFGSVMFEVAKEENLVSDRDQMRKMDPETQKRIQKMAGRKIAEMAKESPVAVDTHSTVSTPKGYLPGLPSWVLNELNPDLIIVVETTGDEILMRRMSDETRVRDLDTASTIEQHQFMNRCAAMSYGVLTGATVKIVQNRNGLLDQAVEEL

## Download the corresponding PDB files
First we create the download directory if it does not exists already

In [9]:
pdbclean_io.check_project(projdir=PROJDIR, level='raw_bank')

../examples/AKproject/raw_bank already exists, with content:
['info.txt', '1kht.cif', '1ki9.cif', '2ar7.cif', '2bbw.cif', '5x6k.cif', '5x6l.cif', '5xru.cif', '5ycb.cif', '5ycc.cif', '6hf7.cif']


In [10]:
pdbutils.download_pdb_from_metadata(metadata, projdir=PROJDIR)

wrote ../examples/AKproject/raw_bank/1kht.cif from https://files.rcsb.org/download/1KHT.cif
wrote ../examples/AKproject/raw_bank/1ki9.cif from https://files.rcsb.org/download/1KI9.cif
wrote ../examples/AKproject/raw_bank/2ar7.cif from https://files.rcsb.org/download/2AR7.cif
wrote ../examples/AKproject/raw_bank/2bbw.cif from https://files.rcsb.org/download/2BBW.cif
wrote ../examples/AKproject/raw_bank/5x6k.cif from https://files.rcsb.org/download/5X6K.cif
wrote ../examples/AKproject/raw_bank/5x6l.cif from https://files.rcsb.org/download/5X6L.cif
wrote ../examples/AKproject/raw_bank/5xru.cif from https://files.rcsb.org/download/5XRU.cif
wrote ../examples/AKproject/raw_bank/5ycb.cif from https://files.rcsb.org/download/5YCB.cif
wrote ../examples/AKproject/raw_bank/5ycc.cif from https://files.rcsb.org/download/5YCC.cif
wrote ../examples/AKproject/raw_bank/6hf7.cif from https://files.rcsb.org/download/6HF7.cif
