In [6]:
from prody import *
from pylab import *
import pickle
import sys, os
# Añadimos la ruta del directorio raíz para tener acceso a los archivos y módulos
sys.path.append(r'..')

In [2]:
# Name of the protein of interest; it is interpolated into the file and directory names built in later cells
prot_name = 'fxa'
# UniProt accession of the target protein.
# NOTE(review): the previous comment here named P28482 (ERK2_HUMAN), which does not
# match the accession assigned below -- presumably a leftover from another notebook.
uniprot_id = "P00742"

#### According to UniProt, the reference structure 1EZQ spans positions 234 to 488.

In [180]:
# Fetch the UniProt sequence of the target protein (accession stored in `uniprot_id`)
import requests
from Bio import SeqIO

# Download the FASTA record from UniProt. Fail loudly on an HTTP error so that an
# error page is never written to disk as if it were a sequence file.
url_fasta = requests.get("https://www.uniprot.org/uniprot/" + uniprot_id + ".fasta")
url_fasta.raise_for_status()
file_name_fasta = '../data/' + uniprot_id + '.fasta'
# Context managers guarantee that both file handles are closed
with open(file_name_fasta, 'wb') as fasta_out:
    fasta_out.write(url_fasta.content)
# Read the amino-acid sequence back from the saved file
with open(file_name_fasta) as fasta_in:
    fasta_prot = SeqIO.read(fasta_in, 'fasta')
seq_prot = str(fasta_prot.seq)
print(seq_prot)
print(F'\nSequence length: {len(seq_prot)} aa.')

MGRPLHLVLLSASLAGLLLLGESLFIRREQANNILARVTRANSFLEEMKKGHLERECMEETCSYEEAREVFEDSDKTNEFWNKYKDGDQCETSPCQNQGKCKDGLGEYTCTCLEGFEGKNCELFTRKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYPCGKQTLERRKRSVAQATSSSGEAPDSITWKPYDAADLDPTENPFDLLDFNQTQPERGDNNLTRIVGGQECKDGECPWQALLINEENEGFCGGTILSEFYILTAAHCLYQAKRFKVRVGDRNTEQEEGGEAVHEVEVVIKHNRFTKETYDFDIAVLRLKTPITFRMNVAPACLPERDWAESTLMTQKTGIVSGFGRTHEKGRQSTRLKMLEVPYVDRNSCKLSSSFIITQNMFCAGYDTKQEDACQGDSGGPHVTRFKDTYFVTGIVSWGEGCARKGKYGIYTKVTAFLKWIDRSMKTRGLPKAKSHAPEVITSSPLK

Sequence length: 488 aa.


#### BLAST is omitted because it does not find the correct hits.

In [185]:
# Parse PDB entry 1ezq from the raw-PDB folder and keep only the protein atoms of chain A.
# NOTE(review): DIR_OUT_RAW_PDBS is assigned in a later cell of this notebook
# ("Fetching the structures"), so on a fresh kernel that cell has to run first.
ref_struct = (
    parsePDB('1ezq', folder=DIR_OUT_RAW_PDBS)
    .select('protein and chain A')
)

@> PDB file is found in working directory (../../FILES/.../1ezq.pdb.gz).
@> 2371 atoms and 1 coordinate set(s) were parsed in 0.03s.


In [196]:
# Run a BLAST search for the reference structure, caching the result on disk

blast_file = F'../data/{prot_name}_blast_record.pkl'
# Reuse the pickled record when the cache file exists; otherwise run the search
# and store its result for the next execution.
# NOTE: pickle.load can execute arbitrary code contained in the file, so only
# load cache files that were written by this notebook.
if os.path.isfile(blast_file):
    with open(blast_file, 'rb') as cache_in:
        blast_record = pickle.load(cache_in)
else:
    blast_record = blastPDB(ref_struct, hitlist_size = 600)
    with open(blast_file, 'wb') as cache_out:
        pickle.dump(blast_record, cache_out)

@> Blast searching NCBI PDB database for "IVGGQ..." 
@> Blast search completed in 29.8s.                 


In [198]:
# The identity cutoff is no longer applied because the PDB id list is now taken
# directly from UniProt; the placeholder string only records that it is skipped.
identity = 'OMITED'

# Hits stored in the BLAST record, requested with chain=True
pdbids_blast = blast_record.getHits(chain=True)
n_blast_hits = len(pdbids_blast.keys())
print(F'{n_blast_hits} hits')

# PDB id of the hit that getBest() reports
best_hit = blast_record.getBest()
best_id = best_hit['pdb_id']


442 hits


#### PDB IDs are scraped from UniProt

In [None]:
# Also try with
# pypdb.get_seq_cluster('1ezq.A')['pdbChain']

# and pypdb.get_blast(pdb_id, chain_id='A')

In [268]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

def pdb_ids_from_uniprot(uniprot_id, time_sleep = 2, min_length = 200):
    """Scrape the PDB cross-references listed on a UniProt entry page.

    Every ``<a class="pdb">`` link on the page is inspected. For each link the
    table cells that follow it are read positionally: the first cell is
    compared against ``'model'`` (such rows are skipped), the first character
    of the third cell is taken as the chain id and the fourth cell as an
    ``'start-end'`` residue range.
    NOTE(review): this positional reading assumes a fixed column layout of the
    UniProt structure table -- confirm against the current page markup.

    When the range found on the link's own row is shorter than ``min_length``,
    the next table row is tried instead (written for FXa, whose structures do
    not cover the whole sequence); the entry is dropped if that range is also
    too short or cannot be parsed.

    Parameters
    ----------
    uniprot_id : str
        UniProt accession, appended to ``https://www.uniprot.org/uniprot/``.
    time_sleep : float, optional
        Seconds to sleep after the page has been processed (default 2).
    min_length : int, optional
        Minimum residue-range length for a chain to be kept (default 200,
        the value that was previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        One row per kept entry, with columns ``pdb_id`` (lower case) and
        ``chain``.

    Raises
    ------
    requests.HTTPError
        If the UniProt page request returns an error status.
    """
    # Local import: `time` is not imported explicitly by the notebook's import cell
    import time

    r = requests.get('https://www.uniprot.org/uniprot/' + uniprot_id)
    r.raise_for_status()
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever is
    # installed; left unchanged so that parsing results stay the same.
    soup = BeautifulSoup(r.content)
    pdb_tags = soup.find_all('a', {'class': 'pdb'})

    pdb_chains = []
    for tag in pdb_tags:
        # Skip rows whose first cell is exactly 'model'
        method_cell = tag.find_next('td')
        if method_cell.text == 'model':
            continue
        pdb_id = tag.text
        chain_cell = method_cell.find_next('td').find_next('td')
        chain = chain_cell.text[0]
        range_len = _residue_span_length(chain_cell.find_next('td').text)
        if range_len is None:
            # Previously an unparsable range here aborted the whole scrape
            print('Error:', pdb_id)
            continue
        # Determine which chain is the best one given its sequence range
        if range_len < min_length:
            # Try to find a new range on the following table row
            alt_chain_cell = tag.find_next('tr').find_next('td').find_next('td')
            range_len = _residue_span_length(alt_chain_cell.find_next('td').text)
            if range_len is None:
                print('Error:', pdb_id)
                continue
            if range_len < min_length:
                continue
            # The second row is the one that qualifies, so use its chain id
            chain = alt_chain_cell.text[0]
        # The entry passed the filters: keep the PDB id and the chain
        pdb_chains.append((pdb_id.lower(), chain))

    time.sleep(time_sleep)
    return pd.DataFrame(pdb_chains, columns = ['pdb_id', 'chain'])


def _residue_span_length(text):
    """Return ``len(range(start, end))`` for an 'start-end' string, or None if it cannot be parsed."""
    try:
        # Only the first two dash-separated fields are used, as before; a
        # missing second field also ends up in the ValueError branch.
        start, end = [int(n) for n in text.split('-')][:2]
    except ValueError:
        return None
    return len(range(start, end))

In [270]:
# List of PDB ids
# NOTE(review): num_pdb_ids is not used in the visible cells, and the recorded
# run below reports 136 ids rather than 144 -- confirm whether it is still needed.
num_pdb_ids = 144
pdbids_file = F'../data/{prot_name.upper()}_pdb_IDs_{uniprot_id}.csv'

# Reuse the cached CSV when present; otherwise scrape UniProt and cache the table
if os.path.isfile(pdbids_file):
    pdbids_df = pd.read_csv(pdbids_file)
else:
    pdbids_df = pdb_ids_from_uniprot(uniprot_id)
    # index=False keeps the cached file free of an extra unnamed index column,
    # so both branches yield a frame with the same columns
    pdbids_df.to_csv(pdbids_file, index=False)

# Plain Python lists of ids and of their chains (same order, same length)
pdbids_list = pdbids_df['pdb_id'].tolist()
pdbids_chains = pdbids_df['chain'].tolist()
print(F'{len(pdbids_list)} pdb ids were found.')

136 pdb ids were found.


In [271]:
# Number of chain ids; both lists are columns of the same DataFrame, so this equals the number of PDB ids
len(pdbids_chains)

136

### List of residues

In [272]:
# Reference structure: the first PDB id of the list
ref_prot_id = pdbids_list[0] # Chosen by visual inspection using UCSF Chimera

print(F'PDBID used as reference structure: {ref_prot_id}')
# Header information of the reference entry.
# NOTE(review): with header=True and model=0 this presumably returns only the
# header data (no coordinates) -- confirm against the ProDy parsePDB docs. No
# folder is given here, unlike the call on the next line.
cabezal = parsePDB(ref_prot_id, header=True, model=0)
# Coordinates of the same entry, read from ../data
estructura = parsePDB(ref_prot_id, folder = '../data')
# Assign secondary structure to the protein atoms using the header; this must run
# before the 'secondary ...' selection below, which relies on that assignment
sec_sctr = assignSecstr(cabezal, estructura.select('protein')).getSecstrs()
# Residue numbers of the alpha carbons labelled H or E within the given resnum range.
# NOTE(review): the selection is not restricted to a single chain; in the recorded
# output the numbering restarts after 242, presumably because a second chain is
# included -- confirm that this is intended.
sec_sctr_HE = estructura.select(F'secondary H E and calpha and resnum 1:{len(seq_prot)}').getResnums()

@> PDB file is found in working directory (1c5m.pdb.gz).
@> PDB file is found in working directory (../data/1c5m.pdb.gz).
@> 5856 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> Secondary structures were assigned to 116 residues.


PDBID used as reference structure: 1c5m


In [265]:
# Space-separated string of the residue numbers; it is reused later as part of
# the ProDy selection string for the alignment
sec_sctr_res_list = sec_sctr_HE.tolist()
sec_sctr_res_str = ' '.join(map(str, sec_sctr_res_list))
print(sec_sctr_res_str)
# Implicit string concatenation replaces the backslash continuation, which
# dropped the space between "a" and "la" in the printed message
print(F'Un total de {len(sec_sctr_res_list)} residuos pertenecen a '
      F'la estructura secundaria de la proteína {prot_name.upper()}.')

20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38
Un total de 101 residuos pertenecen ala estructura secundaria de la proteína FXA.


In [266]:
# Write the space-separated residue-number string to a text file under ../data
sec_sctr_res_path = F'../data/list_{prot_name}_secondary_structure_residues.txt'
with open(sec_sctr_res_path, 'w') as residues_file:
    residues_file.write(sec_sctr_res_str)

### Fetching the structures

In [273]:
# Output directories: raw PDB files, extracted protein chains and raw ligands
DIR_OUT_RAW_PDBS = F'../../FILES/CRYSTALS/PDB_{prot_name.upper()}_RAW_files/'
DIR_OUT_CHAINS = F'../../FILES/CRYSTALS/PROT_{prot_name.upper()}_CHAINS/'
DIR_OUT_LIGS = F'../../FILES/CRYSTALS/LIGS_{prot_name.upper()}/RAW/'

# Create the directories if they do not exist yet. exist_ok=True avoids the
# separate existence check (and the race between checking and creating).
import os
for directory in [DIR_OUT_RAW_PDBS, DIR_OUT_CHAINS, DIR_OUT_LIGS]:
    os.makedirs(directory, exist_ok=True)

In [274]:
# Reference structure used to align the PDBs; the alignment uses the CA atoms of
# the residues that belong to a region with secondary structure.
best_id = pdbids_list[0] # '1c5m' in the recorded run
chain_best_id = pdbids_chains[0]
ref_struct = parsePDB(best_id, folder = DIR_OUT_RAW_PDBS).select('protein and chain ' +
                                                                 chain_best_id)

# Target selection string; it is the same for every structure, so build it once
align_selection = 'calpha  and resnum' + ' ' + sec_sctr_res_str

for pdb_id, chain_id in zip(pdbids_list, pdbids_chains):
    try:
        # Parse the entry, storing/reading the PDB file in the raw-PDB folder
        pdb_cry = parsePDB(pdb_id, folder = DIR_OUT_RAW_PDBS)
        # Keep the protein atoms of the chain listed for this entry in pdbids_chains
        pdb_chain = pdb_cry.select('protein and chain ' + chain_id)
        if pdb_chain is None:
            # Explicit message instead of the error raised further down the call
            raise ValueError('chain ' + chain_id + ' not found.')
        # Align onto the reference using the CA atoms of the secondary-structure residues
        pdb_alg = matchAlign(mobile = pdb_chain, target = ref_struct,
                             overlap=85,
                             tarsel=align_selection)
        if pdb_alg is None:
            # Guard for a None result (presumably: no matching chain pair was
            # found), which would otherwise surface as an opaque TypeError
            raise ValueError('no matching chains for the alignment.')
        # The returned tuple holds the aligned structure at index 0
        protein = pdb_alg[0]
    except Exception as e:
        # Best effort: report the failing entry and move on to the next one
        print(e, "Error al alinear:", pdb_id)
        continue

    if protein: # A chain was obtained, so save it
        protein.setChids("A") # rename the chain to "A" so that all output files are homogeneous
        writePDB(DIR_OUT_CHAINS + pdb_id + '_A.pdb', protein)
    # Check whether the entry contains HETATM records (non-protein molecules)
    ligand = pdb_cry.select('hetatm')
    if ligand: # Something was selected, so save it
        writePDB(DIR_OUT_LIGS + pdb_id + '_lig.pdb', ligand)

@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 1c5m downloaded (../../FILES/.../1c5m.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 5856 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> PDB file is found in working directory (../../FILES/.../1c5m.pdb.gz).
@> 5856 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain D from 1c5m (len=241) and Chain D from 1c5m (len=241):
@> 	Match: 241 residues match with 100% sequence identity and 100% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 

@> Before alignment RMSD: 62.86
@> After alignment  RMSD: 0.41
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 1hcg downloaded (../../FILES/.../1hcg.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2449 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Checking AtomGroup 1hcg: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 1hcg (len=236) and Chain D from 1c5m (len=241):
@> 	Match: 236 residues match with 100% sequence identity and 98% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 

@> 1iqk downloaded (../../FILES/.../1iqk.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2274 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Checking AtomGroup 1iqk: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 1iqk (len=235) and Chain D from 1c5m (len=241):
@> 	Match: 234 residues match with 99% sequence identity and 97% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 59.24
@> After alignment  RMSD: 0.48
@> Connecting wwPDB FTP server 

@> 2574 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Checking AtomGroup 1lqd: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain B from 1lqd (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 232 residues match with 98% sequence identity and 96% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 63.39
@> After alignment  RMSD: 0.39
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 1mq5 downloaded (../../FILES/.../1mq5.pdb.gz)
@> PDB download via FTP completed (1 downloa

@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 1v3x (len=233) and Chain D from 1c5m (len=241):
@> 	Match: 232 residues match with 99% sequence identity and 96% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 63.51
@> After alignment  RMSD: 0.38
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 1wu1 downloaded (../../FILES/.../1wu1.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2266 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> Checking AtomGroup 1wu1: 1 chains

@>   Comparing Chain B from 2bq6 (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 232 residues match with 98% sequence identity and 96% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 64.68
@> After alignment  RMSD: 0.40
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 2bq7 downloaded (../../FILES/.../2bq7.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2561 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Checking AtomGroup 2bq7: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue num

@> 	Match: 233 residues match with 100% sequence identity and 97% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 48.08
@> After alignment  RMSD: 0.39
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 2g00 downloaded (../../FILES/.../2g00.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2277 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> Checking AtomGroup 2g00: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 2g00 (len=234) and Chain D from

@>   Comparing Chain A from 2j94 (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 232 residues match with 98% sequence identity and 96% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 63.19
@> After alignment  RMSD: 0.37
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 2j95 downloaded (../../FILES/.../2j95.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2430 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Checking AtomGroup 2j95: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue num

@> 	Match: 233 residues match with 100% sequence identity and 97% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 26.80
@> After alignment  RMSD: 0.42
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 2p95 downloaded (../../FILES/.../2p95.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2356 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Checking AtomGroup 2p95: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 2p95 (len=234) and Chain D from

@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 63.58
@> After alignment  RMSD: 0.38
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 2vh0 downloaded (../../FILES/.../2vh0.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2494 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Checking AtomGroup 2vh0: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 2vh0 (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 232 residues match with 98% sequence identity a

@> Before alignment RMSD: 61.65
@> After alignment  RMSD: 0.36
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 2vwo downloaded (../../FILES/.../2vwo.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2416 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Checking AtomGroup 2vwo: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 2vwo (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 233 residues match with 98% sequence identity and 97% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 2

@> 2xbx downloaded (../../FILES/.../2xbx.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2454 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Checking AtomGroup 2xbx: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 2xbx (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 233 residues match with 99% sequence identity and 97% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 61.60
@> After alignment  RMSD: 0.35
@> Connecting wwPDB FTP server 

@> 2507 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Checking AtomGroup 2y7x: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 2y7x (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 232 residues match with 96% sequence identity and 96% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 63.83
@> After alignment  RMSD: 0.41
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 2y7z downloaded (../../FILES/.../2y7z.pdb.gz)
@> PDB download via FTP completed (1 downloa

@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 3ffg (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 233 residues match with 100% sequence identity and 97% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 75.39
@> After alignment  RMSD: 0.44
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 3hpt downloaded (../../FILES/.../3hpt.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 5631 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> Checking AtomGroup 3hpt: 1 chain

@>   Comparing Chain A from 3kqe (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 233 residues match with 100% sequence identity and 97% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 75.38
@> After alignment  RMSD: 0.40
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 3liw downloaded (../../FILES/.../3liw.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2386 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Checking AtomGroup 3liw: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue nu

@> 	Match: 232 residues match with 98% sequence identity and 96% overlap.
@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 58.86
@> After alignment  RMSD: 0.39
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 4bti downloaded (../../FILES/.../4bti.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 5193 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> Checking AtomGroup 4bti: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain B from 4bti (len=234) and Chain D from 

@> Alignment is based on 95 atoms matching 'calpha  and resnum 20 21 30 31 32 33 34 35 39 40 41 42 43 44 45 46 51 52 53 54 64 65 66 67 68 81 82 83 85 86 87 88 89 90 104 105 106 107 108 135 136 137 138 139 140 156 157 158 159 160 161 164 165 166 167 168 169 170 198 199 200 201 202 203 206 207 208 209 210 211 212 213 214 215 227 228 229 230 234 235 236 237 238 239 240 241 242 12 13 14 15 18 19 20 21 27 28 29 36 37 38'.
@> Before alignment RMSD: 62.73
@> After alignment  RMSD: 0.43
@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 4y7b downloaded (../../FILES/.../4y7b.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 2598 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Checking AtomGroup 4y7b: 1 chains are identified
@> Checking AtomGroup 1c5m: 1 chains are identified
@> Trying to match chains based on residue numbers and names:
@>   Comparing Chain A from 4y7b (len=234) and Chain D from 1c5m (len=241):
@> 	Match: 232 residues match with 96% sequence identity a