In [43]:
from pathlib import Path
import Bio.PDB

In [44]:
# Path to the input PDB file. Later cells parse it and treat each atom's
# B-factor value as a pLDDT confidence score.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
pdb_file = Path("/Users/matthew/Documents/mtorc2/data/pdb/RICTOR.af.pdb")

In [45]:
# Parse the input file quietly (parser warnings suppressed) and keep only
# the first model of the resulting structure as the reference model.
pdb_parser = Bio.PDB.PDBParser(QUIET=True)
ref_structure = pdb_parser.get_structure("reference", str(pdb_file))
ref_model = ref_structure[0]

In [46]:
# Mean B-factor over the atoms of each residue, one entry per residue in
# the order the model yields them. The B-factor is used as the pLDDT score.
avg_plddts = []
for res in ref_model.get_residues():
    atom_scores = [atom.bfactor for atom in res.get_atoms()]
    avg_plddts.append(sum(atom_scores) / len(atom_scores))
    

In [47]:
# Flag each residue as ordered (1) when its mean score is strictly above
# the cutoff, otherwise disordered (0).
cutoff = 50
ordered = [1 if score > cutoff else 0 for score in avg_plddts]
        

In [48]:
# Collapse the per-residue flags in `ordered` into contiguous ordered segments.
# `domains` maps a running segment index (0, 1, 2, ...) to a (start, end)
# pair, both inclusive and 1-based (list position + 1).
# NOTE(review): this assumes residue numbers in the model run consecutively
# from 1, since a later cell compares these values against res.id[1] - confirm.
domains = dict()
n_residues = len(ordered)
i = 0
while i < n_residues:
    if ordered[i]:
        domain_start = i
        j = i + 1
        # Bounds check first: without it a segment reaching the final
        # residue would index past the end of `ordered` and raise IndexError.
        while j < n_residues and ordered[j] == 1:
            j = j + 1

        # j is the position just past the segment, which equals the 1-based
        # number of the segment's last residue.
        domain_end = j
        domains[len(domains)] = (domain_start + 1, domain_end)
        # Position j is either disordered or past the end, so skip it.
        i = j + 1
    else:
        i = i + 1

In [49]:
domains

{0: (24, 505),
 1: (521, 521),
 2: (523, 638),
 3: (642, 860),
 4: (863, 865),
 5: (871, 1007),
 6: (1331, 1332),
 7: (1334, 1335),
 8: (1338, 1338),
 9: (1421, 1442),
 10: (1513, 1531),
 11: (1603, 1697)}

In [50]:
# Fraction of all residues that lie inside an ordered segment.
# Segment bounds are inclusive, hence the +1 when measuring length.
n_ordered = sum(end - start + 1 for start, end in domains.values())
coverage = n_ordered / len(ordered)

In [51]:
coverage

0.6434426229508197

### Reconstruct the model, placing each ordered domain in its own chain

In [52]:
# Create one empty chain per segment, labelled by indexing the alphabet
# with the segment's key (0 -> "A", 1 -> "B", ...).
chain_ids = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
chains = {
    letter: Bio.PDB.Chain.Chain(letter)
    for letter in (chain_ids[domain] for domain in domains)
}

In [53]:
# Empty chain with id "A"; a later cell adds copies of the selected residues to it.
chain_copy = Bio.PDB.Chain.Chain("A")

In [54]:
# Copy every residue whose number falls inside an ordered segment into
# (a) the chain for that segment and (b) the combined chain `chain_copy`.
# Residues outside every segment are dropped.
# Unlike the earlier version, this does not reassign `ordered`, so the
# per-residue flag list from the earlier cell remains available.
for res in ref_model.get_residues():
    res_num = res.id[1]
    for domain, (start, end) in domains.items():
        if start <= res_num <= end:
            # Use two independent copies: adding an entity to a chain sets
            # its parent, so one object shared by both chains would end up
            # pointing at whichever chain it was added to last.
            chains[chain_ids[domain]].add(res.copy())
            chain_copy.add(res.copy())
            # A residue belongs to at most one segment; stop at the first match.
            break

In [55]:
# Wrap the combined chain in a model. The model keeps the id "thresh" but is
# given an explicit integer serial number: when serial_num is omitted it
# defaults to the id, and the writer (with use_model_flag set) would then
# emit a non-numeric MODEL record.
thresh_model = Bio.PDB.Model.Model("thresh", serial_num=1)
thresh_model.add(chain_copy)

In [56]:
# Top-level structure container holding the single thresholded model.
thresh_structure = Bio.PDB.Structure.Structure("thresh")
thresh_structure.add(thresh_model)

In [57]:
# Write the thresholded structure to disk as a PDB file.
# NOTE(review): absolute, machine-specific output path; an existing file
# at this location is presumably overwritten - confirm before running.
thresh_pdb_file = Path("/Users/matthew/Documents/mtorc2/data/pdb/RICTOR.thresh.2.pdb")
# use_model_flag=1 asks the writer to emit model records in the output.
pdb_io = Bio.PDB.PDBIO(use_model_flag=1)
pdb_io.set_structure(thresh_structure)
pdb_io.save(str(thresh_pdb_file))

In [40]:
# Build a model containing one chain per ordered segment. The model keeps the
# id "decomp" but is given an explicit integer serial number: when serial_num
# is omitted it defaults to the id, and the writer (with use_model_flag set)
# would then emit a non-numeric MODEL record.
decomp_model = Bio.PDB.Model.Model("decomp", serial_num=1)
for segment_chain in chains.values():
    decomp_model.add(segment_chain)

In [41]:
# Top-level structure container holding the single per-segment-chain model.
decomp_structure = Bio.PDB.Structure.Structure("decomp")
decomp_structure.add(decomp_model)

In [42]:
# Write the decomposed (one chain per segment) structure to disk as a PDB file.
# NOTE(review): absolute, machine-specific output path; an existing file
# at this location is presumably overwritten - confirm before running.
decomp_pdb_file = Path("/Users/matthew/Documents/mtorc2/data/pdb/RICTOR.decomp.pdb")
# use_model_flag=1 asks the writer to emit model records in the output.
pdb_io = Bio.PDB.PDBIO(use_model_flag=1)
pdb_io.set_structure(decomp_structure)
pdb_io.save(str(decomp_pdb_file))