In [4]:
import sys
from pathlib import Path

# Put the parent directory on the import path so the local
# protein_design_tools package resolves. The membership check keeps this cell
# idempotent: re-running it (or running sibling cells that do the same) does
# not stack duplicate entries on sys.path.
_parent_dir = str(Path('../').resolve())
if _parent_dir not in sys.path:
    sys.path.insert(0, _parent_dir)

from protein_design_tools.io.pdb import fetch_pdb
from protein_design_tools.utils.analysis import find_overlapping_residues

# Get PDB ID 6P60, chains A, B, C
protein1 = fetch_pdb('6p60', chains=['A','B','C'])
# Get PDB ID 6X78, chains H, L, I
protein2 = fetch_pdb('6x78', chains=['H','L','I'])

# Coordinate array shape before any filtering, for comparison with the
# per-chain shapes printed after cleanup.
print(f"Initial shape of protein1: {protein1.get_coordinates().shape}")

# Remove water molecules
protein1.remove_water()
protein2.remove_water()

# Remove nonstandard residues (this will also remove water)
# NOTE(review): if that is accurate, the remove_water() calls above are
# redundant — confirm against the library before dropping them.
protein1.remove_non_standard_residues()
protein2.remove_non_standard_residues()

# Per-chain coordinate shapes after cleanup.
print(protein1.get_coordinates(chain='A').shape, protein2.get_coordinates(chain='H').shape)

# Overlapping residues between chain A in protein1 and chain H in protein2.
# NOTE: the result keeps the name `overlap_AB` because the cell that prints
# the overlap reads it, even though the chains compared are A and H.
# match_res_names=False presumably matches on residue numbering only — TODO confirm.
overlap_AB = find_overlapping_residues(
    protein1=protein1,
    chain_id1='A',
    protein2=protein2,
    chain_id2='H',
    match_res_names=False
)

# Number of overlapping residues found.
print(len(overlap_AB))

Initial shape of protein1: (5272, 3)
(1663, 3) (1678, 3)
213


In [3]:
# Relies on `overlap_AB` and `fetch_pdb` already being defined by an earlier
# cell in the kernel.
print("Overlap between 6P60 chain A and 6X78 chain H (by residue seq + i_code):")
# Each overlap entry unpacks to (res_seq, i_code, res_name).
for (res_seq, i_code, res_name) in overlap_AB:
    print(f"  Residue {res_seq}{i_code} => {res_name}")


# NOTE: this rebinds protein1/protein2 to two different entries (9d3d, 9ekf),
# fetched without a chain filter; the previously loaded structures are no
# longer reachable under these names after this cell runs.
protein1 = fetch_pdb('9d3d')
protein2 = fetch_pdb('9ekf')

# TODO(review): RMSD comparison left disabled; compute_rmsd_numpy is not
# imported in any visible cell — confirm intent before re-enabling.
# compute_rmsd_numpy(protein1.get_coordinates(), protein2.get_coordinates())

# Last expression of the cell: displayed as a tuple of both coordinate shapes.
protein1.get_coordinates().shape, protein2.get_coordinates().shape

Overlap between 6P60 chain A and 6X78 chain H (by residue seq + i_code):
  Residue 2 => VAL
  Residue 3 => GLN
  Residue 4 => LEU
  Residue 5 => GLN
  Residue 6 => GLU
  Residue 7 => SER
  Residue 8 => GLY
  Residue 9 => PRO
  Residue 10 => GLY
  Residue 11 => LEU
  Residue 12 => VAL
  Residue 13 => LYS
  Residue 14 => PRO
  Residue 15 => SER
  Residue 16 => GLU
  Residue 17 => THR
  Residue 18 => LEU
  Residue 19 => SER
  Residue 20 => LEU
  Residue 21 => THR
  Residue 22 => CYS
  Residue 23 => ALA
  Residue 24 => VAL
  Residue 25 => THR
  Residue 26 => GLY
  Residue 27 => GLY
  Residue 28 => SER
  Residue 29 => ILE
  Residue 30 => SER
  Residue 31 => ASP
  Residue 32 => ALA
  Residue 33 => TYR
  Residue 34 => TYR
  Residue 35 => TRP
  Residue 36 => SER
  Residue 37 => TRP
  Residue 38 => ILE
  Residue 39 => ARG
  Residue 40 => GLN
  Residue 41 => PHE
  Residue 42 => PRO
  Residue 43 => GLY
  Residue 44 => LYS
  Residue 45 => ARG
  Residue 46 => LEU
  Residue 47 => GLU
  Residue 48 =>

((17243, 3), (17537, 3))

In [1]:
import sys
from pathlib import Path

# Put the parent directory on the import path so the local
# protein_design_tools package resolves. The membership check keeps this cell
# idempotent: re-running it (or running sibling cells that do the same) does
# not stack duplicate entries on sys.path.
_parent_dir = str(Path('../').resolve())
if _parent_dir not in sys.path:
    sys.path.insert(0, _parent_dir)

from protein_design_tools.io.cif import fetch_cif
from protein_design_tools.utils.analysis import find_overlapping_residues

# Get PDB ID 6P60, chains A, B, C (via the CIF reader)
protein1 = fetch_cif('6p60', chains=['A','B','C'])
# Get PDB ID 6X78, chains H, L, I (via the CIF reader)
protein2 = fetch_cif('6x78', chains=['H','L','I'])

# Coordinate array shape before any filtering, for comparison with the
# per-chain shapes printed after cleanup.
print(f"Initial shape of protein1: {protein1.get_coordinates().shape}")

# Remove water molecules
protein1.remove_water()
protein2.remove_water()

# Remove nonstandard residues (this will also remove water)
# NOTE(review): if that is accurate, the remove_water() calls above are
# redundant — confirm against the library before dropping them.
protein1.remove_non_standard_residues()
protein2.remove_non_standard_residues()

# Per-chain coordinate shapes after cleanup.
print(protein1.get_coordinates(chain='A').shape, protein2.get_coordinates(chain='H').shape)

# Overlapping residues between chain A in protein1 and chain H in protein2.
# NOTE: the result keeps the name `overlap_AB` because the cell that prints
# the overlap reads it, even though the chains compared are A and H.
# match_res_names=False presumably matches on residue numbering only — TODO confirm.
overlap_AB = find_overlapping_residues(
    protein1=protein1,
    chain_id1='A',
    protein2=protein2,
    chain_id2='H',
    match_res_names=False
)

# Number of overlapping residues found.
print(len(overlap_AB))

Initial shape of protein1: (5323, 3)
(1663, 3) (1678, 3)
213


In [2]:
import sys
from pathlib import Path

# Put the parent directory on the import path so the local
# protein_design_tools package resolves. The membership check keeps this cell
# idempotent: re-running it (or running sibling cells that do the same) does
# not stack duplicate entries on sys.path.
_parent_dir = str(Path('../').resolve())
if _parent_dir not in sys.path:
    sys.path.insert(0, _parent_dir)

from protein_design_tools.io.pdb import fetch_pdb
from protein_design_tools.utils.analysis import find_overlapping_residues

# Get PDB ID 6P60, chains A, B, C
protein1 = fetch_pdb('6p60', chains=['A','B','C'])
# Get PDB ID 6X78, chains H, L, I
protein2 = fetch_pdb('6x78', chains=['H','L','I'])

# Coordinate array shape before any filtering, for comparison with the
# per-chain shapes printed after cleanup.
print(f"Initial shape of protein1: {protein1.get_coordinates().shape}")

# Remove water molecules
protein1.remove_water()
protein2.remove_water()

# Remove nonstandard residues (this will also remove water)
# NOTE(review): if that is accurate, the remove_water() calls above are
# redundant — confirm against the library before dropping them.
protein1.remove_non_standard_residues()
protein2.remove_non_standard_residues()

# Per-chain coordinate shapes after cleanup.
print(protein1.get_coordinates(chain='A').shape, protein2.get_coordinates(chain='H').shape)

# Overlapping residues between chain A in protein1 and chain H in protein2.
# NOTE: the result keeps the name `overlap_AB` because the cell that prints
# the overlap reads it, even though the chains compared are A and H.
# match_res_names=False presumably matches on residue numbering only — TODO confirm.
overlap_AB = find_overlapping_residues(
    protein1=protein1,
    chain_id1='A',
    protein2=protein2,
    chain_id2='H',
    match_res_names=False
)

# Number of overlapping residues found.
print(len(overlap_AB))

Initial shape of protein1: (5272, 3)
(1663, 3) (1678, 3)
213


In [5]:
import sys
from pathlib import Path

# Put the parent directory on the import path so the local
# protein_design_tools package resolves. The membership check keeps this cell
# idempotent: re-running it does not stack duplicate entries on sys.path.
_parent_dir = str(Path('../').resolve())
if _parent_dir not in sys.path:
    sys.path.insert(0, _parent_dir)

from protein_design_tools.io.cif import fetch_cif

# Chain-filtered fetch of 6X78.
# NOTE(review): the output recorded with this cell shows
# ['H', 'L', 'I', 'A', 'B'] and (1806, 3) for this filtered fetch, so the
# expectations annotated below were not met on that run — the chain filtering
# in fetch_cif needs investigation.
p2a = fetch_cif("6x78", chains=["H", "L", "I"])

print([c.name for c in p2a.chains])          # expected: ['H', 'L', 'I']
print(p2a.get_coordinates(chain="H").shape)  # expected: (1678, 3), matching the PDB case


# Unfiltered fetch of the same entry, for comparison.
p2b = fetch_cif("6x78")

print([c.name for c in p2b.chains])          # expected: ['A', 'B', 'H', 'L', 'G', 'I']
print(p2b.get_coordinates().shape)

['H', 'L', 'I', 'A', 'B']
(1806, 3)
['A', 'B', 'H', 'L', 'G', 'I']
(7264, 3)


In [6]:
from protein_design_tools.io.cif import fetch_cif

# Regression check for the chain-filtered CIF fetch. Each assertion carries a
# message with the observed value so a failure identifies which check broke
# (a bare assert only reports an empty "AssertionError:").
p = fetch_cif("6x78", chains=["H","L","I"])

chain_names = [c.name for c in p.chains]
assert chain_names == ["H","L","I"], (
    f"unexpected chains after filtering: {chain_names}"
)

chain_h_shape = p.get_coordinates(chain="H").shape   # when hydrogens skipped
assert chain_h_shape == (1678, 3), (
    f"unexpected chain H coordinate shape: {chain_h_shape}"
)
print("✓ chain de-dup & hydrogen filter OK")

AssertionError: 