In [35]:
import mdtraj as md
import tempfile
import itertools
import numpy as np

output_dir = '../../data/241109_INFconstruct/output/Z1-B50W/241111'
filename = 'Z1-B50W'

# go through all lines of the pdb until you find TER of chain B and then remove all lines until you find the next chain A TER and add END
with open(f'{output_dir}/{filename}_solvated.pdb', 'r') as file:
    lines = file.readlines()


with tempfile.NamedTemporaryFile(delete=False, suffix='.pdb') as temp_file:
    for line in lines:
        temp_file.write(line.encode())
    temp_file.write(b'END\n')
    temp_filename = temp_file.name

traj = md.load(temp_filename)
protein = traj.topology.select('chainid 0 and name CA')

# binder indices
# binding site indices - on the INF: 55-135 (wide) or 80-100; but we might actually not care about this

# effectively split the protein into two parts: binder and INF
# in total, this has 241 residues
# 50-aa binder - 10aa linker - 10aa protease - 10aa linker - 161 INF-beta part (that was sent from MIT)
# this equals 241 residues, which is correct

binder_length = 50
linker1_length = 10
protease_length = 10
linker2_length = 10
inf_length = 161

binder_residue_indices = range(1, binder_length + 1)
linker1_residue_indices = range(binder_length + 1, binder_length + linker1_length + 1)
protease_residue_indices = range(binder_length + linker1_length + 1, binder_length + linker1_length + protease_length + 1)
linker2_residue_indices = range(binder_length + linker1_length + protease_length + 1, binder_length + linker1_length + protease_length + linker2_length + 1)
inf_residue_indices = range(binder_length + linker1_length + protease_length + linker2_length + 1, binder_length + linker1_length + protease_length + linker2_length + inf_length + 1)

assert len(protein) == binder_length + linker1_length + protease_length + linker2_length + inf_length, f"Protein length does not match expected length. Expected {binder_length + linker1_length + protease_length + linker2_length + inf_length}, got {len(protein)}"

binder = protein[:binder_length]
inf = protein[binder_length + linker1_length + protease_length + linker2_length:]

all_CA_atom_indices = traj.topology.select('chainid 0 and name CA')
binder_CA_atom_indices = all_CA_atom_indices[:binder_length]
inf_CA_atom_indices = all_CA_atom_indices[binder_length + linker1_length + protease_length + linker2_length:]



binder_CA_labels = ','.join([f"@CA-A_{i}" for i in binder_residue_indices])
inf_CA_labels = ','.join([f"@CA-A_{i}" for i in inf_residue_indices])

print(binder_CA_labels)
print(inf_CA_labels)

cutoff = 0.8
atom_pairs = list(itertools.product(binder_CA_atom_indices, inf_CA_atom_indices))
distances = md.compute_distances(traj, atom_pairs)
contact_atom_indices = [atom_pairs[i] for i in np.where(distances < cutoff)[1]] 
print(f"{contact_atom_indices=}")

import pydantic
class Residue(pydantic.BaseModel):
    index: int
    chain_id: str

class Contact(pydantic.BaseModel):
    residue1: Residue
    residue2: Residue

contacts = []
for i, j in contact_atom_indices:
    binder_residue_index = traj.topology.atom(i).residue.index + 1
    binder_residue_chain_id = traj.topology.atom(i).residue.chain.chain_id
    inf_residue_index = traj.topology.atom(j).residue.index + 1
    inf_residue_chain_id = traj.topology.atom(j).residue.chain.chain_id
    residue1 = Residue(
        index=binder_residue_index,
        chain_id=binder_residue_chain_id
    )
    residue2 = Residue(
        index=inf_residue_index,
        chain_id=inf_residue_chain_id
    )
    contact.append(
        Contact(
            residue1=residue1,
            residue2=residue2
        )
    )
contacts

@CA-A_1,@CA-A_2,@CA-A_3,@CA-A_4,@CA-A_5,@CA-A_6,@CA-A_7,@CA-A_8,@CA-A_9,@CA-A_10,@CA-A_11,@CA-A_12,@CA-A_13,@CA-A_14,@CA-A_15,@CA-A_16,@CA-A_17,@CA-A_18,@CA-A_19,@CA-A_20,@CA-A_21,@CA-A_22,@CA-A_23,@CA-A_24,@CA-A_25,@CA-A_26,@CA-A_27,@CA-A_28,@CA-A_29,@CA-A_30,@CA-A_31,@CA-A_32,@CA-A_33,@CA-A_34,@CA-A_35,@CA-A_36,@CA-A_37,@CA-A_38,@CA-A_39,@CA-A_40,@CA-A_41,@CA-A_42,@CA-A_43,@CA-A_44,@CA-A_45,@CA-A_46,@CA-A_47,@CA-A_48,@CA-A_49,@CA-A_50
@CA-A_81,@CA-A_82,@CA-A_83,@CA-A_84,@CA-A_85,@CA-A_86,@CA-A_87,@CA-A_88,@CA-A_89,@CA-A_90,@CA-A_91,@CA-A_92,@CA-A_93,@CA-A_94,@CA-A_95,@CA-A_96,@CA-A_97,@CA-A_98,@CA-A_99,@CA-A_100,@CA-A_101,@CA-A_102,@CA-A_103,@CA-A_104,@CA-A_105,@CA-A_106,@CA-A_107,@CA-A_108,@CA-A_109,@CA-A_110,@CA-A_111,@CA-A_112,@CA-A_113,@CA-A_114,@CA-A_115,@CA-A_116,@CA-A_117,@CA-A_118,@CA-A_119,@CA-A_120,@CA-A_121,@CA-A_122,@CA-A_123,@CA-A_124,@CA-A_125,@CA-A_126,@CA-A_127,@CA-A_128,@CA-A_129,@CA-A_130,@CA-A_131,@CA-A_132,@CA-A_133,@CA-A_134,@CA-A_135,@CA-A_136,@CA-A_137,@CA-A_13

ModuleNotFoundError: No module named 'pydantic'

In [1]:
import mdtraj as md
from src.plumed.cv import remove_non_protein_elements
output_dir = '../../data/241109_INFconstruct/output/Z1-B50W/241114'
filename = 'Z1-B50W'
traj = md.load(f'{output_dir}/{filename}_solvated.pdb')
traj = remove_non_protein_elements(traj)
print(traj.topology.atom(1).residue.chain.chain_id)


In [11]:
output_dir = '../../data/241010_FoldingUponBinding/output/241110/A-synuclein/alpha_1'
filename = 'A-synuclein_alpha'
traj = md.load(f'{output_dir}/{filename}_solvated.pdb')
print(traj.topology.atom(1).residue.chain.chain_id)
traj = traj.atom_slice(traj.topology.select('protein'))
print(traj.topology.atom(1).residue.chain.chain_id)

A
None


In [19]:
for i in traj.topology.atoms:
    print(dir(i))


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'element', 'index', 'is_backbone', 'is_sidechain', 'n_bonds', 'name', 'residue', 'segment_id', 'serial']
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'element', 'index', 'is_backbone', 'is_sidechain', 'n_bonds', 'name', 'residue', 'segment_id', 'serial']
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq

In [40]:
traj = md.load(f'{output_dir}/{filename}_solvated.pdb')
protein_indices = traj.topology.select('protein')
traj = traj.atom_slice(traj.topology.select('protein'))
traj


<mdtraj.Trajectory with 1 frames, 4046 atoms, 241 residues, and unitcells at 0x7f7be0936f00>

In [41]:
traj = md.load(f'{output_dir}/{filename}_fixed.pdb')
traj


<mdtraj.Trajectory with 1 frames, 4046 atoms, 241 residues, without unitcells at 0x7f7bd85933e0>

In [29]:
import MDAnalysis as mda
output_dir = '../../data/241010_FoldingUponBinding/output/241110/A-synuclein/alpha_1'
filename = 'A-synuclein_alpha'
universe = mda.Universe(f'{output_dir}/{filename}_solvated.pdb')
protein_atoms = universe.select_atoms('protein')
new_universe = mda.Merge(protein_atoms)
chain_ids = new_universe.segments.chainIDs
chain_ids



[array(['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'

In [27]:
dir(new_universe.residues[0].segment)

['_SETATTR_WHITELIST',
 '__add__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__radd__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_prop',
 '_del_prop',
 '_derived_class',
 '_is_group',
 '_ix',
 '_mix',
 '_subclass',
 '_u',
 'atoms',
 'bfactors',
 'get_connections',
 'ix',
 'ix_array',
 'level',
 'mass',
 'residues',
 'segid',
 'segindex',
 'universe']

In [30]:
import MDAnalysis as mda
output_dir = '../../data/241109_INFconstruct/output/Z1-B50W/241114'
filename = 'Z1-B50W'
universe = mda.Universe(f'{output_dir}/{filename}_solvated.pdb')
protein_atoms = universe.select_atoms('protein')
new_universe = mda.Merge(protein_atoms)
chain_ids = new_universe.segments.chainIDs
chain_ids

[array(['A', 'A', 'A', ..., 'A', 'A', 'A'], dtype=object)]