# Get peptide molecule for training

In [1]:
import os

url = 'http://bioinfo.dcc.ufmg.br/propedia/public/pdb/structures/complex/3hpj_C_A.pdb'


!wget 'http://bioinfo.dcc.ufmg.br/propedia/public/pdb/structures/complex/3vfn_C_A.pdb'

try:
    os.mkdir('all_atom')
except FileExistsError:
    pass

!mv *.pdb all_atom/

--2022-07-28 08:46:28--  http://bioinfo.dcc.ufmg.br/propedia/public/pdb/structures/complex/3vfn_C_A.pdb
Resolving bioinfo.dcc.ufmg.br (bioinfo.dcc.ufmg.br)... 150.164.203.91
Connecting to bioinfo.dcc.ufmg.br (bioinfo.dcc.ufmg.br)|150.164.203.91|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 190512 (186K) [chemical/x-pdb]
Saving to: ‘3vfn_C_A.pdb’


2022-07-28 08:46:28 (458 KB/s) - ‘3vfn_C_A.pdb’ saved [190512/190512]



In [2]:
from torchmdexp.datasets.utils import CA_MAP
import numpy as np

def _consecutive_index_rows(n, width):
    """Return an (m, width) integer array whose row i is [i, i+1, ..., i+width-1].

    m is ``n - width + 1`` clamped at zero, so an input with fewer than
    ``width`` atoms yields an empty ``(0, width)`` array instead of a reshape
    error.
    """
    n_rows = max(n - width + 1, 0)
    # Broadcasting a column of start indices against [0, 1, ..., width-1]
    # produces all sliding windows in one step.
    return np.arange(n_rows)[:, None] + np.arange(width)


def pdb2psf_CA(mol, bonds=True, angles=True, dihedrals=True):
    """Attach atom types and sequential bonded terms to ``mol``.

    Atom ``i`` is typed by looking up ``(mol.resname[i], mol.name[i])`` in
    ``CA_MAP``. Bonded terms are built purely from atom order: every pair,
    triple and quadruple of consecutive atom indices becomes a bond, angle and
    dihedral respectively.

    NOTE(review): consecutive indices are linked regardless of chain or
    segment, so for a multi-chain input the last atom of one chain is also
    linked to the first atom of the next -- confirm this is intended.

    Parameters
    ----------
    mol : object
        Must expose ``numAtoms``, ``resname`` and ``name``; its ``atomtype``,
        ``bonds``, ``angles`` and ``dihedrals`` attributes are overwritten.
        Presumably a moleculekit ``Molecule`` already reduced to CA atoms --
        verify against the caller.
    bonds, angles, dihedrals : bool
        When true, the corresponding term list is generated; when false, an
        empty ``(0, k)`` int32 array is stored instead (k = 2, 3, 4).

    Returns
    -------
    The same ``mol`` object, modified in place.

    Raises
    ------
    KeyError
        If a ``(resname, name)`` pair is missing from ``CA_MAP`` (assuming
        ``CA_MAP`` is a plain mapping).
    """
    n = mol.numAtoms

    atom_types = [CA_MAP[(mol.resname[i], mol.name[i])] for i in range(n)]

    # Separate local names are used so the boolean flags are not overwritten
    # by the arrays they control.
    if bonds:
        bond_idx = _consecutive_index_rows(n, 2)
    else:
        bond_idx = np.empty([0, 2], dtype=np.int32)

    if angles:
        angle_idx = _consecutive_index_rows(n, 3)
    else:
        angle_idx = np.empty([0, 3], dtype=np.int32)

    if dihedrals:
        dihedral_idx = _consecutive_index_rows(n, 4)
    else:
        dihedral_idx = np.empty([0, 4], dtype=np.int32)

    mol.atomtype = np.array(atom_types)
    mol.bonds = bond_idx
    mol.angles = angle_idx
    mol.dihedrals = dihedral_idx

    return mol

In [3]:
from moleculekit.molecule import Molecule


# Output directories for the CA-only structures and their topologies.
# exist_ok=True makes the cell safe to re-run; unlike a swallowed
# FileExistsError, it still raises if a path exists but is not a directory.
os.makedirs('CA/levels/level_0/ground_truth', exist_ok=True)
os.makedirs('CA/topologies', exist_ok=True)

def listdir_nohidden(path):
    """Yield the name, minus its final extension, of each non-hidden entry in ``path``.

    Entries whose name begins with '.' are skipped. Order follows
    ``os.listdir`` and is therefore not guaranteed.
    """
    for entry in os.listdir(path):
        if entry.startswith('.'):
            continue
        stem, _extension = os.path.splitext(entry)
        yield stem

# Stems of the structures to convert. Only stems that really have a matching
# .pdb file in all_atom/ are kept, so stray files or sub-directories no longer
# produce a non-existent path. Stems are used verbatim (no whitespace
# stripping) so each one always maps back to the file on disk, and they are
# de-duplicated and sorted so the processing order is reproducible.
names = sorted(
    name
    for name in set(listdir_nohidden('all_atom'))
    if os.path.isfile(os.path.join('all_atom', name + '.pdb'))
)
name_paths = [os.path.join('all_atom', name + '.pdb') for name in names]

mols = [Molecule(name_path) for name_path in name_paths]

for mol, name in zip(mols, names):
    # filter() modifies mol in place, so the entries of `mols` hold only the
    # CA atoms after this loop.
    mol.filter('name CA')
    mol.write(os.path.join('CA/levels/level_0/ground_truth', name + '.pdb'))

    # Angles are left empty here; bonds and dihedrals are generated.
    topo = pdb2psf_CA(mol, angles=False)
    topo.write(os.path.join('CA/topologies', name + '.psf'))

2022-07-28 08:46:40,017 - moleculekit.molecule - INFO - Removed 2063 atoms. 289 atoms remaining in the molecule.
