In [1]:
from __future__ import print_function, absolute_import
import numpy as np
import os

In [2]:
class Molecule:
    """Per-atom data read from the ATOM records of a PDBQT-style text file.

    The file is parsed once, at construction time.

    Attributes
    ----------
    filename : str
        Path that was passed to the constructor.
    coords : numpy.ndarray
        float32 array of shape (n_atoms, 3) built from the 7th- to 5th-from-last
        whitespace-separated fields of every ATOM line.
    charges : numpy.ndarray
        float32 array of shape (n_atoms,) built from the second-to-last field.
    elements : numpy.ndarray
        object array of shape (n_atoms,) holding the last field of every
        ATOM line as a string.
    """

    # Fields are addressed from the END of each line, so a usable ATOM line
    # needs the record tag plus at least this many trailing fields.
    # NOTE(review): in the PDBQT layout these trailing fields should be
    # x, y, z, occupancy, B-factor, partial charge, atom type -- confirm
    # against the AutoDock format description.
    _N_TRAILING_FIELDS = 7

    def __init__(self, file):
        """Store the path and immediately parse the file.

        Parameters
        ----------
        file : str
            Path of the file to read.

        Raises
        ------
        IOError / OSError
            If the file cannot be opened.
        ValueError
            If an ATOM line has too few fields or a non-numeric value where
            a coordinate or charge is expected.
        """
        # Per-instance state (previously mutable class-level lists that were
        # shared by every instance until overwritten).
        self.filename = file
        self._dir_name = ""
        self.coords = np.empty((0, 3), dtype=np.float32)
        self.charges = np.empty((0,), dtype=np.float32)
        self.elements = np.empty((0,), dtype=object)
        self._read_file()

    def _read_file(self):
        """Parse ``self.filename`` and fill ``coords``, ``charges``, ``elements``.

        Only lines whose first whitespace-separated token is exactly "ATOM"
        are used; every other line (including blank ones) is ignored.
        """
        with open(self.filename, 'r') as f:
            # Split every line on whitespace; blank lines become empty lists.
            rows = [raw.split() for raw in f]

        # Keep ATOM records only. The truthiness check skips blank lines,
        # which would otherwise raise IndexError on fields[0].
        atoms = [fields for fields in rows if fields and fields[0] == "ATOM"]

        # Fail early, and with context, on truncated records instead of
        # silently building a mis-shaped array.
        for fields in atoms:
            if len(fields) <= self._N_TRAILING_FIELDS:
                raise ValueError(
                    "Malformed ATOM line in {}: {!r}".format(
                        self.filename, " ".join(fields)))

        # reshape(-1, 3) keeps the (n_atoms, 3) shape even when n_atoms == 0.
        self.coords = np.array(
            [fields[-7:-4] for fields in atoms], dtype=np.float32
        ).reshape(-1, 3)
        self.charges = np.array(
            [fields[-2] for fields in atoms], dtype=np.float32)
        self.elements = np.array(
            [fields[-1] for fields in atoms], dtype=object)

In [3]:
import glob

In [4]:
# Collect every pocket file of the refined set. The result is sorted because
# glob returns paths in a filesystem-dependent order, which would make
# files[0] (used below) differ between machines and runs.
files = sorted(glob.glob("../../pdbbind_data/refined-set-2016/*/*_pocket.pdbqt"))
print("Number of files {}".format(len(files)))

Number of files 4057


In [5]:
# Parse the first path of the glob result into a Molecule instance
mol = Molecule(file=files[0])

In [6]:
# Show the float32 array built from the second-to-last field of each ATOM line
print(mol.charges)

[ 0.241 -0.297  0.15  -0.273  0.149  0.241 -0.273 -0.295  0.15   0.176
  0.243 -0.273 -0.298  0.15   0.137  0.227 -0.286  0.028  0.002  0.
  0.     0.029  0.002  0.     0.     0.149  0.241 -0.297  0.15  -0.273
  0.242 -0.273 -0.272  0.094  0.024  0.031  0.152  0.156 -0.238  0.142
  0.14   0.367 -0.246 -0.246  0.027  0.002  0.002  0.     0.149  0.243
 -0.297  0.15  -0.273  0.176  0.238 -0.296  0.15  -0.274  0.145 -0.238
  0.142  0.026  0.002  0.     0.     0.241 -0.273 -0.295  0.15   0.176
  0.243 -0.298  0.15  -0.273  0.137  0.026  0.002  0.002  0.227 -0.286
  0.027  0.002  0.002  0.     0.178  0.24  -0.295  0.15  -0.274  0.169
 -0.236  0.142  0.243 -0.296  0.15  -0.273  0.163  0.229 -0.286  0.166
 -0.38   0.211  0.133 -0.239  0.142  0.224 -0.287  0.025  0.002  0.
  0.     0.244 -0.272  0.152  0.031  0.024  0.094 -0.273  0.241 -0.273
 -0.297  0.15   0.158  0.241 -0.273 -0.298  0.15   0.143  0.227 -0.286
  0.075  0.082  0.115 -0.221  0.199 -0.227  0.114  0.222 -0.275 -0.329
  0.145  0.1

In [7]:
# Show the float32 coordinate array: one row of three values per ATOM line
print(mol.coords)

[[26.13  25.302  9.259]
 [26.992 25.415 10.271]
 [27.728 26.148 10.227]
 ...
 [28.048 17.962 10.491]
 [28.211 21.059 13.735]
 [31.981 27.383 13.796]]


In [8]:
# Show the strings taken from the last field of each ATOM line (stored as `elements`)
print(mol.elements)

['C' 'N' 'HD' 'OA' 'C' 'C' 'OA' 'N' 'HD' 'C' 'C' 'OA' 'N' 'HD' 'C' 'C'
 'OA' 'C' 'C' 'C' 'C' 'C' 'C' 'C' 'C' 'C' 'C' 'N' 'HD' 'OA' 'C' 'OA' 'N'
 'C' 'C' 'C' 'C' 'C' 'NA' 'HD' 'C' 'C' 'OA' 'OA' 'C' 'C' 'C' 'C' 'C' 'C'
 'N' 'HD' 'OA' 'C' 'C' 'N' 'HD' 'OA' 'C' 'NA' 'HD' 'C' 'C' 'C' 'C' 'C'
 'OA' 'N' 'HD' 'C' 'C' 'N' 'HD' 'OA' 'C' 'C' 'C' 'C' 'C' 'OA' 'C' 'C' 'C'
 'C' 'C' 'C' 'N' 'HD' 'OA' 'C' 'NA' 'HD' 'C' 'N' 'HD' 'OA' 'C' 'C' 'OA'
 'C' 'OA' 'HD' 'C' 'NA' 'HD' 'C' 'OA' 'C' 'C' 'C' 'C' 'C' 'N' 'C' 'C' 'C'
 'C' 'OA' 'C' 'OA' 'N' 'HD' 'C' 'C' 'OA' 'N' 'HD' 'C' 'C' 'OA' 'C' 'A' 'A'
 'NA' 'A' 'NA' 'C' 'C' 'OA' 'N' 'HD' 'HD' 'C' 'C' 'N' 'HD' 'OA' 'C' 'NA'
 'HD' 'C' 'OA' 'HD' 'C' 'A' 'A' 'A' 'A' 'A' 'A' 'A' 'N' 'HD' 'A' 'C' 'C'
 'OA' 'N' 'C' 'C' 'C' 'C' 'C' 'OA' 'NA' 'HD' 'C' 'C' 'N' 'HD' 'HD' 'OA'
 'C' 'C' 'OA' 'N' 'HD' 'C' 'C' 'NA' 'HD' 'C' 'OA' 'C' 'A' 'A' 'A' 'A' 'A'
 'A' 'OA' 'HD' 'C' 'NA' 'HD' 'C' 'OA' 'C' 'OA' 'HD' 'N' 'HD' 'HD' 'C' 'C'
 'C' 'C' 'NA' 'C' 'C' 'C' 'OA' 'C' 'OA' 'P' 'OA' 'O

In [9]:
# Report the container type of each parsed attribute, in declaration order
print(*(type(getattr(mol, attr)) for attr in ("coords", "charges", "elements")))

<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [10]:
# Report the shape of each parsed attribute, in declaration order
print(*(getattr(mol, attr).shape for attr in ("coords", "charges", "elements")))

(420, 3) (420,) (420,)
