In [1]:
from puresnet.sparse import get_data,get_coordinates_features,get_HET_ATOM
from puresnet.dataset import SparseDataset,get_trainVal_loder,custom_collation_fn
import MinkowskiEngine as ME
from torch.utils.data import DataLoader



# Parsing a PDB file to create a sparse tensor

In [2]:
# Load structure 7W15 with rscb=True. Per the original author's note, a local
# PDB path can be supplied instead, in which case rscb should be False
# (NOTE(review): the note says `pdb_path` but the call uses `pdb_file` - confirm).
prot_dic, prot_chains = get_data(pdb_file='7W15', rscb=True)

In [3]:
# Turn the parsed protein dictionary and its chains into the three values
# used below: sparse coordinates, per-atom features, and raw atom records.
sparse_coord, sparse_feature, information = get_coordinates_features(
    vertex_dict=prot_dic,
    chain=prot_chains,
)

### `sparse_coord` holds the coordinates of the atoms, `sparse_feature` holds the features of each atom, and `information` holds the raw per-atom information as it appears in the PDB file.

In [4]:
# Display the three results together as one tuple.
(sparse_coord, sparse_feature, information)

(tensor([[ 0, 16, 35],
         [ 1, 14, 34],
         [ 1, 15, 36],
         ...,
         [95, 48, 37],
         [95, 50, 51],
         [96, 45, 37]]),
 tensor([[0., 1., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 1., 1., 0.],
         [0., 0., 0.,  ..., 1., 1., 0.]], dtype=torch.float64),
 array([['4717', 'CD1', 'C', ..., '-51.611', '-33.711', '28.831'],
        ['4716', 'CG2', 'C', ..., '-50.239', '-36.05', '27.449'],
        ['4715', 'CG1', 'C', ..., '-50.216', '-34.317', '29.152'],
        ...,
        ['1323', 'O', 'O', ..., '43.325', '-1.919', '30.697'],
        ['1369', 'OE2', 'O', ..., '42.969', '0.789', '45.024'],
        ['1325', 'OG', 'O', ..., '44.091', '-5.041', '30.497']],
       dtype='<U32'))

In [5]:
# Collect the coordinates of the GTP ligand atoms from structure 7W15.
ligand_dict = get_HET_ATOM(
    pdb_file='7W15',
    ligand_name='GTP',
    rscb=True,
)

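### `ligand_dict` holds the coordinates of the ligand atoms present in the PDB structure. Each key (e.g. `402:B:GTP`) maps to a list of `[x, y, z]` coordinates.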

In [6]:
# Show the ligand dictionary returned by get_HET_ATOM.
ligand_dict 

{'402:B:GTP': [[18.926, 1.025, 23.536],
  [18.059, 0.816, 24.748],
  [19.499, -0.258, 23.004],
  [19.976, 2.08, 23.749],
  [17.948, 1.58, 22.378],
  [17.96, 1.379, 20.79],
  [19.049, 2.215, 20.216],
  [17.919, -0.078, 20.494],
  [16.555, 2.025, 20.378],
  [15.047, 1.634, 20.752],
  [14.571, 0.578, 19.813],
  [14.98, 1.357, 22.214],
  [14.278, 3.001, 20.434],
  [14.668, 4.188, 21.161],
  [13.452, 4.799, 21.811],
  [12.415, 4.99, 20.819],
  [12.766, 3.964, 22.895],
  [13.42, 4.09, 24.15],
  [11.374, 4.599, 22.913],
  [11.327, 5.774, 23.691],
  [11.148, 4.912, 21.435],
  [10.35, 3.908, 20.746],
  [10.808, 2.831, 20.03],
  [9.844, 2.102, 19.523],
  [8.68, 2.735, 19.937],
  [7.324, 2.402, 19.694],
  [6.874, 1.45, 19.044],
  [6.459, 3.315, 20.297],
  [6.847, 4.405, 21.035],
  [5.869, 5.172, 21.535],
  [8.12, 4.723, 21.266],
  [8.978, 3.848, 20.69]],
 '402:A:GTP': [[-23.644, -28.07, 18.967],
  [-24.063, -26.794, 18.292],
  [-24.718, -29.124, 18.931],
  [-23.099, -27.847, 20.352],
  [-22.42, -

# Creating a Custom SparseDataset
## Dataset directory structure

<style>
.folder {
  display: flex;
  align-items: center;
  margin-left: 15px;
}

.file {
  display: flex;
  align-items: center;
  margin-left: 30px;
}

.icon {
  margin-right: 5px;
}
</style>

<div class="folder">
  <span class="icon">📂</span>sparse
  <div class="folder" style="margin-top: 5px; margin-left: 15px;">
    <span class="icon">📂</span>PDBID
    <div class="file" style="margin-top: 5px; margin-left: 25px;">
      <span class="icon">📄</span>coords.pt
    </div>
    <div class="file" style="margin-top: 5px; margin-left: 25px;">
      <span class="icon">📄</span>feat.pt
    </div>
    <div class="file" style="margin-top: 5px; margin-left: 25px;">
      <span class="icon">📄</span>label.pt
    </div>
  </div>
</div>


In [7]:
# Build the dataset from the local 'sparse/' directory; the directory diagram
# in this notebook shows one folder per PDB ID holding coords.pt, feat.pt and
# label.pt.
dataset = SparseDataset(path='sparse/')

### You can write your own `custom_collation_fn` to suit your requirements. Refer to the MinkowskiEngine documentation (https://nvidia.github.io/MinkowskiEngine/overview.html).

In [8]:
# Wrap the dataset in a DataLoader that yields one sample per batch and
# assembles each batch with the project's custom collation function.
data_loder = DataLoader(
    dataset=dataset,
    batch_size=1,
    collate_fn=custom_collation_fn,
)

In [9]:
# Inspect only the first batch: unpack it, wrap the features and coordinates
# in a MinkowskiEngine SparseTensor, print it, and stop iterating.
for coord, feature, label in data_loder:
    inputs = ME.SparseTensor(features=feature, coordinates=coord)
    print(inputs)
    break

SparseTensor(
  coordinates=tensor([[  0, -84, -38,  36],
        [  0, -83, -39,  36],
        [  0, -83, -30,  26],
        ...,
        [  0,   0, -29,  56],
        [  0,   0, -21,  59],
        [  0,   0, -20,  58]], dtype=torch.int32)
  features=tensor([[0., 0., 0.,  ..., 1., 1., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 1., 1., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 1., 1., 0.]])
  coordinate_map_key=coordinate map key:[1, 1, 1]
  coordinate_manager=CoordinateMapManagerCPU(
	[1, 1, 1]:	CoordinateMapCPU:8324x4
	algorithm=MinkowskiAlgorithm.DEFAULT
  )
  spatial dimension=3)
