In [94]:
from schnetpack.datasets import QM9

qm9data = QM9('../data/qm9.db', download=False)

In [2]:
import numpy as np
import torch

from dscribe.descriptors import SOAP
from ase.io import read
from ase import Atoms
from ase.build import molecule

In [3]:
'''
xyz format
- line 1: number of atoms n
- line 2: scalar properties
- line 3, ..., n+1: element type, coordinates xyz, Mulliken partial charges on atoms
'''
def parse_xyz(filename):
    num_atoms = 0
    scalar_properties = []
    atomic_symbols = []
    xyz = []
    charges = []
    smiles = ""
    with open(filename, 'r') as f:
        for line_num, line in enumerate(f):
            if line_num == 0:
                num_atoms = int(line)
            elif line_num == 1:
                scalar_properties = [float(i) for i in line.split()[2:]]
            elif 2 <= line_num <= 1 + num_atoms:
                atom_symbol, x, y, z, charge = line.split()
                atomic_symbols.append(atom_symbol)
                xyz.append([float(x), float(y), float(z)])
                charges.append(float(charge))
            elif line_num == num_atoms + 3:
                smiles = str(line.split())

    result = {
        'num_atoms': num_atoms,
        'atomic_symbols': atomic_symbols,
        'pos': torch.tensor(xyz),
        'charges': np.array(charges),
        'smiles': smiles
    }
    return result

In [9]:
soaps = []
num_files = 133885
temp = 10
for i in range(133880, num_files+1):
    
    # create molecule object
    file_name = "../../data/dsgdb9nsd.xyz/dsgdb9nsd_" + \
        str(i).zfill(6) + ".xyz"
    molecule = parse_xyz(file_name)
    molecule_obj = Atoms(symbols=molecule["atomic_symbols"], positions=molecule["pos"])
    
    # set up soap descriptor
    species = set()
    species.update(molecule_obj.get_chemical_symbols())
    
    soap = SOAP(
        species=species,
        periodic=False,
        rcut=5,
        nmax=8,
        lmax=8,
        average="outer",
        sparse=False
    )
    feature_vector = soap.create(molecule_obj)
    print(feature_vector)
    soaps.append(feature_vector)
    

[ 0.00583903  0.02317044  0.05105605 ...  0.0035384  -0.00360856
  0.00375531]
[ 0.00592888  0.02332346  0.05267758 ...  0.00400366 -0.00380279
  0.00371058]
[ 0.00607159  0.02390242  0.05401356 ...  0.00424634 -0.00399596
  0.00385569]
[ 0.00658821  0.0261824   0.0586933  ...  0.0064761  -0.00606609
  0.0058116 ]
[ 0.00647181  0.02587481  0.05691346 ...  0.00370507 -0.00344536
  0.00328494]
[ 0.005732    0.02269543  0.05071326 ...  0.00336161 -0.00354908
  0.00378139]


In [10]:
soaps

[array([ 0.00583903,  0.02317044,  0.05105605, ...,  0.0035384 ,
        -0.00360856,  0.00375531], dtype=float32),
 array([ 0.00592888,  0.02332346,  0.05267758, ...,  0.00400366,
        -0.00380279,  0.00371058], dtype=float32),
 array([ 0.00607159,  0.02390242,  0.05401356, ...,  0.00424634,
        -0.00399596,  0.00385569], dtype=float32),
 array([ 0.00658821,  0.0261824 ,  0.0586933 , ...,  0.0064761 ,
        -0.00606609,  0.0058116 ], dtype=float32),
 array([ 0.00647181,  0.02587481,  0.05691346, ...,  0.00370507,
        -0.00344536,  0.00328494], dtype=float32),
 array([ 0.005732  ,  0.02269543,  0.05071326, ...,  0.00336161,
        -0.00354908,  0.00378139], dtype=float32)]