MoleculeSets (MSets) are objects which contain geometries of a single molecule. They can be saved and loaded in JSON format.

In [1]:
from tensorchem.dataset.molecule import MoleculeSet as MSet

In [2]:
mset = MSet()
mset.filename = "../data/chemspider_1.mset"
mset.load()

MSets contain a sequence of internal atom objects (stored as a tuple, as the output below shows). A minimal atom object has only its atomic number (at_num) set; its coordinates (xyz) and labels are left empty. An MSet will generally hold one set of minimal atom objects, and geometries are built by filling in the coordinates and properties of those atoms.

In [3]:
print(type(mset.atoms))
print(type(mset.atoms[0]))
print([atom.at_num for atom in mset.atoms])
mset.atoms[0].__dict__

<class 'tuple'>
<class 'tensorchem.dataset.molecule.Atom'>
[8, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


{'at_num': 8, 'xyz': (None, None, None), 'labels': {}}

MSets will typically contain a list of geometries. Each geometry is the same molecule, but should have different coordinates and labels.

In [4]:
print(len(mset))
print(mset[0])
print(mset[0].labels)

218
30

O     3.2063876339     -0.6707270711     0.229857304
N     0.6483069702     0.5304208499     0.1283209082
C     -0.6592259395     0.8925946828     -0.5088617461
C     -1.7236068981     1.429474823     0.4916227865
C     -1.2268321478     -0.1807404342     -1.487619928
C     -2.591247975     0.3301385863     1.1548847261
C     -2.1384801617     -1.2602704236     -0.8462667096
C     -3.1905697266     -0.65350216     0.1167462092
C     0.8707975622     -0.6540844726     1.0077923788
C     1.8632786992     1.3733869179     -0.0839543744
C     2.0427067778     -1.4926322842     0.4414494014
C     3.0055823809     0.4773834018     -0.6164265033
H     -0.4097199463     1.7238423179     -1.1346934058
H     -2.3735651285     2.0953149129     -0.0366899589
H     -1.1850490203     1.9108679552     1.2809981994
H     -0.3981405359     -0.6796362103     -1.9450879664
H     -1.8467758131     0.3523385553     -2.1778346499
H     -3.3924519689     0.7987678788     1.6872093321
H     -1.9610303

A geometry is itself a collection of atom objects, but in a geometry each atom should have its coordinates (xyz) filled in.

In [5]:
geom = mset[0]
for atom in geom.atoms:
    print(atom.at_num, atom.xyz)

8 (3.2063876339, -0.6707270711, 0.229857304)
7 (0.6483069702, 0.5304208499, 0.1283209082)
6 (-0.6592259395, 0.8925946828, -0.5088617461)
6 (-1.7236068981, 1.429474823, 0.4916227865)
6 (-1.2268321478, -0.1807404342, -1.487619928)
6 (-2.591247975, 0.3301385863, 1.1548847261)
6 (-2.1384801617, -1.2602704236, -0.8462667096)
6 (-3.1905697266, -0.65350216, 0.1167462092)
6 (0.8707975622, -0.6540844726, 1.0077923788)
6 (1.8632786992, 1.3733869179, -0.0839543744)
6 (2.0427067778, -1.4926322842, 0.4414494014)
6 (3.0055823809, 0.4773834018, -0.6164265033)
1 (-0.4097199463, 1.7238423179, -1.1346934058)
1 (-2.3735651285, 2.0953149129, -0.0366899589)
1 (-1.1850490203, 1.9108679552, 1.2809981994)
1 (-0.3981405359, -0.6796362103, -1.9450879664)
1 (-1.8467758131, 0.3523385553, -2.1778346499)
1 (-3.3924519689, 0.7987678788, 1.6872093321)
1 (-1.9610303379, -0.23181813, 1.8120986889)
1 (-1.5258334807, -1.9448779191, -0.297747941)
1 (-2.6665606932, -1.7510951924, -1.6369127694)
1 (-3.9182730217, -0.1279957

In [6]:
mset2 = MSet()
mset2.filename = "../data/chemspider_2.mset"
mset2.load()

In [7]:
print(mset.chem_formula)
print(mset2.chem_formula)

{8: 1, 7: 1, 6: 10, 1: 18}
{6: 8, 1: 11, 8: 2}


In [9]:
print(mset.is_isomer(mset2))
print(mset.is_isomer(mset))

False
True
