In [6]:
import importlib
import os

from monty.serialization import dumpfn, loadfn

# NOTE(review): newer pymatgen releases expose Element from `pymatgen.core`;
# confirm against the installed version before changing this import.
from pymatgen import Element

import utils

In [2]:
# Location of the dataset file read by the loading cell.
# The path can be overridden without editing the notebook by setting the
# LIBE_DATASET_PATH environment variable; when it is unset, the original
# author's local path is used so existing behaviour is unchanged.
DATASET_PATH = os.environ.get(
    "LIBE_DATASET_PATH",
    "/Users/ewcss/Documents/grad/publications/2021/li_dataset/data/libe.json",
)

In [3]:
# Read the whole dataset from DATASET_PATH. Later cells iterate over `data`
# and index each entry by keys such as "species", "charge",
# "spin_multiplicity", "molecule_graph" and "thermo".
data = loadfn(DATASET_PATH)

In [7]:
# Step 1 of the singlet/triplet gap analysis: collect the candidate molecules.
# Triplet-state calculations were only run for molecules with fewer than
# 50 electrons, so anything at or above that size is left out of both lists.
small_singlets = []
triplets = []
by_multiplicity = {1: small_singlets, 3: triplets}
for m in data:
    # Electron count = total nuclear charge minus the net molecular charge.
    n_electrons = sum(Element(symbol).Z for symbol in m["species"]) - m["charge"]
    if n_electrons >= 50:
        continue

    bucket = by_multiplicity.get(m["spin_multiplicity"])
    if bucket is not None:
        bucket.append(m)

In [8]:
# Report how many small molecules fell into each spin-state group.
for label, group in (("Singlets", small_singlets), ("Triplets", triplets)):
    print(label, len(group))

Singlets 2809
Triplets 2386


In [13]:
# Pair every triplet with the first singlet (in `small_singlets` order) that
# has the same charge and an isomorphic molecule graph.
#
# Graph isomorphism is the expensive part of the comparison, so singlets are
# bucketed by charge up front and only same-charge candidates are ever tested.
# Insertion order inside each bucket follows `small_singlets`, so the singlet
# chosen for each triplet is the same one a plain nested scan would find.
#
# NOTE(review): only the first matching singlet is kept, and one singlet may
# be paired with several triplets — confirm this is the intended pairing rule.
singlets_by_charge = {}
for singlet in small_singlets:
    singlets_by_charge.setdefault(singlet["charge"], []).append(singlet)

match = 0  # number of triplets for which a singlet partner was found
pairs = []  # (singlet, triplet) tuples, in the order the triplets are visited
for triplet in triplets:
    triplet_graph = triplet["molecule_graph"]  # looked up once per triplet
    for singlet in singlets_by_charge.get(triplet["charge"], []):
        if triplet_graph.isomorphic_to(singlet["molecule_graph"]):
            match += 1
            pairs.append((singlet, triplet))
            break

In [14]:
# Triplets left without a partner after the isomorphism/charge matching.
unmatched_triplets = len(triplets) - match
print("Triplets with no matching singlet (defined by isomorphism and charge): {}".format(unmatched_triplets))

Triplets with no matching singlet (defined by isomorphism and charge): 447


In [16]:
def _electronic_energy(entry):
    """Return the raw electronic energy stored on a dataset entry."""
    return entry["thermo"]["raw"]["electronic_energy_Ha"]


# Count the (singlet, triplet) pairs in which the singlet's electronic energy
# is strictly lower than the triplet's.
singlet_lower = sum(
    1
    for singlet_entry, triplet_entry in pairs
    if _electronic_energy(singlet_entry) < _electronic_energy(triplet_entry)
)

In [17]:
# Report how often the singlet is the lower-energy member of a matched pair.
# With no matched pairs the percentage is undefined, so NaN is reported
# instead of letting the division raise ZeroDivisionError.
n_pairs = len(pairs)
singlet_lower_pct = singlet_lower / n_pairs * 100 if n_pairs else float("nan")
print("Singlet lower in {} out of {} cases ({} %)".format(singlet_lower, n_pairs, singlet_lower_pct))

Singlet lower in 1708 out of 1939 cases (88.08664259927798 %)
