# Calculating Computational Errors in Formation Energy

## Load Imports and Files

In [131]:
%run imports.py

Imports successfully loaded


#### Computed Data

In [2]:
# Computed entries, indexed by string ID; get_formation_energy() looks up both the
# compound entry and its element entries in this mapping.
all_computed_entries = loadfn('data/computed_entries.json')


No electronegativity for Ne. Setting to NaN. This has no physical meaning, and is mainly done to avoid errors caused by the code expecting a float.


No electronegativity for Ar. Setting to NaN. This has no physical meaning, and is mainly done to avoid errors caused by the code expecting a float.



In [92]:
# Lookup tables consumed by get_formation_energy():
#   binary_formulas_ids  - indexed by compound formula; the value is passed through
#                          str() and used as a key into all_computed_entries.
#   element_formulas_ids - indexed by str(element); the value is used the same way
#                          to find the element's reference entry.
binary_formulas_ids = loadfn('data/binary_formulas_ids.json')
element_formulas_ids = loadfn('data/element_formulas_ids.json')

#### Experimental Data

In [125]:
# Two experimental sources. mp_expt_data is indexed by formula and is the last-resort
# lookup in experimental_formation_energy().
# NOTE(review): mpcontribs_data is never referenced by name in the visible cells; the
# `d` that is filtered into `_dsolids` is presumably derived from it - confirm.
mp_expt_data = loadfn('data/MP_expt_energy_all.json')
mpcontribs_data = loadfn('data/2020-09-10_final_mpthermo_contribs.json')

## Calculate Computed Formation Energies

In [129]:
def get_rxn_energy_from_objects(compound_entry, element_entries_list):
    """Return the reaction energy of elements -> compound, per product atom.

    A ComputedReaction is built with ``element_entries_list`` as reactants and
    ``compound_entry`` as the only product. The reaction's
    ``calculated_reaction_energy`` is divided by the product's ``num_atoms``.

    As a spot check, a call prints the reaction string whenever
    ``np.random.random_sample()`` falls below 0.05, so which reactions are
    shown depends on the RNG state.

    Raises:
        AssertionError: if the reaction does not have exactly one product.
    """
    reaction = ComputedReaction(element_entries_list, [compound_entry])
    products = reaction.products
    assert len(products) == 1
    compound = products[0]
    show_reaction = np.random.random_sample() < 0.05
    if show_reaction:
        atom_note = '(product has {} atoms)'.format(int(compound.num_atoms))
        print(str(reaction), atom_note)
    return reaction.calculated_reaction_energy / compound.num_atoms

def get_formation_energy(f):
    """Return the computed formation energy per atom for formula ``f``.

    The compound entry is found via ``binary_formulas_ids[f]``; the reference
    entry for each element of its composition is found via
    ``element_formulas_ids``. Both kinds of ID are converted to ``str`` before
    indexing ``all_computed_entries``.

    Raises:
        KeyError: if the formula, an element, or an ID is missing from the
            corresponding lookup table.
    """
    compound = all_computed_entries[str(binary_formulas_ids[f])]
    element_ids = [str(element_formulas_ids[str(el)]) for el in compound.composition.elements]
    element_entries = [all_computed_entries[element_id] for element_id in element_ids]
    return get_rxn_energy_from_objects(compound, element_entries)

In [130]:
# Per-atom computed formation energy for every formula in binary_formulas_ids.
# get_rxn_energy_from_objects() prints a random subset of the reactions as a spot check.
computed_formation_energies = {
    formula: get_formation_energy(formula) for formula in binary_formulas_ids
}

3 Li + 0.5 N2 -> Li3N (product has 4 atoms)
Be + 2 Br -> BeBr2 (product has 3 atoms)
Hf + 2 Cl2 -> HfCl4 (product has 5 atoms)
Bi + 3 Br -> BiBr3 (product has 4 atoms)
Hg + Br -> HgBr (product has 2 atoms)
4 Ni + 3 B -> Ni4B3 (product has 7 atoms)
2 K + S -> K2S (product has 3 atoms)
Na + Br -> NaBr (product has 2 atoms)
Nb + 0.5 N2 -> NbN (product has 2 atoms)
Si + 4 Br -> SiBr4 (product has 5 atoms)
Pd + 0.5 O2 -> PdO (product has 2 atoms)
Mn + As -> MnAs (product has 2 atoms)
Cs + 0.5 F2 -> CsF (product has 2 atoms)
Mn + Sb -> MnSb (product has 2 atoms)
Cd + 2 Br -> CdBr2 (product has 3 atoms)
Sr + 2 I -> SrI2 (product has 3 atoms)
Ti + H2 -> TiH2 (product has 3 atoms)
Mn + O2 -> MnO2 (product has 3 atoms)
2 Mg + Cu -> Mg2Cu (product has 3 atoms)
Mn + 2 S -> MnS2 (product has 3 atoms)


In [132]:
# Spot-check a handful of the computed values. `sample` is not defined in the visible
# cells - presumably it comes from imports.py.
sample(computed_formation_energies)

AuCl : -0.22487297125000083
SiO2 : -3.0322210149999997
CrB : -0.5015574837500001
Cr7C3 : -0.07071246000000145
CrN : -0.6127655081249959
Mn2B : -0.2830155081321853
CaSi : -0.5650863549999965
Li2Te : -1.1428724855555525
RuCl3 : -0.6651705231249956
SbCl3 : -0.9073948843749946
TiI4 : -0.7723885633333282
HoCl3 : -2.4074358327083267
HgO : -0.30253178708333195
WO3 : -2.215627367812491
NaF : -2.913346032499998
TiB2 : -1.0897324405555568
Ti3O5 : -3.2178924800000015
MgCu2 : -0.12568925000000064


## Organize Experimental Data and Convert to eV/atom

In [87]:
# Small accessors for one record `e` of the experimental data set.
def formula_extractor(e):
    """Return the record's formula."""
    return e['formula']


def phase_extractor(e):
    """Return the record's phase label."""
    return e['data']['phase']


def enthalpy_extractor(e):
    """Return the record's 298K 'ΔHᶠ' value, or np.inf when it is absent."""
    return e['data']['298K'].get('ΔHᶠ', np.inf)


# NOTE(review): `phases` and `d` are not defined in any visible cell; this cell only
# runs if they are left over from earlier kernel state - confirm where they come from.
# Every phase label except the non-solid / placeholder ones below counts as solid.
_solids = list(set(phases) - {'amorph', 'gas', 'gas.', 'liq', 'liquid', 'none', 'ref'})
_dsolids = [entry for entry in d if phase_extractor(entry) in _solids]

# Reduced formula -> enthalpy magnitude after conversion to 'eV/atom'. Records without
# an enthalpy (np.inf sentinel) are dropped; when several records share a reduced
# formula, the last one wins.
mpcontribs_solid_data = {
    Composition(formula_extractor(entry)).reduced_formula:
        Quantity(enthalpy_extractor(entry)).to('eV/atom').magnitude
    for entry in _dsolids
    if enthalpy_extractor(entry) != np.inf
}

In [89]:
def flip_formula(f):
    """Return formula ``f`` rewritten with its elements in reverse order.

    Each element's amount is read from ``Composition(f).as_dict()``, truncated
    with ``int``, and written after the symbol only when it is greater than 1.
    """
    composition = Composition(f)
    amounts = composition.as_dict()
    pieces = []
    for element in reversed(composition.elements):
        symbol = str(element)
        count = int(amounts[symbol])
        suffix = str(count) if count > 1 else ''
        pieces.append(symbol + suffix)
    return ''.join(pieces)

def experimental_formation_energy(f):
    """Look up the experimental value for formula ``f``.

    Sources are tried in order: ``mpcontribs_solid_data[f]``, then
    ``mpcontribs_solid_data`` under the element-reversed formula from
    ``flip_formula``, then ``mp_expt_data[f]``.

    Raises:
        KeyError: if none of the three lookups has the formula.
    """
    try:
        return mpcontribs_solid_data[f]
    except KeyError:
        pass  # fall through to the flipped spelling

    try:
        return mpcontribs_solid_data[flip_formula(f)]
    except KeyError:
        pass  # fall through to the second data set

    return mp_expt_data[f]

In [133]:
# Experimental value for every formula in binary_formulas_ids, divided by the number
# of atoms in the formula.
# NOTE(review): mpcontribs_solid_data is already converted with .to('eV/atom') before
# this division - confirm that unit means "per formula unit" so that values are not
# normalised twice.
experimental_formation_energies = {
    formula: experimental_formation_energy(formula) / Composition(formula).num_atoms
    for formula in binary_formulas_ids
}
sample(experimental_formation_energies)

LiH : -0.46963096879937977
AsF3 : -2.4790155440414505
CrN : -0.6070974593852131
FeCl2 : -1.1809497964696893
SrO : -3.068010375107416
Li2O2 : -1.639163658553558
MnBr2 : -1.3297357968984367
MnTe : -0.5658891232319146
IrO2 : -0.8616162840905953
Mg3N2 : -0.955745272060079
P2O5 : -4.456547115595681
TiH2 : -0.4986871987807108
TiF3 : -3.7195550049135093
MnO : -1.9946036953476551
Zn3N2 : -0.046846498846305026
Fe2P : -0.5534519996444001
TaCl5 : -1.4837747546646334
NaI : -1.4917241494909865
TaN : -1.3074526171374732


##### Problem? Compounds with a positive experimental formation energy

In [122]:
# List every formula whose experimental formation energy is positive, then show the
# value for TiCr2 specifically.
print([formula for formula, energy in experimental_formation_energies.items() if energy > 0])
print(experimental_formation_energies['TiCr2'])

['TiCr2']
0.0010364269656262172


## Calculate Errors and Write Locally

In [116]:
# Signed difference, computed - experimental, per formula (no abs() is taken despite
# the name).
absolute_errors = {
    formula: computed_formation_energies[formula] - experimental_formation_energies[formula]
    for formula in binary_formulas_ids
}
# The same signed difference divided by the experimental value; an experimental value
# of exactly 0 would raise ZeroDivisionError here.
relative_errors = {
    formula: absolute_errors[formula] / experimental_formation_energies[formula]
    for formula in binary_formulas_ids
}

In [134]:
# Spot-check some of the signed (computed - experimental) differences.
sample(absolute_errors)

BaH2 : 0.0001755016913479457
NdF3 : 0.012198873136208377
CeCl3 : 0.18421454644775892
MgO : 0.11112332620036636
MgF2 : 0.08844333778539415
SiO2 : 0.1145681737646651
HgBr : 0.6048849037396409
KO2 : 0.037576742397489094
Fe2B : 0.050834690755621414
FeF3 : 0.2209728162384348
ZnO : 0.21792565475994974
Ni3S2 : 0.03832853305601258
Ni3S4 : -0.02593194517209224
Ni3P : 0.1747209092048665
MgH2 : -0.012335339981764781
CsBr : 0.10251954182434275
TlCl : -0.014181959345627604
UO2 : 0.33958227818148057
Mn3O4 : -0.0651794963209551
V3Si : -0.025642751565107758
SrF2 : 0.01659522557545756
ZrN : -0.08581870439734818


In [135]:
# Spot-check some of the relative errors (difference divided by the experimental value).
sample(relative_errors)

CaF2 : -0.009892821495280407
AsCl3 : -0.06812733742076625
AgCl : -0.16714996823718878
SO3 : 0.07711275666298324
VF4 : 0.030561319945711762
CoF3 : -0.02507375376276877
TiSi : 0.15093177796108376
TiSi2 : 0.12313549937103056
FeS : -0.2930407008455182
RbF : -0.015068095044816272
RbCl : -0.044954500924602535
SbF3 : 0.013297915655125675
InBr : 6.107064039599332
WCl4 : -0.03138279355178374
TaFe2 : -0.38677413597929217
Si3N4 : 0.20855977206335027
HgBr2 : -0.17504072396660303


##### Compounds with high error (|error| > 1.5 eV/atom)

In [136]:
# Keep only the formulas whose error magnitude exceeds 1.5.
{formula: error for formula, error in absolute_errors.items() if abs(error) > 1.5}

{'VO2': 2.328797360043708, 'P2O5': 2.3106033421603063}

In [137]:
# Persist both error dictionaries as JSON files under data/.
dumpfn(absolute_errors, 'data/absolute_errors.json')
dumpfn(relative_errors, 'data/relative_errors.json')