# Core Imports and Setup

In [1]:
import os
from pathlib import Path

import numpy as np

import warnings
warnings.filterwarnings("ignore")

import logging
logging.getLogger("openff.toolkit").setLevel(logging.ERROR)

from openff import toolkit, evaluator

from openff.units import unit

from matplotlib import pyplot

import matplotlib.pyplot as plt

from scipy.stats import linregress

# 0) Registering Custom ThermoML Properties

In [2]:
from openff.evaluator import properties
from openff.evaluator.datasets.thermoml import thermoml_property
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase

@thermoml_property("Osmotic coefficient", supported_phases=PropertyPhase.Liquid)
class OsmoticCoefficient(PhysicalProperty):
    """A class representation of an osmotic coefficient property.

    The class is decorated with ``thermoml_property`` using the ThermoML
    property name "Osmotic coefficient" and the liquid phase.
    """

    @classmethod
    def default_unit(cls):
        """Return the default unit of this property, which is dimensionless."""
        return unit.dimensionless
    
...

# Custom property classes defined in this notebook.
custom_thermoml_props = [
    OsmoticCoefficient,
]

# Attach each custom class to the `openff.evaluator.properties` module under
# its own class name, so it can be looked up there like a built-in property.
for custom_prop_cls in custom_thermoml_props:
    setattr(properties, custom_prop_cls.__name__, custom_prop_cls)

# 1) Loading ThermoML Data Sets

## Extracting data from ThermoML

In [3]:
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase, PhysicalPropertyDataSet
from openff.evaluator.datasets.thermoml import thermoml_property, ThermoMLDataSet

data_set_initial = PhysicalPropertyDataSet.from_json("training-properties-with-water.json")
data_set_initial.to_pandas()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,Density Value (g / ml),Density Uncertainty (g / ml),EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Source
0,6385,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.24520,,O,Solvent,0.75480,,0.903811,,,,10.1016/j.fluid.2010.05.001
1,6386,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.50500,,O,Solvent,0.49500,,0.858158,,,,10.1016/j.fluid.2010.05.001
2,6387,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.79070,,O,Solvent,0.20930,,0.826047,,,,10.1016/j.fluid.2010.05.001
3,6388,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.26433,,O,Solvent,0.73567,,0.959000,,,,10.1021/acs.jced.6b00888
4,6389,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.50770,,O,Solvent,0.49230,,0.923722,,,,10.1016/j.jct.2004.11.016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,7294,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.49110,,O,Solvent,0.50890,,,,-3.131,,10.1016/j.jct.2015.06.006
119,7295,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.71490,,O,Solvent,0.28510,,,,-1.753,,10.1016/j.jct.2015.06.006
120,7344,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.30050,,O,Solvent,0.69950,,,,-6.588,,10.1016/j.jct.2015.04.030
121,7345,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.49980,,O,Solvent,0.50020,,,,-5.544,,10.1016/j.jct.2015.04.030


In [4]:
print(len(data_set_initial))
print(data_set_initial.property_types)
print(data_set_initial.substances)

123
{'Density', 'EnthalpyOfMixing'}
{<Substance OCC(O)CO{solv}{x=0.490710}|O{solv}{x=0.509290}>, <Substance C1COCCN1{solv}{x=0.793000}|O{solv}{x=0.207000}>, <Substance Cn1ccnc1{solv}{x=0.498200}|O{solv}{x=0.501800}>, <Substance CCN(CC)CCO{solv}{x=0.700300}|O{solv}{x=0.299700}>, <Substance C1CCNCC1{solv}{x=0.500000}|O{solv}{x=0.500000}>, <Substance CN1CCCCC1{solv}{x=0.505000}|O{solv}{x=0.495000}>, <Substance CN(C)CCCO{solv}{x=0.199800}|O{solv}{x=0.800200}>, <Substance CCN(CC)CCO{solv}{x=0.193290}|O{solv}{x=0.806710}>, <Substance CCN(CC)CCO{solv}{x=0.200800}|O{solv}{x=0.799200}>, <Substance OCCN1CCOCC1{solv}{x=0.300200}|O{solv}{x=0.699800}>, <Substance NCCNCCN{solv}{x=0.702000}|O{solv}{x=0.298000}>, <Substance OCCN(CCO)CCO{solv}{x=0.107783}|O{solv}{x=0.892217}>, <Substance C1COCCN1{solv}{x=0.714900}|O{solv}{x=0.285100}>, <Substance CC(C)O{solv}{x=0.764357}|O{solv}{x=0.235643}>, <Substance OC1=NCCC1{solv}{x=0.769300}|O{solv}{x=0.230700}>, <Substance CN(C)CCCO{solv}{x=0.289504}|O{solv}{x=0

## Filtering data set

In [5]:
from rdkit import Chem
from rdkit.Chem import FilterCatalog
from openff.evaluator.substances import Component, Substance

In [17]:
def get_func_smiles(initial_data_set):
    """Group the non-water components of a data set by functional group.

    Every substance in ``initial_data_set.substances`` is inspected and each
    component whose SMILES is not water (``'O'``) is classified with RDKit's
    functional group hierarchy filter catalog.

    Parameters
    ----------
    initial_data_set
        Object exposing a ``substances`` collection; each substance must expose
        ``components`` whose items have a ``smiles`` attribute.

    Returns
    -------
    dict
        Maps ``'alcohols'``, ``'amines'``, ``'both'`` and ``'other'`` to lists
        of SMILES strings. One entry is added per non-water component per
        substance, so a SMILES appears once for every substance containing it.
        ``'both'`` holds molecules matching the amine *and* alcohol filters;
        ``'other'`` holds molecules matching neither of them.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the component SMILES.
    """
    water_smiles = 'O'
    catalog = FilterCatalog.GetFunctionalGroupHierarchy()

    def classify(smiles):
        """Return the result key that the molecule ``smiles`` belongs to."""
        molecule = Chem.MolFromSmiles(smiles)
        if molecule is None:
            raise ValueError(f"RDKit could not parse the SMILES {smiles!r}")

        # Query the catalog once and reduce the matches to their names.
        group_names = {match.GetDescription() for match in catalog.GetMatches(molecule)}
        is_amine = 'Amine' in group_names
        is_alcohol = 'Alcohol' in group_names

        if is_amine and is_alcohol:
            return 'both'
        if is_amine:
            return 'amines'
        if is_alcohol:
            return 'alcohols'
        # No matches at all, or only groups other than amine / alcohol.
        return 'other'

    smiles_results = {'alcohols': [], 'amines': [], 'both': [], 'other': []}

    # De-duplicate the substances, then walk every component without mutating
    # any list while iterating over it. Substances of any size are supported,
    # including ones that contain no water.
    for substance in set(initial_data_set.substances):
        for component in substance.components:
            component_smiles = component.smiles
            if component_smiles == water_smiles:
                continue
            smiles_results[classify(component_smiles)].append(component_smiles)

    return smiles_results

    

In [15]:
# subs=list(set(data_set_initial.substances))
# smiles_list=[]

# for i in subs:
#     comps=[]
#     comps.append(i.components[0].smiles)
#     comps.append(i.components[1].smiles)
#     for o in comps:
#         comps.remove('O')
#         [smiles_list.append(y) for y in comps]
        

# print(smiles_list)
# print(len(smiles_list))

# mols_list=[]

# for s in smiles_list:
#     m=Chem.MolFromSmiles(s)
#     mols_list.append(m)

In [16]:
# fc = FilterCatalog.GetFunctionalGroupHierarchy()
# amines=[]
# alcohols=[]
# both=[]
# other=[]
# for i in range(len(mols_list)):
#     whtv=mols_list[i]
#     if not fc.GetMatches(whtv):
#         other.append(i)
#     else:
#         for match in fc.GetMatches(whtv):
#             ffg=match.GetDescription()
#             # print(i,ffg)
#             if ffg == 'Amine':
#                 amines.append(i)
#             elif ffg == 'Alcohol':
#                 alcohols.append(i)
#             if i in amines and i in alcohols:
#                 both.append(i)
#                 amines.remove(i)
#                 alcohols.remove(i)

In [8]:
# et=mols_list[1]

# if match in fc.GetMatches(et):
#     for match in fc.GetMatches(et):
#         xyz=match.GetDescription()
#         print(xyz)
# else:
#     print("other")

In [18]:
smiles_results=get_func_smiles(data_set_initial)

In [19]:
smiles_results['alcohols']

['OCC(O)CO',
 'OCCOCCO',
 'CC(C)O',
 'CC(C)O',
 'OCC(O)CO',
 'CC(C)O',
 'CC(C)O',
 'OCC(O)CO',
 'OCC(O)CO',
 'OCCOCCO',
 'CC(C)O',
 'OCCOCCO',
 'OCCOCCO',
 'CC(C)O',
 'OCC(O)CO',
 'OCCOCCO',
 'OCCOCCO',
 'OCC(O)CO']

In [10]:
# NOTE(review): in the visible notebook `amines` is only assigned inside
# get_func_smiles(), so this cell presumably relies on state left over from an
# earlier kernel session and would raise NameError after Restart & Run All.
print(amines)
print(len(amines))

[3, 8, 9, 19, 23, 32, 35, 36, 37, 39, 41, 45, 55, 58, 62, 68, 70, 71, 72, 81, 82, 86, 87, 96, 104, 105, 106, 108, 113, 115]
30


In [11]:
# NOTE(review): in the visible notebook `both` is only assigned inside
# get_func_smiles(), so this cell presumably relies on state left over from an
# earlier kernel session and would raise NameError after Restart & Run All.
print(both)
print(len(both))

[0, 1, 6, 7, 10, 12, 14, 15, 16, 17, 18, 20, 21, 22, 25, 26, 27, 28, 30, 31, 34, 38, 42, 47, 48, 49, 52, 53, 54, 56, 57, 60, 61, 64, 65, 66, 67, 73, 75, 76, 77, 79, 80, 84, 88, 90, 92, 93, 94, 95, 97, 98, 99, 100, 101, 103, 107, 109, 110, 111, 112, 114]
62


In [12]:
# NOTE(review): in the visible notebook `other` is only assigned inside
# get_func_smiles(), so this cell presumably relies on state left over from an
# earlier kernel session and would raise NameError after Restart & Run All.
print(other)
print(len(other))

[4, 5, 46, 51, 59, 69, 91, 102, 117, 118]
10


In [13]:
# The alcohol SMILES are already computed by get_func_smiles(); reading them
# from `smiles_results` avoids depending on `alcohols` and `subs`, which exist
# only as local variables of that function and are undefined in a fresh kernel.
alcohol_smiles = list(smiles_results['alcohols'])

print(alcohol_smiles)
print(len(alcohol_smiles))

['OCC(O)CO', 'OCCOCCO', 'CC(C)O', 'CC(C)O', 'OCC(O)CO', 'CC(C)O', 'CC(C)O', 'OCC(O)CO', 'OCC(O)CO', 'OCCOCCO', 'CC(C)O', 'OCCOCCO', 'OCCOCCO', 'CC(C)O', 'OCC(O)CO', 'OCCOCCO', 'OCCOCCO', 'OCC(O)CO']
18


In [None]:
# print(alcohols)
# print(amines)
# for x in amines:
#     print(subs[x])
# for o in alcohols:
#     print(subs[o])

In [None]:
from openff.evaluator.datasets.curation.components.filtering import FilterBySmiles, FilterBySmilesSchema


# An empty FilterBySmilesSchema carries no inclusion or exclusion list, so the
# filter must be told which SMILES to keep. The alcohol SMILES come from
# get_func_smiles(); duplicates are removed and the list is sorted so the
# schema is reproducible between runs.
alcohol_smiles_to_include = sorted(set(smiles_results['alcohols']))

data_set_alcohols = FilterBySmiles.apply(
    data_set_initial,
    FilterBySmilesSchema(
        # Water ('O') is listed as well because the substances here are
        # mixtures with water.
        # NOTE(review): presumably every component of a substance must appear
        # in `smiles_to_include` for it to be retained — confirm against the
        # evaluator documentation.
        smiles_to_include=[*alcohol_smiles_to_include, 'O'],
    ),
)

In [None]:
from openff.evaluator.datasets.curation.components.filtering import FilterByPropertyTypes, FilterByPropertyTypesSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByTemperature, FilterByTemperatureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByPressure, FilterByPressureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterBySmiles, FilterBySmilesSchema

# Keep only the enthalpy-of-mixing and density data points.
property_types_schema = FilterByPropertyTypesSchema(
    property_types=["EnthalpyOfMixing", "Density"]
)
data_set_hmix_dens = FilterByPropertyTypes.apply(data_set_initial, property_types_schema)

print(len(data_set_hmix_dens))

### Inspecting and saving new properties

In [None]:
# Save the filtered data set to disk; the analysis functions below read it
# back from this same file name.
data_set_path = Path('filtered_dataset_hmix_dens.json')
# NOTE(review): `format=True` presumably pretty-prints the JSON — confirm.
data_set_hmix_dens.json(data_set_path, format=True)

# Convert the filtered properties to a pandas table for inspection. The table
# is only assigned here; this cell does not display it.
pandas_data_set = data_set_hmix_dens.to_pandas()

## 3) Analysing Data Sets

### Plotting results separately

In [None]:
def analysis_waters(water_models=('TIP3P','TIP3P_FB')):
    """Plot estimated against experimental values, one figure per water model.

    For each water model the experimental data set
    ``filtered_dataset_hmix_dens.json`` is paired by property id with
    ``<water_model>/estimated_dataset_hmix_dens.json``. A two-panel figure is
    drawn: density on the left, enthalpy of mixing on the right.

    Parameters
    ----------
    water_models
        Names of the water models to plot; each name is also the directory
        that holds the estimated data set. Defaults to TIP3P and TIP3P_FB.

    Raises
    ------
    ValueError
        If an experimental density or enthalpy-of-mixing property has no
        estimated property with the same id.
    """
    # The experimental data do not depend on the water model, so load once.
    experimental_data_set = PhysicalPropertyDataSet.from_json(
        'filtered_dataset_hmix_dens.json'
    )

    # Preferred unit of each plotted property; the key order is the panel order.
    preferred_units = {
        "Density": unit.kilogram / unit.meter**3,
        "EnthalpyOfMixing": unit.kilojoule / unit.mole,
    }
    panel_titles = {
        "Density": "Density $kg m^{-3}$ - %s",
        "EnthalpyOfMixing": "$H_{mix}$ $kJ mol^{-1}$ - %s",
    }

    for w in water_models:
        estimated_data_set_path = '%s/estimated_dataset_hmix_dens.json' % w
        estimated_data_set = PhysicalPropertyDataSet.from_json(estimated_data_set_path)

        # Index the estimates by id once instead of scanning the whole data set
        # for every experimental property. `setdefault` keeps the first
        # property when an id is repeated.
        estimated_by_id = {}
        for estimated_property in estimated_data_set:
            estimated_by_id.setdefault(estimated_property.id, estimated_property)

        properties_by_type = {property_type: [] for property_type in preferred_units}

        for experimental_property in experimental_data_set:
            property_type = experimental_property.__class__.__name__
            if property_type not in properties_by_type:
                # Only density and enthalpy of mixing are plotted here.
                continue

            estimated_property = estimated_by_id.get(experimental_property.id)
            if estimated_property is None:
                raise ValueError(
                    "No estimated property with id %r was found in %s"
                    % (experimental_property.id, estimated_data_set_path)
                )

            properties_by_type[property_type].append(
                (experimental_property, estimated_property)
            )

        # Create the figure we will plot to.
        figure, axes = pyplot.subplots(nrows=1, ncols=2, figsize=(10.0, 5.0))

        for index, (property_type, preferred_unit) in enumerate(preferred_units.items()):
            axis = axes[index]
            axis.set_xlabel("OpenFF 2.1.0")
            axis.set_ylabel("Experimental")
            axis.set_title(panel_titles[property_type] % w)

            # Convert the values of our properties to the preferred units.
            property_pairs = properties_by_type[property_type]
            experimental_values = [
                experimental_property.value.to(preferred_unit).magnitude
                for experimental_property, _ in property_pairs
            ]
            estimated_values = [
                estimated_property.value.to(preferred_unit).magnitude
                for _, estimated_property in property_pairs
            ]

            axis.plot(
                estimated_values, experimental_values, marker="x", linestyle="None"
            )
        

In [None]:
# analysis_waters()

### Plotting results all together

In [None]:
def _results_stats(x, y):
    """Return linear-regression statistics and the RMSE between ``x`` and ``y``.

    The regression is ``linregress(x, y)``, i.e. ``y`` fitted against ``x``.
    The returned dict has the keys ``slope``, ``intercept``, ``r_value``,
    ``r2_value``, ``p_value``, ``std_err`` and ``rmse``.
    """
    slope, intercept, r_value, p_value, std_err = linregress(x, y)
    differences = np.array(x) - np.array(y)

    return {
        'slope': slope,
        'intercept': intercept,
        'r_value': r_value,
        'r2_value': r_value**2,
        'p_value': p_value,
        'std_err': std_err,
        'rmse': np.sqrt((differences ** 2).mean()),
    }


def analysis_all(phys_prop,water_model):
    """Pair experimental and estimated values of one property type.

    Reads ``filtered_dataset_hmix_dens.json`` and
    ``<water_model>/estimated_dataset_hmix_dens.json``, matches properties of
    type ``phys_prop`` by id, and converts them to the preferred unit.

    Parameters
    ----------
    phys_prop
        Class name of the property type, ``'Density'`` or
        ``'EnthalpyOfMixing'``.
    water_model
        Name of the water model, which is also the directory holding the
        estimated data set.

    Returns
    -------
    dict
        Three entries keyed ``'<water_model>:<phys_prop>-Experimental_Values'``,
        ``'...-Estimated_Values'`` and ``'...-Stats'``. The stats are computed
        with the estimated values as x and the experimental values as y.

    Raises
    ------
    KeyError
        If ``phys_prop`` has no preferred unit defined.
    ValueError
        If the experimental data set holds no ``phys_prop`` property, or one
        of them has no estimated property with the same id.
    """
    preferred_units = {
        "Density": unit.kilogram / unit.meter**3,
        "EnthalpyOfMixing": unit.kilojoule / unit.mole,
    }
    # Look the unit up from the requested type. The original read a loop
    # variable here that was unbound whenever no property matched.
    preferred_unit = preferred_units[phys_prop]

    experimental_data_set_path = 'filtered_dataset_hmix_dens.json'
    estimated_data_set_path = ('%s/estimated_dataset_hmix_dens.json'%water_model)

    experimental_data_set = PhysicalPropertyDataSet.from_json(experimental_data_set_path)
    estimated_data_set = PhysicalPropertyDataSet.from_json(estimated_data_set_path)

    # Index the estimates by id once; `setdefault` keeps the first property
    # when an id is repeated.
    estimated_by_id = {}
    for estimated_property in estimated_data_set:
        estimated_by_id.setdefault(estimated_property.id, estimated_property)

    experimental_values = []
    estimated_values = []

    for experimental_property in experimental_data_set:
        if type(experimental_property).__name__ != phys_prop:
            continue

        estimated_property = estimated_by_id.get(experimental_property.id)
        if estimated_property is None:
            raise ValueError(
                "No estimated property with id %r was found in %s"
                % (experimental_property.id, estimated_data_set_path)
            )

        experimental_values.append(
            experimental_property.value.to(preferred_unit).magnitude
        )
        estimated_values.append(
            estimated_property.value.to(preferred_unit).magnitude
        )

    if not experimental_values:
        raise ValueError(
            "No %s properties were found in %s" % (phys_prop, experimental_data_set_path)
        )

    results=dict()
    results["%s:%s-Experimental_Values" %(water_model,phys_prop)]=experimental_values
    results["%s:%s-Estimated_Values" %(water_model,phys_prop)]=estimated_values
    results["%s:%s-Stats" %(water_model,phys_prop)]=_results_stats(estimated_values,experimental_values)

    return results

In [None]:
# Water models to compare; analysis_all() builds the path of each estimated
# data set from these names.
waters=('TIP3P','TIP3P_FB','TIP4P','TIP4P_FB','OPC','OPC3')

# Flat dictionary collecting every analysis_all() result. The keys already
# include the water model and property type, so entries never collide.
results=dict()

for w in waters:
    dens_result=analysis_all('Density', w)
    results.update(dens_result)
    hmix_result=analysis_all('EnthalpyOfMixing',w)
    results.update(hmix_result)

In [None]:
# Colour of each water model; the key order is the plotting and legend order.
water_model_colors = {
    'TIP3P': 'r',
    'TIP3P_FB': 'b',
    'TIP4P': 'g',
    'TIP4P_FB': 'c',
    'OPC': 'm',
    'OPC3': 'darkorange',
}


def _plot_property_panel(ax, phys_prop, title):
    """Draw the scatter points and regression line of every water model on ``ax``.

    The values are read from the module-level ``results`` dictionary, using the
    ``'<water_model>:<phys_prop>-...'`` keys written by analysis_all().
    """
    for water_model, color in water_model_colors.items():
        key_prefix = '%s:%s' % (water_model, phys_prop)
        stats = results['%s-Stats' % key_prefix]

        ax.plot(
            results['%s-Estimated_Values' % key_prefix],
            results['%s-Experimental_Values' % key_prefix],
            marker='x', linestyle='None', color=color, label=water_model,
        )
        # Regression line through (0, intercept) with the fitted slope.
        ax.axline(
            xy1=(0, stats['intercept']), slope=stats['slope'],
            linestyle="--", color=color, alpha=0.5,
            label='%s $r^{2}$=%.3f, RMSE=%.3f' % (water_model, stats['r2_value'], stats['rmse']),
        )

    ax.set_title(title)
    ax.set_xlabel('OpenFF 2.1.0')
    ax.set_ylabel('Experimental')
    ax.legend(prop={'size': 6})


fig, (hmix_ax, dens_ax) = plt.subplots(nrows=1, ncols=2, figsize=(12.0, 5.0))

_plot_property_panel(hmix_ax, 'EnthalpyOfMixing', '$H_{mix}$ $kJ mol^{-1}$')

_plot_property_panel(dens_ax, 'Density', "Density $kg m^{-3}$")
dens_ax.set_xlim(800, 1200)
dens_ax.set_ylim(800, 1200)

fig.savefig('hmix_dens_plot.png')

plt.show()

## Breakdown into functional groups

In [None]:
filters = FilterCatalog.GetFlattenedFunctionalGroupHierarchy()
for k, pat in filters.items():
    print(f"{k} -> {Chem.MolToSmarts(pat)}")

In [None]:
# experimental_data_set_path = 'filtered_dataset_hmix_dens.json'
# estimated_data_set_path = ('TIP3P/estimated_dataset_hmix_dens.json')

# experimental_data_set = PhysicalPropertyDataSet.from_json(experimental_data_set_path)
# estimated_data_set = PhysicalPropertyDataSet.from_json(estimated_data_set_path)

# for experimental_property in experimental_data_set:
#     print(type(experimental_property))

In [None]:
# results_amines=dict()


# for w in waters:
#     dens_result=analysis_all('Density', w)
#     results.update(dens_result)
#     hmix_result=analysis_all('EnthalpyOfMixing',w)
#     results.update(hmix_result)