# Core Imports and Setup

In [1]:
import os
from pathlib import Path

import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation / numerical warnings from the
# libraries used below — consider narrowing the filter.
warnings.filterwarnings("ignore")

import logging
# Only let ERROR-level (and above) messages from the "openff.toolkit" logger through.
logging.getLogger("openff.toolkit").setLevel(logging.ERROR)

from openff import toolkit, evaluator

from openff.units import unit

# 0) Registering Custom ThermoML Properties

In [2]:
from openff.evaluator import properties
from openff.evaluator.datasets.thermoml import thermoml_property
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase

@thermoml_property("Osmotic coefficient", supported_phases=PropertyPhase.Liquid)
class OsmoticCoefficient(PhysicalProperty):
    """A class representation of an osmotic coefficient property.

    The class is decorated with ``thermoml_property`` using the ThermoML
    property name "Osmotic coefficient" and the liquid phase as its
    supported phase.
    """

    @classmethod
    def default_unit(cls):
        """Return the default unit of this property, which is dimensionless."""
        return unit.dimensionless
    
...

# Custom property classes defined in this notebook.
custom_thermoml_props = [OsmoticCoefficient]

# Attach each class to the `openff.evaluator.properties` module under its own
# class name (presumably so it can be looked up there by name — TODO confirm).
for custom_prop_cls in custom_thermoml_props:
    property_name = custom_prop_cls.__name__
    setattr(properties, property_name, custom_prop_cls)

# 1) Loading ThermoML Data Sets

## Extracting data from ThermoML

In [3]:
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase, PhysicalPropertyDataSet
from openff.evaluator.datasets.thermoml import thermoml_property, ThermoMLDataSet

# Read the starting data set from a JSON file given by a relative path.
data_set_initial = PhysicalPropertyDataSet.from_json("training-properties-with-water.json")
# Last expression of the cell: show the data set as a pandas table.
data_set_initial.to_pandas()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,Density Value (g / ml),Density Uncertainty (g / ml),EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Source
0,6385,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.24520,,O,Solvent,0.75480,,0.903811,,,,10.1016/j.fluid.2010.05.001
1,6386,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.50500,,O,Solvent,0.49500,,0.858158,,,,10.1016/j.fluid.2010.05.001
2,6387,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.79070,,O,Solvent,0.20930,,0.826047,,,,10.1016/j.fluid.2010.05.001
3,6388,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.26433,,O,Solvent,0.73567,,0.959000,,,,10.1021/acs.jced.6b00888
4,6389,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.50770,,O,Solvent,0.49230,,0.923722,,,,10.1016/j.jct.2004.11.016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,7294,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.49110,,O,Solvent,0.50890,,,,-3.131,,10.1016/j.jct.2015.06.006
119,7295,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.71490,,O,Solvent,0.28510,,,,-1.753,,10.1016/j.jct.2015.06.006
120,7344,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.30050,,O,Solvent,0.69950,,,,-6.588,,10.1016/j.jct.2015.04.030
121,7345,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.49980,,O,Solvent,0.50020,,,,-5.544,,10.1016/j.jct.2015.04.030


In [4]:
# Summarise the loaded data set: entry count, property types and substances,
# one item per output line.
print(
    len(data_set_initial),
    data_set_initial.property_types,
    data_set_initial.substances,
    sep="\n",
)

123
{'Density', 'EnthalpyOfMixing'}
{<Substance CNCCO{solv}{x=0.501700}|O{solv}{x=0.498300}>, <Substance OCCNCCO{solv}{x=0.285600}|O{solv}{x=0.714400}>, <Substance CN1CCCCC1{solv}{x=0.505000}|O{solv}{x=0.495000}>, <Substance Cc1ccncc1{solv}{x=0.500000}|O{solv}{x=0.500000}>, <Substance OCCN(CCO)CCO{solv}{x=0.107783}|O{solv}{x=0.892217}>, <Substance CNCCO{solv}{x=0.198700}|O{solv}{x=0.801300}>, <Substance CN(C)CCCO{solv}{x=0.199800}|O{solv}{x=0.800200}>, <Substance CC(C)O{solv}{x=0.764357}|O{solv}{x=0.235643}>, <Substance CNCCO{solv}{x=0.800900}|O{solv}{x=0.199100}>, <Substance CN(C)CCCO{solv}{x=0.501900}|O{solv}{x=0.498100}>, <Substance CN(CCO)CCO{solv}{x=0.259300}|O{solv}{x=0.740700}>, <Substance OCCOCCO{solv}{x=0.500600}|O{solv}{x=0.499400}>, <Substance CCN(CC)CCO{solv}{x=0.193290}|O{solv}{x=0.806710}>, <Substance OCCN(CCO)CCO{solv}{x=0.490200}|O{solv}{x=0.509800}>, <Substance NCCNCCO{solv}{x=0.069900}|O{solv}{x=0.930100}>, <Substance C1CCNCC1{solv}{x=0.250000}|O{solv}{x=0.750000}>, <

## Filtering data set

In [5]:
from openff.evaluator.datasets.curation.components.filtering import FilterByPropertyTypes, FilterByPropertyTypesSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByTemperature, FilterByTemperatureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByPressure, FilterByPressureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterBySmiles, FilterBySmilesSchema

# Keep only the enthalpy-of-mixing and density entries of the initial data set.
property_type_schema = FilterByPropertyTypesSchema(
    property_types=["EnthalpyOfMixing", "Density"]
)
data_set_hmix_dens = FilterByPropertyTypes.apply(data_set_initial, property_type_schema)

# Number of entries that remain after filtering.
print(len(data_set_hmix_dens))

123


### Inspecting and saving new properties

In [6]:
# Save the filtered data set to a JSON file for future use; the estimation
# section of this notebook reloads it from the same file name.
data_set_path = Path('filtered_dataset_hmix_dens.json')
data_set_hmix_dens.json(data_set_path, format=True)

# Build a pandas table of the filtered properties for inspection.
pandas_data_set = data_set_hmix_dens.to_pandas()


In [7]:
# Display the table of filtered properties built in the previous cell.
pandas_data_set

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,Density Value (g / ml),Density Uncertainty (g / ml),EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Source
0,6385,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.24520,,O,Solvent,0.75480,,0.903811,,,,10.1016/j.fluid.2010.05.001
1,6386,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.50500,,O,Solvent,0.49500,,0.858158,,,,10.1016/j.fluid.2010.05.001
2,6387,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.79070,,O,Solvent,0.20930,,0.826047,,,,10.1016/j.fluid.2010.05.001
3,6388,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.26433,,O,Solvent,0.73567,,0.959000,,,,10.1021/acs.jced.6b00888
4,6389,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.50770,,O,Solvent,0.49230,,0.923722,,,,10.1016/j.jct.2004.11.016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,7294,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.49110,,O,Solvent,0.50890,,,,-3.131,,10.1016/j.jct.2015.06.006
119,7295,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.71490,,O,Solvent,0.28510,,,,-1.753,,10.1016/j.jct.2015.06.006
120,7344,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.30050,,O,Solvent,0.69950,,,,-6.588,,10.1016/j.jct.2015.04.030
121,7345,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.49980,,O,Solvent,0.50020,,,,-5.544,,10.1016/j.jct.2015.04.030


# 2) Estimating Data Sets

### Loading data set and FF parameters

In [8]:
# Reload the filtered data set from the JSON file written in the filtering
# section ('filtered_dataset_hmix_dens.json').
data_set_path = Path('filtered_dataset_hmix_dens.json')
data_set = PhysicalPropertyDataSet.from_json(data_set_path)

In [9]:
from openff.toolkit.typing.engines.smirnoff import forcefield, ForceField
from openff.evaluator.forcefield import SmirnoffForceFieldSource

# Show the force field file names (.offxml) that the toolkit reports as available.
forcefield.get_available_force_fields()

['ff14sb_0.0.4.offxml',
 'ff14sb_off_impropers_0.0.2.offxml',
 'ff14sb_0.0.2.offxml',
 'ff14sb_0.0.1.offxml',
 'ff14sb_off_impropers_0.0.1.offxml',
 'ff14sb_off_impropers_0.0.3.offxml',
 'ff14sb_off_impropers_0.0.4.offxml',
 'ff14sb_0.0.3.offxml',
 'smirnoff99Frosst-1.0.9.offxml',
 'smirnoff99Frosst-1.0.7.offxml',
 'smirnoff99Frosst-1.0.5.offxml',
 'smirnoff99Frosst-1.0.0.offxml',
 'smirnoff99Frosst-1.0.6.offxml',
 'smirnoff99Frosst-1.0.2.offxml',
 'smirnoff99Frosst-1.0.1.offxml',
 'smirnoff99Frosst-1.0.8.offxml',
 'smirnoff99Frosst-1.0.4.offxml',
 'smirnoff99Frosst-1.0.3.offxml',
 'smirnoff99Frosst-1.1.0.offxml',
 'opc3-1.0.1.offxml',
 'openff-2.0.0.offxml',
 'openff_unconstrained-1.0.0-RC2.offxml',
 'tip3p_fb-1.1.0.offxml',
 'opc-1.0.2.offxml',
 'opc-1.0.0.offxml',
 'openff_unconstrained-2.1.0.offxml',
 'openff_unconstrained-1.3.1.offxml',
 'openff-1.3.0.offxml',
 'openff-1.0.0-RC2.offxml',
 'openff-1.0.1.offxml',
 'opc3-1.0.0.offxml',
 'tip4p_fb-1.0.1.offxml',
 'spce-1.0.0.offxml',


In [10]:
# # load FF
# ff_path = ForceField("openff-2.0.0.offxml", "opc3.offxml")
# force_field_source = SmirnoffForceFieldSource.from_object(ff_path)

In [14]:
from openff.interchange import Interchange
from openff.toolkit import Molecule, ForceField
from openff.units import unit

# gopal=ForceField('/home/bamo6610/Documents/evaluator-blanca/evaluator_water_calcs/hmix_density/calc_with_dois/GOPAL/gopal.offxml')


In [26]:
# Build the path of the `gopal.offxml` file inside the current working
# directory and print it for a visual check.
cwd = os.getcwd()
gopal_path = f"{cwd}/gopal.offxml"
print(gopal_path)

/home/bamo6610/Documents/evaluator-blanca/evaluator_water_calcs/hmix_density/calc_with_dois/GOPAL/gopal.offxml


In [27]:
# Load the force field from the OpenFF 2.0.0 file together with the local
# gopal.offxml file.
force_field = ForceField("openff-2.0.0.offxml", gopal_path)

# Evaluator wants to work with a JSON file for the force field, so the
# force field is serialised to disk first ...
with open("force-field.json", "w") as file:
    serialised_source = SmirnoffForceFieldSource.from_object(force_field)
    file.write(serialised_source.json())

# ... and the force field source is then read back from that JSON file.
force_field_source = SmirnoffForceFieldSource.from_json("force-field.json")

### Defining Calculation Schemas

In [16]:
from openff.evaluator.properties import Density, EnthalpyOfMixing
from openff.evaluator.client import RequestOptions

# Default simulation schemas for both property types, each requested with
# 256 molecules.
density_schema = Density.default_simulation_schema(n_molecules=256)
hmix_schema = EnthalpyOfMixing.default_simulation_schema(n_molecules=256)

# Options object which defines how the data set should be estimated.
estimation_options = RequestOptions()

# Only molecular simulation is used to estimate the data set.
estimation_options.calculation_layers = ["SimulationLayer"]

# Add our custom schemas, specifying that they should be used by the 'SimulationLayer'.
for property_name, property_schema in (
    ("Density", density_schema),
    ("EnthalpyOfMixing", hmix_schema),
):
    estimation_options.add_schema("SimulationLayer", property_name, property_schema)

## Launching a Server and Client

In [17]:
from openff.evaluator.backends import ComputeResources
from openff.evaluator.backends.dask import DaskLocalCluster
from openff.evaluator.server import EvaluatorServer
from openff.evaluator.client import EvaluatorClient
from openff.evaluator.client import ConnectionOptions

# Define the client used to submit queries; it targets the same port the
# server below listens on.
port = 8118
evaluator_client = EvaluatorClient(ConnectionOptions(server_port=port))

# Define available / preferred resources: one thread and one CUDA GPU per
# worker, with only GPU 0 made visible to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
resources = ComputeResources(
    number_of_threads=1,
    number_of_gpus=1,
    preferred_gpu_toolkit=ComputeResources.GPUToolkit.CUDA,
)

with DaskLocalCluster(number_of_workers=1, resources_per_worker=resources) as calculation_backend:
    # Spin up the server on a background thread so this cell can go on to
    # submit the request. Working files are kept for later inspection.
    evaluator_server = EvaluatorServer(calculation_backend=calculation_backend, delete_working_files=False, port=port)
    evaluator_server.start(asynchronous=True)

    try:
        # Estimate the data set by submitting the calculation schemas to the
        # newly-created server.
        request, exception = evaluator_client.request_estimate(
            property_set=data_set,
            force_field_source=force_field_source,
            options=estimation_options,
        )

        # The submission error used to be overwritten below without being
        # checked, so a rejected request surfaced as an unrelated failure on
        # `request.results`. Fail here with the actual cause instead.
        if exception is not None:
            raise RuntimeError(f"The estimation request was not accepted: {exception}")

        # Wait for the results, polling the server every 30 seconds.
        results, exception = request.results(synchronous=True, polling_interval=30)
        assert exception is None, exception
    finally:
        # Always shut the server down (also on errors or a manual interrupt)
        # so that its port is released before the cell is run again.
        evaluator_server.stop()

2024-07-03 08:54:23,382 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
  File "/home/bamo6610/miniconda3/envs/evaluator-blanca/lib/python3.10/site-packages/distributed/worker.py", line 1252, in heartbeat
    response = await retry_operation(
  File "/home/bamo6610/miniconda3/envs/evaluator-blanca/lib/python3.10/site-packages/distributed/utils_comm.py", line 455, in retry_operation
    return await retry(
  File "/home/bamo6610/miniconda3/envs/evaluator-blanca/lib/python3.10/site-packages/distributed/utils_comm.py", line 434, in retry
    return await coro()
  File "/home/bamo6610/miniconda3/envs/evaluator-blanca/lib/python3.10/site-packages/distributed/core.py", line 1394, in send_recv_from_rpc
    return await send_recv(comm=comm, op=key, **kwargs)
  File "/home/bamo6610/miniconda3/envs/evaluator-blanca/lib/python3.10/site-packages/distributed/core.py", line 1153, in send_recv
    response = await comm.read(dese

KeyboardInterrupt: 

In [13]:
# Print, one per line, how many entries the results object holds in each of
# its collections: queued, estimated, unsuccessful, and raised exceptions.
for results_attribute in (
    "queued_properties",
    "estimated_properties",
    "unsuccessful_properties",
    "exceptions",
):
    print(len(getattr(results, results_attribute)))

0
0
57
57


In [17]:
# Print the list of exceptions attached to the results object.
print(results.exceptions)

[WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None), WorkflowException(None),

In [14]:
# Serialise the estimated properties to JSON, passing
# "estimated_dataset_hmix_dens.json" as the target file name. As the last
# expression of the cell, the returned JSON string is also echoed as output.
results.estimated_properties.json("estimated_dataset_hmix_dens.json", format=True)

'{\n  "@type": "openff.evaluator.datasets.datasets.PhysicalPropertyDataSet",\n  "properties": []\n}'

# 3) Analysing Data Sets

### Loading the Data Sets

In [24]:
# Paths of the experimental (reference) and estimated data sets to compare.
# NOTE(review): the cells above write "filtered_dataset_hmix_dens.json" and
# "estimated_dataset_hmix_dens.json"; the names below lack the "_dens" suffix,
# so these files are not written by the cells shown in this notebook —
# confirm they are the intended inputs.
experimental_data_set_path = "filtered_dataset_hmix.json"
estimated_data_set_path = "estimated_dataset_hmix.json"

# Load both data sets from their JSON files.
experimental_data_set = PhysicalPropertyDataSet.from_json(experimental_data_set_path)
estimated_data_set = PhysicalPropertyDataSet.from_json(estimated_data_set_path)

In [25]:
# Show the first rows of the experimental data set as a pandas table.
experimental_data_set.to_pandas().head()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Source
0,6391,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.2052,,O,Solvent,0.7948,,-2.587,,10.1016/j.jct.2007.03.010
1,6392,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.5365,,O,Solvent,0.4635,,-2.575,,10.1016/j.jct.2007.03.010
2,6393,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.7996,,O,Solvent,0.2004,,-1.247,,10.1016/j.jct.2007.03.010
3,6395,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.2008,,O,Solvent,0.7992,,-2.185,,10.1016/j.jct.2015.04.030
4,6396,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.5002,,O,Solvent,0.4998,,-2.504,,10.1016/j.jct.2015.04.030


In [26]:
estimated_data_set.to_pandas().head()