# Core Imports and Setup

In [1]:
import os
from pathlib import Path

import warnings
warnings.filterwarnings("ignore")

import logging
logging.getLogger("openff.toolkit").setLevel(logging.ERROR)

from openff import toolkit, evaluator

# 0) Registering Custom ThermoML Properties

In [2]:
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase
from openff.evaluator.datasets.thermoml import thermoml_property
from openff.evaluator import properties

from openff.units import unit

@thermoml_property("Osmotic coefficient", supported_phases=PropertyPhase.Liquid)
class OsmoticCoefficient(PhysicalProperty):
    """A class representation of a osmotic coeff property"""

    @classmethod
    def default_unit(cls):
        return unit.dimensionless
    
setattr(properties, OsmoticCoefficient.__name__, OsmoticCoefficient)

# 1) - Loading ThermoML Data Sets

## Extracting data from ThermoML

In [3]:
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase
from openff.evaluator.datasets.thermoml import thermoml_property
from openff.units import unit
from openff.evaluator.datasets.thermoml import ThermoMLDataSet

data_set = ThermoMLDataSet.from_doi(
        "10.1016/j.jct.2013.08.018",
        "10.1016/j.fluid.2006.09.025",
        "10.1016/j.jct.2009.06.024",
        "10.1016/j.jct.2011.03.012",
        "10.1016/j.jct.2008.12.021",
        "10.1021/je400821q",
        "10.1021/je4008475",
        "10.1016/j.jct.2011.06.018",
        "10.1016/j.jct.2008.05.011",
        "10.1016/j.fluid.2014.02.004",
        "10.1021/je500772z",
        "10.1021/je100720x",
        "10.1016/j.fluid.2007.03.019",
        "10.1016/j.jct.2008.08.011",
        "10.1016/j.jct.2012.01.007",
        "10.1016/j.fluid.2015.08.035",
        "10.1016/j.jct.2008.07.011",
        "10.1016/j.fluid.2009.02.007",
        "10.1016/j.jct.2017.05.006",
        "10.1016/j.jct.2014.11.014",
        "10.1016/j.jct.2016.07.003",
        "10.1021/acs.jced.5b00184",
        "10.1021/je800609u",
        "10.1021/acs.jced.8b00400",
        "10.1016/j.jct.2014.10.008",
        "10.1021/je050348f",
        "10.1016/j.jct.2016.06.034",
        "10.1007/s10765-009-0566-6",
        "10.1016/j.fluid.2004.11.022",
        "10.1021/je800307g",
        "10.1021/je100760v",
        "10.1021/je700751a",
        "10.1016/j.jct.2019.105878",
        "10.1016/j.jct.2013.03.003",
        "10.1016/j.jct.2017.09.012",
        "10.1021/je900457z",
        "10.1021/acs.jced.9b00112",
        "10.1016/j.jct.2014.07.010",
        "10.1021/je500271z"
    )

In [4]:
len(data_set), data_set.property_types

(2524, {'Density', 'OsmoticCoefficient'})

In [5]:
ThermoMLDataSet.registered_properties['Osmotic coefficient'].conversion_function

functools.partial(<function _default_mapping at 0x7f97f58bf250>, <class '__main__.OsmoticCoefficient'>)

## Filtering data set

In [6]:
from openff.evaluator.datasets.curation.components.filtering import FilterByPropertyTypes, FilterByPropertyTypesSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByTemperature, FilterByTemperatureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByPressure, FilterByPressureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterBySmiles, FilterBySmilesSchema

# Property
data_set = FilterByPropertyTypes.apply(
    data_set, FilterByPropertyTypesSchema(property_types=["OsmoticCoefficient"])
)

In [7]:
data_set.to_pandas()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,OsmoticCoefficient Value (),OsmoticCoefficient Uncertainty (),Source
0,d360a32cc19e4c4db78f767611956ccd,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.9100,0.00550,10.1016/j.jct.2013.08.018
1,eac8c47c497f47008c0bc4dedcd319c9,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.9060,0.00550,10.1016/j.jct.2013.08.018
2,5f61c1080b394ac990d06ab026070ae2,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.8970,0.00550,10.1016/j.jct.2013.08.018
3,c77a24644b3346f799aae6a3b4bb527e,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.8910,0.00550,10.1016/j.jct.2013.08.018
4,3c6ac7191800445db4a03f9ca51f0c9c,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.9490,0.00500,10.1016/j.jct.2013.08.018
...,...,...,...,...,...,...,...,...,...,...,...,...
1913,62e2f5751fcf4a999d2d84cf049f8042,273.1244,101.0,Liquid,1,O,Solvent,1.0,,0.7321,0.00670,10.1021/acs.jced.9b00112
1914,97837e7d98844febb4ea0a9fd09c42b8,273.1243,101.0,Liquid,1,O,Solvent,1.0,,0.7308,0.00675,10.1021/acs.jced.9b00112
1915,293b2667d507442bad59e95b79089a08,273.1218,101.0,Liquid,1,O,Solvent,1.0,,0.7229,0.00695,10.1021/acs.jced.9b00112
1916,bb23f7b8030f496a81a1e8e0f00334d6,273.1207,101.0,Liquid,1,O,Solvent,1.0,,0.7193,0.00700,10.1021/acs.jced.9b00112


In [8]:
from openff.evaluator.datasets.curation.components.filtering import FilterByPropertyTypes, FilterByPropertyTypesSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByTemperature, FilterByTemperatureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByPressure, FilterByPressureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterBySmiles, FilterBySmilesSchema

# # Property
# data_set = FilterByPropertyTypes.apply(
#     data_set, FilterByPropertyTypesSchema(property_types=["Density"])
# )

# Temperature
data_set = FilterByTemperature.apply(
    data_set, FilterByTemperatureSchema(minimum_temperature=298.0, maximum_temperature=330.0)
)

# Pressure
data_set = FilterByPressure.apply(
    data_set, FilterByPressureSchema(minimum_pressure=100.0, maximum_pressure=105.426)
)

# # Solvent
# data_set = FilterBySmiles.apply(
#     data_set, FilterBySmilesSchema(smiles_to_include=["CCO", "CC(C)O"])
# )

print(len(data_set))

1594


In [9]:
pandas_data_set = data_set.to_pandas()
pandas_data_set[
    [
        "Temperature (K)",
        "Pressure (kPa)",
        "Component 1",
        "OsmoticCoefficient Value ()",
        "Source",
    ]
].head()

Unnamed: 0,Temperature (K),Pressure (kPa),Component 1,OsmoticCoefficient Value (),Source
0,298.15,101.0,O,0.91,10.1016/j.jct.2013.08.018
1,298.15,101.0,O,0.906,10.1016/j.jct.2013.08.018
2,298.15,101.0,O,0.897,10.1016/j.jct.2013.08.018
3,298.15,101.0,O,0.891,10.1016/j.jct.2013.08.018
4,298.15,101.0,O,0.949,10.1016/j.jct.2013.08.018


## Adding Extra Data

### Defining new properties

In [10]:
from openff.evaluator.datasets import MeasurementSource, PropertyPhase
from openff.evaluator.substances import Substance
from openff.evaluator.thermodynamics import ThermodynamicState
from openff.evaluator.properties import EnthalpyOfVaporization

from openff.units import unit

# define thermo state
thermodynamic_state = ThermodynamicState(
    temperature=298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
)

# define compounds 
ethanol = Substance.from_components("CCO")
isopropanol = Substance.from_components("CC(C)O")

# define source of measurements
source = MeasurementSource(doi="10.1016/S0021-9614(71)80108-8")

# define measurement values
ethanol_hvap = EnthalpyOfVaporization(
    thermodynamic_state=thermodynamic_state,
    phase=PropertyPhase.Liquid | PropertyPhase.Gas,
    substance=ethanol,
    value=42.26 * unit.kilojoule / unit.mole,
    uncertainty=0.02 * unit.kilojoule / unit.mole,
    source=source,
)
isopropanol_hvap = EnthalpyOfVaporization(
    thermodynamic_state=thermodynamic_state,
    phase=PropertyPhase.Liquid | PropertyPhase.Gas,
    substance=isopropanol,
    value=45.34 * unit.kilojoule / unit.mole,
    uncertainty=0.02 * unit.kilojoule / unit.mole,
    source=source,
)
data_set.add_properties(ethanol_hvap, isopropanol_hvap)

### Inspecting and saving new properties

In [11]:
# save for future use
data_set_path = Path('filtered_data_set.json')
data_set.json(data_set_path, format=True)

# inspect new properties
pandas_data_set = data_set.to_pandas()
pandas_data_set

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,OsmoticCoefficient Value (),OsmoticCoefficient Uncertainty (),EnthalpyOfVaporization Value (kJ / mol),EnthalpyOfVaporization Uncertainty (kJ / mol),Source
0,d360a32cc19e4c4db78f767611956ccd,298.15,101.000,Liquid,1,O,Solvent,1.0,,0.910,0.0055,,,10.1016/j.jct.2013.08.018
1,eac8c47c497f47008c0bc4dedcd319c9,298.15,101.000,Liquid,1,O,Solvent,1.0,,0.906,0.0055,,,10.1016/j.jct.2013.08.018
2,5f61c1080b394ac990d06ab026070ae2,298.15,101.000,Liquid,1,O,Solvent,1.0,,0.897,0.0055,,,10.1016/j.jct.2013.08.018
3,c77a24644b3346f799aae6a3b4bb527e,298.15,101.000,Liquid,1,O,Solvent,1.0,,0.891,0.0055,,,10.1016/j.jct.2013.08.018
4,3c6ac7191800445db4a03f9ca51f0c9c,298.15,101.000,Liquid,1,O,Solvent,1.0,,0.949,0.0050,,,10.1016/j.jct.2013.08.018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1591,b1422f13278c42d3b1ac1126279096e2,328.15,101.000,Liquid,1,O,Solvent,1.0,,0.788,0.0085,,,10.1021/je900457z
1592,8d66ca8c6e754c38a44af917141cf9cf,328.15,101.000,Liquid,1,O,Solvent,1.0,,0.793,0.0085,,,10.1021/je900457z
1593,af845b711d284b6f9733747d73be0720,328.15,101.000,Liquid,1,O,Solvent,1.0,,0.836,0.0090,,,10.1021/je900457z
1594,cbd6bbfd87bf4a44a6ab481b7e41ab4d,298.15,101.325,Liquid + Gas,1,CCO,Solvent,1.0,,,,42.26,0.02,10.1016/S0021-9614(71)80108-8


# 2) Estimating Data Sets

## Loading data set and FF parameters

In [12]:
from openff.evaluator.datasets import PhysicalPropertyDataSet
from openff.evaluator.forcefield import SmirnoffForceFieldSource

# load data
data_set_path = Path('filtered_data_set.json')
data_set = PhysicalPropertyDataSet.from_json(data_set_path)

# load FF
force_field_path = "openff-1.0.0.offxml"
force_field_source = SmirnoffForceFieldSource.from_path(force_field_path)

## Defining Calculation Schemas

In [13]:
from openff.evaluator.properties import Density, EnthalpyOfVaporization
from openff.evaluator.client import RequestOptions

density_schema = Density.default_simulation_schema(n_molecules=256)
h_vap_schema = EnthalpyOfVaporization.default_simulation_schema(n_molecules=256)

# Create an options object which defines how the data set should be estimated.
estimation_options = RequestOptions()

# Specify that we only wish to use molecular simulation to estimate the data set.
estimation_options.calculation_layers = ["SimulationLayer"]

# Add our custom schemas, specifying that the should be used by the 'SimulationLayer'
estimation_options.add_schema("SimulationLayer", "Density", density_schema)
estimation_options.add_schema("SimulationLayer", "EnthalpyOfVaporization", h_vap_schema)

## Launching a Server and Client

In [14]:
from openff.evaluator.backends import ComputeResources
from openff.evaluator.backends.dask import DaskLocalCluster
from openff.evaluator.server import EvaluatorServer
from openff.evaluator.client import EvaluatorClient

# define client to submit queries
evaluator_client = EvaluatorClient()

# define available / preferred resources
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
resources = ComputeResources(
    number_of_threads=1,
    number_of_gpus=1,
    preferred_gpu_toolkit=ComputeResources.GPUToolkit.CUDA,
)

with DaskLocalCluster(number_of_workers=1, resources_per_worker=resources) as calculation_backend:
    # spin up server
    evaluator_server = EvaluatorServer(calculation_backend=calculation_backend)
    evaluator_server.start(asynchronous=True)

    # estimate data set by submitting calculation schemas to newly-created server
    request, exception = evaluator_client.request_estimate(
        property_set=data_set,
        force_field_source=force_field_source,
        options=estimation_options,
    )

    # Wait for the results.
    results, exception = request.results(synchronous=True, polling_interval=30)

INFO:distributed.http.proxy:To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
INFO:distributed.scheduler:State start
INFO:distributed.scheduler:  Scheduler at: inproc://172.21.16.71/172456/1
INFO:distributed.scheduler:  dashboard at:  http://172.21.16.71:8787/status
INFO:distributed.scheduler:Registering Worker plugin shuffle
INFO:distributed.worker:      Start worker at: inproc://172.21.16.71/172456/4
INFO:distributed.worker:         Listening to:         inproc172.21.16.71
INFO:distributed.worker:          Worker name:                          0
INFO:distributed.worker:         dashboard at:         172.21.16.71:41917
INFO:distributed.worker:Waiting to connect to: inproc://172.21.16.71/172456/1
INFO:distributed.worker:-------------------------------------------------
INFO:distributed.worker:              Threads:                          1
INFO:distributed.worker:               Memory:                  31.00 GiB

ValueError: No calculation schema could be found for the OsmoticCoefficient properties.