# Core Imports and Setup

In [1]:
import os
from pathlib import Path

import warnings
warnings.filterwarnings("ignore")

import logging
logging.getLogger("openff.toolkit").setLevel(logging.ERROR)

from openff import toolkit, evaluator

from openff.units import unit

# 0) Registering Custom ThermoML Properties

In [2]:
from openff.evaluator import properties
from openff.evaluator.datasets.thermoml import thermoml_property
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase

@thermoml_property("Osmotic coefficient", supported_phases=PropertyPhase.Liquid)
class OsmoticCoefficient(PhysicalProperty):
    """A class representation of a osmotic coeff property"""

    @classmethod
    def default_unit(cls):
        return unit.dimensionless
    
...

custom_thermoml_props = [
    OsmoticCoefficient,
]

for custom_prop_cls in custom_thermoml_props:    
    setattr(properties, custom_prop_cls.__name__, custom_prop_cls)

# 1) - Loading ThermoML Data Sets

## Extracting data from ThermoML

In [3]:
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase, PhysicalPropertyDataSet
from openff.evaluator.datasets.thermoml import thermoml_property, ThermoMLDataSet

data_set_initial = PhysicalPropertyDataSet.from_json("training-properties-with-water.json")
data_set_initial.to_pandas()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Density Value (g / ml),Density Uncertainty (g / ml),Source
0,6385,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.24520,,O,Solvent,0.75480,,,,0.903811,,10.1016/j.fluid.2010.05.001
1,6386,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.50500,,O,Solvent,0.49500,,,,0.858158,,10.1016/j.fluid.2010.05.001
2,6387,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.79070,,O,Solvent,0.20930,,,,0.826047,,10.1016/j.fluid.2010.05.001
3,6388,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.26433,,O,Solvent,0.73567,,,,0.959000,,10.1021/acs.jced.6b00888
4,6389,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.50770,,O,Solvent,0.49230,,,,0.923722,,10.1016/j.jct.2004.11.016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,7294,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.49110,,O,Solvent,0.50890,,-3.131,,,,10.1016/j.jct.2015.06.006
119,7295,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.71490,,O,Solvent,0.28510,,-1.753,,,,10.1016/j.jct.2015.06.006
120,7344,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.30050,,O,Solvent,0.69950,,-6.588,,,,10.1016/j.jct.2015.04.030
121,7345,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.49980,,O,Solvent,0.50020,,-5.544,,,,10.1016/j.jct.2015.04.030


In [4]:
print(len(data_set_initial))
print(data_set_initial.property_types)
print(data_set_initial.substances)

123
{'EnthalpyOfMixing', 'Density'}
{<Substance OCCN(CCO)CCO{solv}{x=0.710000}|O{solv}{x=0.290000}>, <Substance Cc1cccnc1{solv}{x=0.225000}|O{solv}{x=0.775000}>, <Substance OC1=NCCC1{solv}{x=0.478300}|O{solv}{x=0.521700}>, <Substance Cc1ccncc1{solv}{x=0.750000}|O{solv}{x=0.250000}>, <Substance CNCCO{solv}{x=0.501700}|O{solv}{x=0.498300}>, <Substance CC(C)O{solv}{x=0.203100}|O{solv}{x=0.796900}>, <Substance OC1=NCCC1{solv}{x=0.744800}|O{solv}{x=0.255200}>, <Substance CN(C)CCO{solv}{x=0.536500}|O{solv}{x=0.463500}>, <Substance OCCNCCO{solv}{x=0.700042}|O{solv}{x=0.299958}>, <Substance NCCCO{solv}{x=0.489642}|O{solv}{x=0.510358}>, <Substance CN1CCNCC1{solv}{x=0.209100}|O{solv}{x=0.790900}>, <Substance NCCNCCN{solv}{x=0.241600}|O{solv}{x=0.758400}>, <Substance OCCN1CCOCC1{solv}{x=0.246600}|O{solv}{x=0.753400}>, <Substance CN(C)CCCO{solv}{x=0.501900}|O{solv}{x=0.498100}>, <Substance Cc1cccnc1{solv}{x=0.500000}|O{solv}{x=0.500000}>, <Substance OCCN(CCO)CCO{solv}{x=0.206300}|O{solv}{x=0.79370

## Filtering data set

In [5]:
from openff.evaluator.datasets.curation.components.filtering import FilterByPropertyTypes, FilterByPropertyTypesSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByTemperature, FilterByTemperatureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByPressure, FilterByPressureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterBySmiles, FilterBySmilesSchema

data_set_hmix= FilterByPropertyTypes.apply(
    data_set_initial, FilterByPropertyTypesSchema(property_types=["EnthalpyOfMixing"]))

print(len(data_set_hmix))

57


### Inspecting and saving new properties

In [6]:
# save for future use
data_set_path = Path('filtered_dataset_hmix.json')
data_set_hmix.json(data_set_path, format=True)

# inspect new properties
pandas_data_set = data_set_hmix.to_pandas()
pandas_data_set.head()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Source
0,6391,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.2052,,O,Solvent,0.7948,,-2.587,,10.1016/j.jct.2007.03.010
1,6392,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.5365,,O,Solvent,0.4635,,-2.575,,10.1016/j.jct.2007.03.010
2,6393,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.7996,,O,Solvent,0.2004,,-1.247,,10.1016/j.jct.2007.03.010
3,6395,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.2008,,O,Solvent,0.7992,,-2.185,,10.1016/j.jct.2015.04.030
4,6396,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.5002,,O,Solvent,0.4998,,-2.504,,10.1016/j.jct.2015.04.030


# 2) Estimating Data Sets

### Loading data set and FF parameters

In [7]:
from openff.evaluator.forcefield import SmirnoffForceFieldSource

# load data
data_set_path = Path('filtered_dataset_hmix.json')
data_set = PhysicalPropertyDataSet.from_json(data_set_path)

# load FF
ff_path = "openff-1.0.0.offxml"
force_field_source = SmirnoffForceFieldSource.from_path(ff_path)


In [8]:
# from openff.evaluator.protocols.forcefield import BuildSmirnoffSystem

# opc3="forcefields/opc3.offxml"
# sage="openff-2.0.0.offxml"

# assign_parameters=BuildSmirnoffSystem(f"assign_parameters")
# assign_parameters.water_model=opc3
# assign_parameters.force_field_path=sage
# force_field_source=SmirnoffForceFieldSource.from_path(assign_parameters)

### Defining Calculation Schemas

In [9]:
from openff.evaluator.properties import Density, EnthalpyOfMixing
from openff.evaluator.client import RequestOptions

# density_schema = Density.default_simulation_schema(n_molecules=256)
h_mix_schema = EnthalpyOfMixing.default_simulation_schema(n_molecules=256)

# Create an options object which defines how the data set should be estimated.
estimation_options = RequestOptions()

# Specify that we only wish to use molecular simulation to estimate the data set.
estimation_options.calculation_layers = ["SimulationLayer"]

# Add our custom schemas, specifying that the should be used by the 'SimulationLayer'
# estimation_options.add_schema("SimulationLayer", "Density", density_schema)
estimation_options.add_schema("SimulationLayer", "EnthalpyOfMixing", h_mix_schema)

## Launching a Server and Client

In [10]:
from openff.evaluator.backends import ComputeResources
from openff.evaluator.backends.dask import DaskLocalCluster
from openff.evaluator.server import EvaluatorServer
from openff.evaluator.client import EvaluatorClient
from openff.evaluator.client import ConnectionOptions

# define client to submit queries
port = 8118
evaluator_client = EvaluatorClient(ConnectionOptions(server_port=port))

# define available / preferred resources
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
resources = ComputeResources(
    number_of_threads=1,
    number_of_gpus=1,
    preferred_gpu_toolkit=ComputeResources.GPUToolkit.CUDA,
)

with DaskLocalCluster(number_of_workers=1, resources_per_worker=resources) as calculation_backend:
    # spin up server
    evaluator_server = EvaluatorServer(calculation_backend=calculation_backend, delete_working_files=False, port=port)
    evaluator_server.start(asynchronous=True)

    # estimate data set by submitting calculation schemas to newly-created server
    request, exception = evaluator_client.request_estimate(
        property_set=data_set,
        force_field_source=force_field_source,
        options=estimation_options,
    )

    # Wait for the results.
    results, exception = request.results(synchronous=True, polling_interval=30)
    assert exception is None



In [11]:
print(len(results.queued_properties))
print(len(results.estimated_properties))

print(len(results.unsuccessful_properties))
print(len(results.exceptions))

0
57
0
0


In [12]:
results.estimated_properties.json("estimated_dataset_hmix.json", format=True)

'{\n  "@type": "openff.evaluator.datasets.datasets.PhysicalPropertyDataSet",\n  "properties": [\n    {\n      "@type": "openff.evaluator.properties.enthalpy.EnthalpyOfMixing",\n      "gradients": [],\n      "id": "6391",\n      "phase": 2,\n      "source": {\n        "@type": "openff.evaluator.datasets.provenance.CalculationSource",\n        "fidelity": "SimulationLayer",\n        "provenance": "{\\"protocol_schemas\\": [{\\"id\\": \\"6391|build_coordinates_mixture\\", \\"type\\": \\"BuildCoordinatesPackmol\\", \\"inputs\\": {\\".allow_merging\\": true, \\".max_molecules\\": 256, \\".count_exact_amount\\": true, \\".mass_density\\": {\\"value\\": 0.95, \\"unit\\": \\"gram / milliliter\\", \\"@type\\": \\"openff.evaluator.unit.Quantity\\"}, \\".box_aspect_ratio\\": [1.0, 1.0, 1.0], \\".substance\\": {\\"components\\": [{\\"smiles\\": \\"CN(C)CCO\\", \\"role\\": {\\"value\\": \\"solv\\", \\"@type\\": \\"openff.evaluator.substances.components.Component.Role\\"}, \\"@type\\": \\"openff.eva

## 3) Analysing Data Sets

### Loading the Data Sets

In [13]:
experimental_data_set_path = "filtered_dataset_hmix.json"
estimated_data_set_path = "estimated_dataset_hmix.json"

experimental_data_set = PhysicalPropertyDataSet.from_json(experimental_data_set_path)
estimated_data_set = PhysicalPropertyDataSet.from_json(estimated_data_set_path)

In [14]:
experimental_data_set.to_pandas().head()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Source
0,6391,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.2052,,O,Solvent,0.7948,,-2.587,,10.1016/j.jct.2007.03.010
1,6392,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.5365,,O,Solvent,0.4635,,-2.575,,10.1016/j.jct.2007.03.010
2,6393,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.7996,,O,Solvent,0.2004,,-1.247,,10.1016/j.jct.2007.03.010
3,6395,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.2008,,O,Solvent,0.7992,,-2.185,,10.1016/j.jct.2015.04.030
4,6396,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.5002,,O,Solvent,0.4998,,-2.504,,10.1016/j.jct.2015.04.030


In [15]:
estimated_data_set.to_pandas().head()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Source
0,6391,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.2052,,O,Solvent,0.7948,,-1.143498,0.089307,SimulationLayer
1,6392,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.5365,,O,Solvent,0.4635,,-1.965739,0.083544,SimulationLayer
2,6393,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.7996,,O,Solvent,0.2004,,-0.764064,0.094474,SimulationLayer
3,6395,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.2008,,O,Solvent,0.7992,,-0.961122,0.057662,SimulationLayer
4,6396,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.5002,,O,Solvent,0.4998,,-1.187426,0.080617,SimulationLayer
