# Core Imports and Setup

In [1]:
import os
from pathlib import Path

import warnings
warnings.filterwarnings("ignore")

import logging
logging.getLogger("openff.toolkit").setLevel(logging.ERROR)

from openff import toolkit, evaluator

from openff.units import unit

# 0) Registering Custom ThermoML Properties

In [2]:
from openff.evaluator import properties
from openff.evaluator.datasets.thermoml import thermoml_property
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase

@thermoml_property("Osmotic coefficient", supported_phases=PropertyPhase.Liquid)
class OsmoticCoefficient(PhysicalProperty):
    """A class representation of a osmotic coeff property"""

    @classmethod
    def default_unit(cls):
        return unit.dimensionless
    
...

custom_thermoml_props = [
    OsmoticCoefficient,
]

for custom_prop_cls in custom_thermoml_props:    
    setattr(properties, custom_prop_cls.__name__, custom_prop_cls)

# 1) - Loading ThermoML Data Sets

## Extracting data from ThermoML

In [3]:
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase, PhysicalPropertyDataSet
from openff.evaluator.datasets.thermoml import thermoml_property, ThermoMLDataSet

data_set_initial = PhysicalPropertyDataSet.from_json("training-properties-with-water.json")
data_set_initial.to_pandas()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Density Value (g / ml),Density Uncertainty (g / ml),Source
0,6385,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.24520,,O,Solvent,0.75480,,,,0.903811,,10.1016/j.fluid.2010.05.001
1,6386,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.50500,,O,Solvent,0.49500,,,,0.858158,,10.1016/j.fluid.2010.05.001
2,6387,298.15,101.0,Liquid,2,CN1CCCCC1,Solvent,0.79070,,O,Solvent,0.20930,,,,0.826047,,10.1016/j.fluid.2010.05.001
3,6388,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.26433,,O,Solvent,0.73567,,,,0.959000,,10.1021/acs.jced.6b00888
4,6389,298.15,101.3,Liquid,2,CN(C)CCO,Solvent,0.50770,,O,Solvent,0.49230,,,,0.923722,,10.1016/j.jct.2004.11.016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,7294,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.49110,,O,Solvent,0.50890,,-3.131,,,,10.1016/j.jct.2015.06.006
119,7295,298.15,101.0,Liquid,2,C1COCCN1,Solvent,0.71490,,O,Solvent,0.28510,,-1.753,,,,10.1016/j.jct.2015.06.006
120,7344,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.30050,,O,Solvent,0.69950,,-6.588,,,,10.1016/j.jct.2015.04.030
121,7345,303.15,100.0,Liquid,2,NCCCNCCCN,Solvent,0.49980,,O,Solvent,0.50020,,-5.544,,,,10.1016/j.jct.2015.04.030


In [4]:
print(len(data_set_initial))
print(data_set_initial.property_types)
print(data_set_initial.substances)

123
{'EnthalpyOfMixing', 'Density'}
{<Substance CN1CCNCC1{solv}{x=0.209100}|O{solv}{x=0.790900}>, <Substance C1CCNCC1{solv}{x=0.500000}|O{solv}{x=0.500000}>, <Substance OCCNCCO{solv}{x=0.742100}|O{solv}{x=0.257900}>, <Substance C1COCCN1{solv}{x=0.793000}|O{solv}{x=0.207000}>, <Substance CC(C)O{solv}{x=0.504800}|O{solv}{x=0.495200}>, <Substance CN1CCNCC1{solv}{x=0.510900}|O{solv}{x=0.489100}>, <Substance CN1CCCCC1{solv}{x=0.505000}|O{solv}{x=0.495000}>, <Substance OC1=NCCC1{solv}{x=0.231900}|O{solv}{x=0.768100}>, <Substance NCCNCCO{solv}{x=0.070000}|O{solv}{x=0.930000}>, <Substance Cc1cccnc1{solv}{x=0.700000}|O{solv}{x=0.300000}>, <Substance CN1CCNCC1{solv}{x=0.792600}|O{solv}{x=0.207400}>, <Substance NCCNCCO{solv}{x=0.399700}|O{solv}{x=0.600300}>, <Substance OCCNCCO{solv}{x=0.700042}|O{solv}{x=0.299958}>, <Substance C1CCNCC1{solv}{x=0.250000}|O{solv}{x=0.750000}>, <Substance NCCNCCN{solv}{x=0.241600}|O{solv}{x=0.758400}>, <Substance CN(CCO)CCO{solv}{x=0.494100}|O{solv}{x=0.505900}>, <S

## Filtering data set

In [5]:
from openff.evaluator.datasets.curation.components.filtering import FilterByPropertyTypes, FilterByPropertyTypesSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByTemperature, FilterByTemperatureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByPressure, FilterByPressureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterBySmiles, FilterBySmilesSchema

data_set_hmix= FilterByPropertyTypes.apply(
    data_set_initial, FilterByPropertyTypesSchema(property_types=["EnthalpyOfMixing"]))

print(len(data_set_hmix))

57


### Inspecting and saving new properties

In [6]:
# save for future use
data_set_path = Path('filtered_dataset_hmix.json')
data_set_hmix.json(data_set_path, format=True)

# inspect new properties
pandas_data_set = data_set_hmix.to_pandas()
pandas_data_set.head()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),Source
0,6391,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.2052,,O,Solvent,0.7948,,-2.587,,10.1016/j.jct.2007.03.010
1,6392,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.5365,,O,Solvent,0.4635,,-2.575,,10.1016/j.jct.2007.03.010
2,6393,298.15,101.0,Liquid,2,CN(C)CCO,Solvent,0.7996,,O,Solvent,0.2004,,-1.247,,10.1016/j.jct.2007.03.010
3,6395,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.2008,,O,Solvent,0.7992,,-2.185,,10.1016/j.jct.2015.04.030
4,6396,303.15,100.0,Liquid,2,CCN(CC)CCO,Solvent,0.5002,,O,Solvent,0.4998,,-2.504,,10.1016/j.jct.2015.04.030


# 2) Estimating Data Sets

### Loading data set and FF parameters

In [7]:
from openff.evaluator.forcefield import SmirnoffForceFieldSource

# load data
data_set_path = Path('filtered_dataset_hmix.json')
data_set = PhysicalPropertyDataSet.from_json(data_set_path)

# load FF
ff_path = "openff-2.0.0.offxml"
force_field_source = SmirnoffForceFieldSource.from_path(ff_path)


In [31]:
from openff.evaluator.protocols.forcefield import BuildSmirnoffSystem

opc3="forcefields/opc3.offxml"
sage="openff-2.0.0.offxml"

assign_parameters=BuildSmirnoffSystem(f"assign_parameters")
assign_parameters.water_model=opc3
assign_parameters.force_field_path=sage
force_field_source=SmirnoffForceFieldSource.from_object(assign_parameters)

AttributeError: 'BuildSmirnoffSystem' object has no attribute 'to_string'

### Defining Calculation Schemas

In [8]:
from openff.evaluator.properties import Density, EnthalpyOfMixing
from openff.evaluator.client import RequestOptions

# density_schema = Density.default_simulation_schema(n_molecules=256)
h_mix_schema = EnthalpyOfMixing.default_simulation_schema(n_molecules=100)

# Create an options object which defines how the data set should be estimated.
estimation_options = RequestOptions()

# Specify that we only wish to use molecular simulation to estimate the data set.
estimation_options.calculation_layers = ["SimulationLayer"]

# Add our custom schemas, specifying that the should be used by the 'SimulationLayer'
# estimation_options.add_schema("SimulationLayer", "Density", density_schema)
estimation_options.add_schema("SimulationLayer", "EnthalpyOfMixing", h_mix_schema)

## Launching a Server and Client

In [9]:
from openff.evaluator.backends import ComputeResources
from openff.evaluator.backends.dask import DaskLocalCluster
from openff.evaluator.server import EvaluatorServer
from openff.evaluator.client import EvaluatorClient
from openff.evaluator.client import ConnectionOptions

# define client to submit queries
port = 8119
evaluator_client = EvaluatorClient(ConnectionOptions(server_port=port))

# define available / preferred resources
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
resources = ComputeResources(
    number_of_threads=1,
    number_of_gpus=1,
    preferred_gpu_toolkit=ComputeResources.GPUToolkit.CUDA,
)

with DaskLocalCluster(number_of_workers=1, resources_per_worker=resources) as calculation_backend:
    # spin up server
    evaluator_server = EvaluatorServer(calculation_backend=calculation_backend, delete_working_files=False, port=port)
    evaluator_server.start(asynchronous=True)

    # estimate data set by submitting calculation schemas to newly-created server
    request, exception = evaluator_client.request_estimate(
        property_set=data_set,
        force_field_source=force_field_source,
        options=estimation_options,
    )

    # Wait for the results.
    results, exception = request.results(synchronous=True, polling_interval=30)
    assert exception is None

Error processing layer results for request 50b27d965818481ba1d619b707823785
Traceback (most recent call last):
  File "/home/bamo6610/miniconda3/envs/evaluator-blanca/lib/python3.10/site-packages/openff/evaluator/layers/layers.py", line 221, in _process_results
    results = list(results_future.result())
  File "/home/bamo6610/miniconda3/envs/evaluator-blanca/lib/python3.10/site-packages/distributed/client.py", line 322, in result
    return self.client.sync(self._result, callback_timeout=timeout)
  File "/home/bamo6610/miniconda3/envs/evaluator-blanca/lib/python3.10/site-packages/distributed/client.py", line 336, in _result
    raise exception
concurrent.futures._base.CancelledError: _wrapped_function-52f58341c1531563e6c556261a739f80


In [None]:
a = results.estimated_properties.json("estimated_dataset_hmix.json", format=True)

## Analysing Data Sets

### Loading the Data Sets

In [None]:
experimental_data_set_path = "filtered_dataset_hmix.json"
estimated_data_set_path = "estimated_dataset_hmix.json"

# If you have not yet completed the previous tutorials or do not have the data set files
# available, this tutorial will use copies provided by the framework

if not (
    Path(experimental_data_set_path).exists()
    and Path(estimated_data_set_path).exists()
):
    from openff.evaluator.utils import get_data_filename

    experimental_data_set_path = get_data_filename(
        "tutorials/tutorial01/filtered_data_set.json"
    )
    estimated_data_set_path = get_data_filename(
        "tutorials/tutorial02/estimated_data_set.json"
    )

experimental_data_set = PhysicalPropertyDataSet.from_json(experimental_data_set_path)
estimated_data_set = PhysicalPropertyDataSet.from_json(estimated_data_set_path)