# Core Imports and Setup

In [11]:
import os
from pathlib import Path

# Suppress every Python warning for the rest of the session.
# NOTE(review): a blanket "ignore" also hides deprecation and numerical
# warnings that may matter - consider narrowing it by category or module.
import warnings
warnings.filterwarnings("ignore")

# Only let ERROR-and-above records through the "openff.toolkit" logger.
import logging
logging.getLogger("openff.toolkit").setLevel(logging.ERROR)

# Imported only after the warning filter and logger level are in place
# (presumably so that import-time messages are silenced as well - confirm).
from openff import toolkit, evaluator

# 0) Registering Custom ThermoML Properties

In [12]:
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase
from openff.evaluator.datasets.thermoml import thermoml_property
from openff.evaluator import properties

from openff.units import unit

@thermoml_property("Osmotic coefficient", supported_phases=PropertyPhase.Liquid)
class OsmoticCoefficient(PhysicalProperty):
    """Physical property representing an osmotic coefficient.

    The decorator registers this class with the ThermoML parser under the
    property name "Osmotic coefficient", for liquid-phase data only.
    """

    @classmethod
    def default_unit(cls):
        """Return the default unit of this property (dimensionless)."""
        return unit.dimensionless

# Expose the class as an attribute of ``openff.evaluator.properties`` under its
# own class name (presumably so that code which looks property classes up on
# that module can find it - TODO confirm).
setattr(properties, OsmoticCoefficient.__name__, OsmoticCoefficient)

# 1) Loading ThermoML Data Sets

## Extracting data from ThermoML

In [13]:
from openff.evaluator.datasets import PhysicalProperty, PropertyPhase
from openff.evaluator.datasets.thermoml import thermoml_property
from openff.units import unit
from openff.evaluator.datasets.thermoml import ThermoMLDataSet

# DOIs of the source publications whose ThermoML entries are loaded.
thermoml_dois = (
    "10.1016/j.jct.2013.08.018",
    "10.1016/j.fluid.2006.09.025",
    "10.1016/j.jct.2009.06.024",
    "10.1016/j.jct.2011.03.012",
    "10.1016/j.jct.2008.12.021",
    "10.1021/je400821q",
    "10.1021/je4008475",
    "10.1016/j.jct.2011.06.018",
    "10.1016/j.jct.2008.05.011",
    "10.1016/j.fluid.2014.02.004",
    "10.1021/je500772z",
    "10.1021/je100720x",
    "10.1016/j.fluid.2007.03.019",
    "10.1016/j.jct.2008.08.011",
    "10.1016/j.jct.2012.01.007",
    "10.1016/j.fluid.2015.08.035",
    "10.1016/j.jct.2008.07.011",
    "10.1016/j.fluid.2009.02.007",
    "10.1016/j.jct.2017.05.006",
    "10.1016/j.jct.2014.11.014",
    "10.1016/j.jct.2016.07.003",
    "10.1021/acs.jced.5b00184",
    "10.1021/je800609u",
    "10.1021/acs.jced.8b00400",
    "10.1016/j.jct.2014.10.008",
    "10.1021/je050348f",
    "10.1016/j.jct.2016.06.034",
    "10.1007/s10765-009-0566-6",
    "10.1016/j.fluid.2004.11.022",
    "10.1021/je800307g",
    "10.1021/je100760v",
    "10.1021/je700751a",
    "10.1016/j.jct.2019.105878",
    "10.1016/j.jct.2013.03.003",
    "10.1016/j.jct.2017.09.012",
    "10.1021/je900457z",
    "10.1021/acs.jced.9b00112",
    "10.1016/j.jct.2014.07.010",
    "10.1021/je500271z",
)

# Build a single data set from all of the listed DOIs.
data_set = ThermoMLDataSet.from_doi(*thermoml_dois)

In [14]:
# Quick check of what was loaded: the size of the data set and the set of
# property types it contains.
len(data_set), data_set.property_types

(2524, {'Density', 'OsmoticCoefficient'})

In [15]:
# Look up the conversion function registered under the ThermoML property name
# "Osmotic coefficient"; the recorded output shows it bound to the notebook's
# OsmoticCoefficient class.
ThermoMLDataSet.registered_properties['Osmotic coefficient'].conversion_function

functools.partial(<function _default_mapping at 0x7f87b01db640>, <class '__main__.OsmoticCoefficient'>)

## Filtering the data set

In [16]:
from openff.evaluator.datasets.curation.components.filtering import FilterByPropertyTypes, FilterByPropertyTypesSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByTemperature, FilterByTemperatureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByPressure, FilterByPressureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterBySmiles, FilterBySmilesSchema

# Keep only the osmotic coefficient entries of the data set.
osmotic_only_schema = FilterByPropertyTypesSchema(
    property_types=["OsmoticCoefficient"]
)
data_set = FilterByPropertyTypes.apply(data_set, osmotic_only_schema)

In [17]:
# Show the property-filtered data set as a pandas DataFrame.
# NOTE(review): every row visible in the recorded output has N Components = 1
# with water ("O") as the only component - confirm that the solute information
# was not lost when the data were loaded.
data_set.to_pandas()

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,OsmoticCoefficient Value (),OsmoticCoefficient Uncertainty (),Source
0,3f6d591b65c2487e82e94ce2f17390ed,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.9100,0.00550,10.1016/j.jct.2013.08.018
1,684fa1a3918744aea34cbff3d01ade8f,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.9060,0.00550,10.1016/j.jct.2013.08.018
2,84e553880d5b4026b35c8c4e7ddbfd87,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.8970,0.00550,10.1016/j.jct.2013.08.018
3,7b254dbd0d20414cbef7cb5a1a8250c2,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.8910,0.00550,10.1016/j.jct.2013.08.018
4,0ac9c8674fd847abbeaa78bbee8bbb52,298.1500,101.0,Liquid,1,O,Solvent,1.0,,0.9490,0.00500,10.1016/j.jct.2013.08.018
...,...,...,...,...,...,...,...,...,...,...,...,...
1913,38b7b750b9b04ffcbb0baf9cbc49a8d8,273.1244,101.0,Liquid,1,O,Solvent,1.0,,0.7321,0.00670,10.1021/acs.jced.9b00112
1914,c6770abdcd7442658f762ed75a51e33b,273.1243,101.0,Liquid,1,O,Solvent,1.0,,0.7308,0.00675,10.1021/acs.jced.9b00112
1915,2288b2e3b9e14bea8962050f17415e93,273.1218,101.0,Liquid,1,O,Solvent,1.0,,0.7229,0.00695,10.1021/acs.jced.9b00112
1916,6c0c68c6b4214a86b70b061b0fcf4cc5,273.1207,101.0,Liquid,1,O,Solvent,1.0,,0.7193,0.00700,10.1021/acs.jced.9b00112


In [18]:
from openff.evaluator.datasets.curation.components.filtering import FilterByPropertyTypes, FilterByPropertyTypesSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByTemperature, FilterByTemperatureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterByPressure, FilterByPressureSchema
from openff.evaluator.datasets.curation.components.filtering import FilterBySmiles, FilterBySmilesSchema

# Temperature window (bounds presumably in K, matching the "Temperature (K)"
# column of the data frame - confirm).
temperature_window = FilterByTemperatureSchema(
    minimum_temperature=298.0,
    maximum_temperature=330.0,
)
data_set = FilterByTemperature.apply(data_set, temperature_window)

# Pressure window (bounds presumably in kPa, matching the "Pressure (kPa)"
# column of the data frame - confirm).
pressure_window = FilterByPressureSchema(
    minimum_pressure=100.0,
    maximum_pressure=105.426,
)
data_set = FilterByPressure.apply(data_set, pressure_window)

# No SMILES filter is applied at this stage; a FilterBySmiles step restricted
# to "[Na+].[Cl-]" is currently disabled.

print(len(data_set))

1594


In [19]:
# Columns shown in the preview of the filtered data.
preview_columns = [
    "Temperature (K)",
    "Pressure (kPa)",
    "Component 1",
    "OsmoticCoefficient Value ()",
    "Source",
]

# Convert the filtered data set to a DataFrame and display its first rows,
# restricted to the preview columns.
pandas_data_set = data_set.to_pandas()
pandas_data_set[preview_columns].head()

Unnamed: 0,Temperature (K),Pressure (kPa),Component 1,OsmoticCoefficient Value (),Source
0,298.15,101.0,O,0.91,10.1016/j.jct.2013.08.018
1,298.15,101.0,O,0.906,10.1016/j.jct.2013.08.018
2,298.15,101.0,O,0.897,10.1016/j.jct.2013.08.018
3,298.15,101.0,O,0.891,10.1016/j.jct.2013.08.018
4,298.15,101.0,O,0.949,10.1016/j.jct.2013.08.018


In [20]:
from openff.evaluator.substances import Substance

