# Chemprop scikit-learn Estimator Example
Demonstrating usage of modules in chemprop.sklearn_integration.chemprop_estimator with common scikit-learn workflows including cross-validation and hyperparameter tuning.

In [1]:
from chemprop.sklearn_integration import ChempropMoleculeTransformer, ChempropReactionTransformer, ChempropMulticomponentTransformer, ChempropRegressor, ChempropEnsembleRegressor
import numpy as np

# Sample data
X_mol = np.array([
    "CCO", "CCN", "CCC", "COC", "CNC", "CCCl", "CCBr", "CCF", "CCI", "CC=O",
    "CC#N", "CC(C)O", "CC(C)N", "CC(C)C", "COC(C)", "CN(C)C", "C1CCCCC1", "C1=CC=CC=C1",
    "CC(C)(C)O", "CC(C)(C)N", "COCCO", "CCOC(=O)C", "CCN(CC)CC", "CN1CCCC1", "C(CO)N"
])
X_rxn = np.array([
    "CCO>>CC=O","CCBr.CN>>CCN","CC(=O)O.CCO>>CC(=O)OCC","C=CC=C.C=C>>c1ccccc1","CC(=O)Cl.O>>CC(=O)O",
    "c1ccccc1.BrBr>>c1ccc(Br)cc1","BrCCBr>>BrC#CBr","ClCCCl.O>>ClCCO","CO.O=C=O>>COC(=O)O","CC(C)(C)Br.CN>>CC(C)(C)CN",
    "C=O.CC>>CCO","CC=O.CC=O>>CC(O)CC=O","CCBr.C#N>>CC#N","O=CC=O.CC>>O=CC(C)O","CC(=O)OCC.CO>>CC(=O)OCO",
    "c1ccccc1O.ClCl>>c1ccc(Cl)cc1O","CCBr.O>>CCO","C=C.C=C>>C1=CCC=CC1","CC(=O)O.O>>CC(=O)OO"
])
y = np.array([
    0.50, 0.60, 0.55, 0.58, 0.52, 0.62, 0.65, 0.57, 0.59, 0.61,
    0.56, 0.60, 0.54, 0.53, 0.62, 0.63, 0.45, 0.40,
    0.64, 0.66, 0.59, 0.51, 0.48, 0.46, 0.49
])

## Building a pipeline
Pipeline ChempropMoleculeTransformer/ChempropReactionTransformer/ ChempropMulticomponentTransformer and ChempropRegressor together to obtain an sklearn module that encapulates full chemprop capabilities.

In [2]:
from sklearn.pipeline import Pipeline

mol_pipeline = Pipeline([
    ("featurizer", ChempropMoleculeTransformer(keep_h=True, add_h=True, ignore_stereo=True, reorder_atoms=True)),
    ("regressor", ChempropRegressor(batch_size=8, message_hidden_dim=100, depth=5, ffn_num_layers=2, epochs=5, patience=5))
])
mol_pipeline.fit(X_mol, y)
y_pred = mol_pipeline.predict(X_mol[:5])
print(f"Predictions: {y_pred}")

rxn_pipeline = Pipeline([
    ("featurizer", ChempropReactionTransformer()),
    ("regressor", ChempropEnsembleRegressor(ensemble_size=3, epochs=5)) # Enables training an ensemble of chemprop regressors and predicting based on their average
])
rxn_pipeline.fit(X_rxn, y)
scores = rxn_pipeline.score(X_rxn[:5],y[:5], metric="mse") # suppot metrics in ["mae", "rmse", "mse", "r2", "accuracy"] and defaulted to "rmse"
print(f"MSE: {scores}")

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (4) is smaller 

Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 42.16it/s, v_num=562, train_loss_step=0.0184, train_loss_epoch=0.996]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 33.69it/s, v_num=562, train_loss_step=0.0184, train_loss_epoch=0.996]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 772.15it/s]
Predictions: [[0.55926204]
 [0.55918133]
 [0.5590431 ]
 [0.559295  ]
 [0.5591987 ]]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (1) is smaller 

Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 17.72it/s, v_num=564, train_loss_step=0.980, train_loss_epoch=0.980]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 11.01it/s, v_num=564, train_loss_step=0.980, train_loss_epoch=0.980]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (1) is smaller 

Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 15.18it/s, v_num=565, train_loss_step=0.976, train_loss_epoch=0.976]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 10.00it/s, v_num=565, train_loss_step=0.976, train_loss_epoch=0.976]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name    

Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 15.09it/s, v_num=566, train_loss_step=0.982, train_loss_epoch=0.982]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 10.08it/s, v_num=566, train_loss_step=0.982, train_loss_epoch=0.982]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 92.01it/s] 

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 108.77it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 141.05it/s]
MSE: [0.04094457458187538]


Alternatively, pass in a path to a csv data file instead of X/y arrays, and include the roles of the columns in arguments of the transformer.

In [3]:
from pathlib import Path
multicomponent_pipeline = Pipeline([
    ("featurizer", ChempropMulticomponentTransformer(
                    smiles_cols=["solvent_smiles"],
                    rxn_cols=["rxn_smiles"],
                    target_cols=["target"],
                )),
    ("regressor", ChempropRegressor(epochs=5))
])

multicomponent_pipeline.fit(X=Path("../../tests/data/regression/rxn+mol/rxn+mol.csv")) #substitute with target datapath
score = multicomponent_pipeline.score(X=Path("../../tests/data/regression/rxn+mol/rxn+mol.csv"))
print(f"RMSE: {score}")

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (7) is smaller 

Epoch 4: 100%|██████████| 7/7 [00:01<00:00,  6.56it/s, v_num=570, train_loss_step=0.473, train_loss_epoch=0.962]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 7/7 [00:01<00:00,  6.23it/s, v_num=570, train_loss_step=0.473, train_loss_epoch=0.962]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 7/7 [00:00<00:00, 12.31it/s]
RMSE: [2.886401268439276]


## Applying the Sklearn Toolbox
You can easily validate the pipeline on your dataset with the sklearn cross_val_score function. However, note that sklearn helpers do not recognize path input, so make sure that the data is inputed as matrix-like X/y.

In [4]:
from sklearn.model_selection import cross_val_score
import pandas as pd

multicomponent_pipeline = Pipeline([
    # component_types have to be specified for each input column in order for multicomponent transformer, if input is matrix-like
    ("featurizer", ChempropMulticomponentTransformer(component_types=["reaction", "molecule"])),
    ("regressor", ChempropRegressor(epochs=5))
])

df = pd.read_csv("../../tests/data/regression/rxn+mol/rxn+mol.csv")
X = df[["rxn_smiles","solvent_smiles"]].to_numpy(dtype=str)
y = df["target"].to_numpy(dtype=float)

scores = cross_val_score(multicomponent_pipeline, X, y, cv=5, scoring='neg_mean_squared_error')
print("Cross-validation MSE scores:", -scores)
print("Average MSE:", -scores.mean())

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  6.91it/s, v_num=572, train_loss_step=0.463, train_loss_epoch=0.977]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  6.25it/s, v_num=572, train_loss_step=0.463, train_loss_epoch=0.977]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 24.15it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  5.98it/s, v_num=574, train_loss_step=0.959, train_loss_epoch=0.964]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  5.50it/s, v_num=574, train_loss_step=0.959, train_loss_epoch=0.964]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 22.20it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  5.81it/s, v_num=576, train_loss_step=0.599, train_loss_epoch=0.971]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  5.37it/s, v_num=576, train_loss_step=0.599, train_loss_epoch=0.971]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 19.43it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.94it/s, v_num=578, train_loss_step=0.553, train_loss_epoch=0.937]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.58it/s, v_num=578, train_loss_step=0.553, train_loss_epoch=0.937]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 21.37it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  6.55it/s, v_num=580, train_loss_step=1.070, train_loss_epoch=0.962]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  6.02it/s, v_num=580, train_loss_step=1.070, train_loss_epoch=0.962]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00,  8.70it/s]
Cross-validation MSE scores: [ 7.01979769  4.73737702 15.56364967 10.87271144  4.46612433]
Average MSE: 8.531932029060028


You can also tune the hyperparameters of the chemprop pipeline with the sklearn GridSearchCV function.

In [5]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
param_grid = {
    'regressor__dropout': [0,0.2],
    'regressor__depth': [3, 6],
    'regressor__ffn_hidden_dim': [300, 1000, 1700, 2400],
    'regressor__ffn_num_layers': [1,2]
}

grid = GridSearchCV(multicomponent_pipeline, param_grid, cv=3, scoring='neg_mean_squared_error')

#Alternatively use RandomizedSearchCV to test a number of randomly sampled parametered values from the grid to prevent exploding runtime.
grid = RandomizedSearchCV(multicomponent_pipeline, param_grid, n_iter=5, cv=3, scoring='neg_mean_squared_error')
grid.fit(X, y)

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.21it/s, v_num=582, train_loss_step=0.697, train_loss_epoch=0.953]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.08it/s, v_num=582, train_loss_step=0.697, train_loss_epoch=0.953]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 12.27it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  4.35it/s, v_num=584, train_loss_step=0.378, train_loss_epoch=0.990]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  4.19it/s, v_num=584, train_loss_step=0.378, train_loss_epoch=0.990]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 12.89it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.86it/s, v_num=586, train_loss_step=0.757, train_loss_epoch=0.959]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.67it/s, v_num=586, train_loss_step=0.757, train_loss_epoch=0.959]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 13.49it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.24it/s, v_num=588, train_loss_step=0.301, train_loss_epoch=0.977]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:02<00:00,  2.46it/s, v_num=588, train_loss_step=0.301, train_loss_epoch=0.977]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 12.11it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  2.71it/s, v_num=590, train_loss_step=0.332, train_loss_epoch=0.990]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s, v_num=590, train_loss_step=0.332, train_loss_epoch=0.990]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 10.75it/s]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name    

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  2.88it/s, v_num=592, train_loss_step=0.570, train_loss_epoch=0.971]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:02<00:00,  2.26it/s, v_num=592, train_loss_step=0.570, train_loss_epoch=0.971]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False





TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 11.76it/s]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name    

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  4.14it/s, v_num=594, train_loss_step=0.856, train_loss_epoch=0.941]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.78it/s, v_num=594, train_loss_step=0.856, train_loss_epoch=0.941]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00,  9.36it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  4.35it/s, v_num=596, train_loss_step=0.225, train_loss_epoch=0.983]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.90it/s, v_num=596, train_loss_step=0.225, train_loss_epoch=0.983]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 12.91it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.77it/s, v_num=598, train_loss_step=1.220, train_loss_epoch=0.942]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.42it/s, v_num=598, train_loss_step=1.220, train_loss_epoch=0.942]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 14.05it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.99it/s, v_num=600, train_loss_step=1.140, train_loss_epoch=0.955]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.71it/s, v_num=600, train_loss_step=1.140, train_loss_epoch=0.955]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 11.63it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  4.01it/s, v_num=602, train_loss_step=1.020, train_loss_epoch=0.976]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.75it/s, v_num=602, train_loss_step=1.020, train_loss_epoch=0.976]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 12.09it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  4.11it/s, v_num=604, train_loss_step=0.515, train_loss_epoch=0.952]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.84it/s, v_num=604, train_loss_step=0.515, train_loss_epoch=0.952]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 14.52it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  6.70it/s, v_num=606, train_loss_step=1.690, train_loss_epoch=0.939]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  5.69it/s, v_num=606, train_loss_step=1.690, train_loss_epoch=0.939]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 19.43it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  7.12it/s, v_num=608, train_loss_step=1.800, train_loss_epoch=0.961]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  6.08it/s, v_num=608, train_loss_step=1.800, train_loss_epoch=0.961]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 19.26it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  5.58it/s, v_num=610, train_loss_step=0.307, train_loss_epoch=0.956]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  4.71it/s, v_num=610, train_loss_step=0.307, train_loss_epoch=0.956]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 20.52it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (7) is smaller 

Epoch 4: 100%|██████████| 7/7 [00:01<00:00,  6.39it/s, v_num=612, train_loss_step=0.462, train_loss_epoch=0.901]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 7/7 [00:01<00:00,  5.53it/s, v_num=612, train_loss_step=0.462, train_loss_epoch=0.901]


0,1,2
,estimator,Pipeline(step...r(epochs=5))])
,param_distributions,"{'regressor__depth': [3, 6], 'regressor__dropout': [0, 0.2], 'regressor__ffn_hidden_dim': [300, 1000, ...], 'regressor__ffn_num_layers': [1, 2]}"
,n_iter,5
,scoring,'neg_mean_squared_error'
,n_jobs,
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,random_state,

0,1,2
,component_types,"['reaction', 'molecule']"
,keep_h,False
,add_h,False
,ignore_stereo,False
,reorder_atoms,False
,smiles_cols,
,rxn_cols,
,target_cols,
,ignore_cols,
,weight_col,

0,1,2
,num_workers,0
,batch_size,64
,output_dir,WindowsPath('...-01T20-58-46')
,checkpoint,
,molecule_featurizers,
,no_descriptor_scaling,False
,message_hidden_dim,300
,depth,3
,dropout,0
,aggregation,'norm'


In [6]:
print("Best parameters:", grid.best_params_)
print("Best score (MSE):", -grid.best_score_)

Best parameters: {'regressor__ffn_num_layers': 1, 'regressor__ffn_hidden_dim': 2400, 'regressor__dropout': 0, 'regressor__depth': 3}
Best score (MSE): 8.469136605212476


## Saving model(s) & Reloading from checkpoint
Save a model as best.pt with the _save_model method of the regressor object.

In [7]:
best_regressor = grid.best_estimator_["regressor"]
best_regressor.save_model("checkpoints") # make sure to point to an existing directory

Load a model by specifying the checkpoint argument of the regressor with a list of paths to .pt/.ckpt files.

In [8]:
model_path = Path("../../tests/data/example_model_v2_regression_rxn+mol.pt")
loaded_regressor = ChempropRegressor(checkpoint = [model_path])

#load multiple models with the ensemble regressor
loaded_ensemble_regressor = ChempropRegressor(checkpoint = [model_path, model_path])