# Chemprop scikit-learn Estimator Example
Demonstrating usage of modules in chemprop.sklearn_integration.chemprop_estimator with common scikit-learn workflows including cross-validation and hyperparameter tuning.

In [2]:
from chemprop.sklearn_integration import ChempropMoleculeTransformer, ChempropReactionTransformer, ChempropMulticomponentTransformer, ChempropRegressor, ChempropEnsembleRegressor
import numpy as np

# Sample data
X_mol = np.array([
    "CCO", "CCN", "CCC", "COC", "CNC", "CCCl", "CCBr", "CCF", "CCI", "CC=O",
    "CC#N", "CC(C)O", "CC(C)N", "CC(C)C", "COC(C)", "CN(C)C", "C1CCCCC1", "C1=CC=CC=C1",
    "CC(C)(C)O", "CC(C)(C)N", "COCCO", "CCOC(=O)C", "CCN(CC)CC", "CN1CCCC1", "C(CO)N"
])
X_rxn = np.array([
    "CCO>>CC=O","CCBr.CN>>CCN","CC(=O)O.CCO>>CC(=O)OCC","C=CC=C.C=C>>c1ccccc1","CC(=O)Cl.O>>CC(=O)O",
    "c1ccccc1.BrBr>>c1ccc(Br)cc1","BrCCBr>>BrC#CBr","ClCCCl.O>>ClCCO","CO.O=C=O>>COC(=O)O","CC(C)(C)Br.CN>>CC(C)(C)CN",
    "C=O.CC>>CCO","CC=O.CC=O>>CC(O)CC=O","CCBr.C#N>>CC#N","O=CC=O.CC>>O=CC(C)O","CC(=O)OCC.CO>>CC(=O)OCO",
    "c1ccccc1O.ClCl>>c1ccc(Cl)cc1O","CCBr.O>>CCO","C=C.C=C>>C1=CCC=CC1","CC(=O)O.O>>CC(=O)OO"
])
y = np.array([
    0.50, 0.60, 0.55, 0.58, 0.52, 0.62, 0.65, 0.57, 0.59, 0.61,
    0.56, 0.60, 0.54, 0.53, 0.62, 0.63, 0.45, 0.40,
    0.64, 0.66, 0.59, 0.51, 0.48, 0.46, 0.49
])

## Building a pipeline
Pipeline ChempropMoleculeTransformer/ChempropReactionTransformer/ ChempropMulticomponentTransformer and ChempropRegressor together to obtain an sklearn module that encapulates full chemprop capabilities.

In [4]:
from sklearn.pipeline import Pipeline

mol_pipeline = Pipeline([
    ("featurizer", ChempropMoleculeTransformer(keep_h=True, add_h=True, ignore_stereo=True, reorder_atoms=True)),
    ("regressor", ChempropRegressor(batch_size=8, message_hidden_dim=100, depth=5, ffn_num_layers=2, epochs=5, patience=5))
])
mol_pipeline.fit(X_mol, y)
y_pred = mol_pipeline.predict(X_mol[:5])
print(f"Predictions: {y_pred}")

rxn_pipeline = Pipeline([
    ("featurizer", ChempropReactionTransformer()),
    ("regressor", ChempropEnsembleRegressor(ensemble_size=3, epochs=5)) #Enables training an ensemble of chemprop regressors and predicting based on their average
])
rxn_pipeline.fit(X_rxn, y)
scores = rxn_pipeline.score(X_rxn[:5],y[:5], metric="mse") # suppot metrics in ["mae", "rmse", "mse", "r2", "accuracy"] and defaulted to "rmse"
print(f"MSE: {scores}")

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (4) is smaller 

Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 44.63it/s, v_num=431, train_loss_step=1.920, train_loss_epoch=0.996]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 34.53it/s, v_num=431, train_loss_step=1.920, train_loss_epoch=0.996]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 90.81it/s]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.



Predictions: [0.5569765  0.5569186  0.5568565  0.55700105 0.55692434]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name    

Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 17.82it/s, v_num=433, train_loss_step=0.986, train_loss_epoch=0.986]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 11.90it/s, v_num=433, train_loss_step=0.986, train_loss_epoch=0.986]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name    

Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 25.30it/s, v_num=434, train_loss_step=0.981, train_loss_epoch=0.981]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 16.03it/s, v_num=434, train_loss_step=0.981, train_loss_epoch=0.981]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name    

Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 32.28it/s, v_num=435, train_loss_step=0.984, train_loss_epoch=0.984]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 18.76it/s, v_num=435, train_loss_step=0.984, train_loss_epoch=0.984]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 95.82it/s] 


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 85.49it/s] 

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 103.13it/s]
MSE: 0.04064236407230231


Alternatively, pass in a path to a csv data file instead of X/y arrays, and include the roles of the columns in arguments of the transformer.

In [5]:
from pathlib import Path
multicomponent_pipeline = Pipeline([
    ("featurizer", ChempropMulticomponentTransformer(
                    smiles_cols="solvent_smiles",
                    rxn_cols="rxn_smiles",
                    target_cols="target",
                )),
    ("regressor", ChempropRegressor(epochs=5))
])

multicomponent_pipeline.fit(X=Path("../../tests/data/regression/rxn+mol/rxn+mol.csv")) #substitute with target datapath
score = multicomponent_pipeline.score(X=Path("../../tests/data/regression/rxn+mol/rxn+mol.csv"))
print(f"RMSE: {score}")

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (7) is smaller 

Epoch 4: 100%|██████████| 7/7 [00:00<00:00,  8.56it/s, v_num=439, train_loss_step=0.884, train_loss_epoch=0.920]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 7/7 [00:00<00:00,  8.19it/s, v_num=439, train_loss_step=0.884, train_loss_epoch=0.920]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 7/7 [00:00<00:00, 18.23it/s]
RMSE: 2.8127583706642625


## Applying the Sklearn Toolbox
You can easily validate the pipeline on your dataset with the sklearn cross_val_score function. However, note that sklearn helpers does not recognize path input, so make sure that the data is inputed as matrix-like X/y.

In [None]:
from sklearn.model_selection import cross_val_score
import pandas as pd

multicomponent_pipeline = Pipeline([
    # component_types have to be specified for each input column in order for multicomponent transformer, if input is matrix-like
    ("featurizer", ChempropMulticomponentTransformer(component_types=["reaction", "molecule"])),
    ("regressor", ChempropRegressor(epochs=5))
])

df = pd.read_csv("../../tests/data/regression/rxn+mol/rxn+mol.csv")
X = df[["rxn_smiles","solvent_smiles"]].to_numpy(dtype=str)
y = df["target"].to_numpy(dtype=float)

scores = cross_val_score(multicomponent_pipeline, X, y, cv=5, scoring='neg_mean_squared_error')
print("Cross-validation MSE scores:", -scores)
print("Average MSE:", -scores.mean())

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  8.13it/s, v_num=441, train_loss_step=0.690, train_loss_epoch=0.966]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  7.60it/s, v_num=441, train_loss_step=0.690, train_loss_epoch=0.966]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 33.13it/s]






Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.32it/s, v_num=443, train_loss_step=1.900, train_loss_epoch=0.954]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  8.45it/s, v_num=443, train_loss_step=1.900, train_loss_epoch=0.954]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 34.81it/s]






Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.59it/s, v_num=445, train_loss_step=1.270, train_loss_epoch=0.982]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  8.84it/s, v_num=445, train_loss_step=1.270, train_loss_epoch=0.982]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 30.33it/s]






Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  7.80it/s, v_num=447, train_loss_step=1.410, train_loss_epoch=0.962]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  7.22it/s, v_num=447, train_loss_step=1.410, train_loss_epoch=0.962]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False





TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 33.13it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  6.34it/s, v_num=449, train_loss_step=0.483, train_loss_epoch=0.967]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  5.94it/s, v_num=449, train_loss_step=0.483, train_loss_epoch=0.967]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 22.36it/s]
Cross-validation MSE scores: [ 7.11410377  4.81397046 15.76161768 10.87173608  4.45902203]
Average MSE: 8.604090003607292


You can also tune the hyperparameters of the chemprop pipeline with the sklearn GridSearchCV function.

In [8]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
param_grid = {
    'regressor__dropout': [0,0.2],
    'regressor__depth': [3, 6],
    'regressor__ffn_hidden_dim': [300, 1000, 1700, 2400],
    'regressor__ffn_num_layers': [1,2]
}

grid = GridSearchCV(multicomponent_pipeline, param_grid, cv=3, scoring='neg_mean_squared_error')

#Alternatively use RandomizedSearchCV to test a number of randomly sampled parametered values from the grid to prevent exploding runtime.
grid = RandomizedSearchCV(multicomponent_pipeline, param_grid, n_iter=5, cv=3, scoring='neg_mean_squared_error')
grid.fit(X, y)

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 11.40it/s, v_num=451, train_loss_step=0.561, train_loss_epoch=0.980]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 10.27it/s, v_num=451, train_loss_step=0.561, train_loss_epoch=0.980]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 22.59it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  8.76it/s, v_num=453, train_loss_step=2.070, train_loss_epoch=0.980]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  8.22it/s, v_num=453, train_loss_step=2.070, train_loss_epoch=0.980]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs





c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 18.86it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.75it/s, v_num=455, train_loss_step=0.541, train_loss_epoch=0.963]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.05it/s, v_num=455, train_loss_step=0.541, train_loss_epoch=0.963]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False





TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 29.84it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 10.39it/s, v_num=457, train_loss_step=0.411, train_loss_epoch=0.980]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.46it/s, v_num=457, train_loss_step=0.411, train_loss_epoch=0.980]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 25.31it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  8.83it/s, v_num=459, train_loss_step=0.225, train_loss_epoch=0.980]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  8.18it/s, v_num=459, train_loss_step=0.225, train_loss_epoch=0.980]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 22.66it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 10.12it/s, v_num=461, train_loss_step=0.892, train_loss_epoch=0.947]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.43it/s, v_num=461, train_loss_step=0.892, train_loss_epoch=0.947]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs





c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 29.76it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  8.18it/s, v_num=463, train_loss_step=0.451, train_loss_epoch=0.983]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  7.59it/s, v_num=463, train_loss_step=0.451, train_loss_epoch=0.983]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False





TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 26.41it/s]






Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.91it/s, v_num=465, train_loss_step=0.281, train_loss_epoch=0.982]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.20it/s, v_num=465, train_loss_step=0.281, train_loss_epoch=0.982]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores





HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 23.44it/s]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name    

Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 10.21it/s, v_num=467, train_loss_step=0.299, train_loss_epoch=0.965]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.33it/s, v_num=467, train_loss_step=0.299, train_loss_epoch=0.965]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 29.47it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 10.48it/s, v_num=469, train_loss_step=0.432, train_loss_epoch=0.975]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.71it/s, v_num=469, train_loss_step=0.432, train_loss_epoch=0.975]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs





c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 21.79it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 10.14it/s, v_num=471, train_loss_step=1.190, train_loss_epoch=0.987]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  9.24it/s, v_num=471, train_loss_step=1.190, train_loss_epoch=0.987]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False





TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 17.43it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  7.53it/s, v_num=473, train_loss_step=0.265, train_loss_epoch=0.966]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:00<00:00,  6.93it/s, v_num=473, train_loss_step=0.265, train_loss_epoch=0.966]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 25.81it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.46it/s, v_num=475, train_loss_step=1.130, train_loss_epoch=0.984]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.25it/s, v_num=475, train_loss_step=1.130, train_loss_epoch=0.984]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00,  8.83it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.62it/s, v_num=477, train_loss_step=0.208, train_loss_epoch=0.986]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.38it/s, v_num=477, train_loss_step=0.208, train_loss_epoch=0.986]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00,  9.19it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (5) is smaller 

Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.26it/s, v_num=479, train_loss_step=1.170, train_loss_epoch=0.968]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 5/5 [00:01<00:00,  3.06it/s, v_num=479, train_loss_step=1.170, train_loss_epoch=0.968]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00,  9.12it/s]


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
Loading `train_dataloader` to estimate number of stepping batches.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\jxl05\miniconda3\envs\chemprop1\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (7) is smaller 

Epoch 4: 100%|██████████| 7/7 [00:00<00:00,  9.91it/s, v_num=481, train_loss_step=0.855, train_loss_epoch=0.962]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 7/7 [00:00<00:00,  9.13it/s, v_num=481, train_loss_step=0.855, train_loss_epoch=0.962]


0,1,2
,estimator,Pipeline(step...r(epochs=5))])
,param_distributions,"{'regressor__depth': [3, 6], 'regressor__dropout': [0, 0.2], 'regressor__ffn_hidden_dim': [300, 1000, ...], 'regressor__ffn_num_layers': [1, 2]}"
,n_iter,5
,scoring,'neg_mean_squared_error'
,n_jobs,
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,random_state,

0,1,2
,component_types,"['reaction', 'molecule']"
,keep_h,False
,add_h,False
,ignore_stereo,False
,reorder_atoms,False
,smiles_cols,
,rxn_cols,
,target_cols,
,ignore_cols,
,weight_col,

0,1,2
,num_workers,0
,batch_size,64
,output_dir,WindowsPath('...-29T23-02-16')
,checkpoint,
,molecule_featurizers,
,no_descriptor_scaling,False
,message_hidden_dim,300
,depth,6
,dropout,0
,aggregation,'norm'


In [9]:
print("Best parameters:", grid.best_params_)
print("Best score (MSE):", -grid.best_score_)

Best parameters: {'regressor__depth': 6, 'regressor__dropout': 0, 'regressor__ffn_hidden_dim': 1700, 'regressor__ffn_num_layers': 1}
Best score (MSE): 8.116683249601653


## Saving model(s) & Reloading from checkpoint
Save a model as best.pt with the _save_model method of the regressor object.

In [None]:
best_regressor = grid.best_estimator_["regressor"]
best_regressor.save_model("checkpoints") # make sure to point to an existing directory

0,1,2
,num_workers,0
,batch_size,64
,output_dir,
,checkpoint,
,molecule_featurizers,
,no_descriptor_scaling,False
,message_hidden_dim,300
,depth,3
,dropout,0.2
,aggregation,'norm'


Load a model by specifying the checkpoint argument of the regressor with a list of paths to .pt/.ckpt files.

In [7]:
model_path = Path("../../tests/data/example_model_v2_regression_rxn+mol.pt")
loaded_regressor = ChempropRegressor(checkpoint = [model_path])

#load multiple models with the ensemble regressor
loaded_ensemble_regressor = ChempropRegressor(checkpoint = [model_path, model_path])

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
