Skip to content

Commit

Permalink
add notebooks to document modules
Browse files Browse the repository at this point in the history
  • Loading branch information
KnathanM committed Apr 24, 2024
1 parent 22fb3f6 commit a82d17d
Show file tree
Hide file tree
Showing 8 changed files with 2,499 additions and 0 deletions.
230 changes: 230 additions & 0 deletions examples/super_minimal.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from lightning import pytorch as pl\n",
"import numpy as np\n",
"from chemprop import data, featurizers, models, nn"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Step 1: Make datapoints"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"smis = [\"C\", \"CC\", \"CCC\", \"CCCC\", \"CCCCC\"]\n",
"ys = np.random.rand(len(smis), 1)\n",
"datapoints = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Step 2: Make a dataset and dataloader"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"dataset = data.MoleculeDataset(datapoints)\n",
"dataloader = data.build_dataloader(dataset)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Step 3: Define the model"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"chemprop_model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Step 4: Set up the trainer"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"GPU available: False, used: False\n",
"TPU available: False, using: 0 TPU cores\n",
"IPU available: False, using: 0 IPUs\n",
"HPU available: False, using: 0 HPUs\n"
]
}
],
"source": [
"trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Step 5: Train the model"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.\n",
"Loading `train_dataloader` to estimate number of stepping batches.\n",
"/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n",
"\n",
" | Name | Type | Params\n",
"-------------------------------------------------------\n",
"0 | message_passing | BondMessagePassing | 227 K \n",
"1 | agg | MeanAggregation | 0 \n",
"2 | bn | BatchNorm1d | 600 \n",
"3 | predictor | RegressionFFN | 90.6 K\n",
"4 | X_d_transform | Identity | 0 \n",
"-------------------------------------------------------\n",
"318 K Trainable params\n",
"0 Non-trainable params\n",
"318 K Total params\n",
"1.276 Total estimated model params size (MB)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 1.90it/s, train_loss=0.293]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"`Trainer.fit` stopped: `max_epochs=1` reached.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 1.90it/s, train_loss=0.293]\n"
]
}
],
"source": [
"trainer.fit(chemprop_model, dataloader)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Step 6: Use the model to make predictions"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 5.97it/s]\n"
]
}
],
"source": [
"dataloader = data.build_dataloader(dataset, shuffle=False)\n",
"preds = trainer.predict(chemprop_model, dataloader)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[tensor([[-0.0043],\n",
" [-0.0097],\n",
" [-0.0117],\n",
" [-0.0121],\n",
" [-0.0144]])]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"preds"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "chemprop",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit a82d17d

Please sign in to comment.