add notebooks to document modules

chemprop · Apr 24, 2024 · a82d17d · a82d17d
1 parent 22fb3f6
commit a82d17d
Show file tree

Hide file tree

Showing 8 changed files with 2,499 additions and 0 deletions.
diff --git a/examples/super_minimal.ipynb b/examples/super_minimal.ipynb
@@ -0,0 +1,230 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lightning import pytorch as pl\n",
+    "import numpy as np\n",
+    "from chemprop import data, featurizers, models, nn"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
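+    "Step 1: Make datapoints. The target values here are random numbers used only as placeholders, so the trained model is not expected to be meaningful."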
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "smis = [\"C\", \"CC\", \"CCC\", \"CCCC\", \"CCCCC\"]\n",
+    "ys = np.random.rand(len(smis), 1)\n",
+    "datapoints = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Step 2: Make a dataset and dataloader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = data.MoleculeDataset(datapoints)\n",
+    "dataloader = data.build_dataloader(dataset)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Step 3: Define the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chemprop_model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
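+    "Step 4: Set up the trainer. To keep the example minimal, logging and checkpointing are disabled and training runs for a single epoch."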
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "GPU available: False, used: False\n",
+      "TPU available: False, using: 0 TPU cores\n",
+      "IPU available: False, using: 0 IPUs\n",
+      "HPU available: False, using: 0 HPUs\n"
+     ]
+    }
+   ],
+   "source": [
+    "trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Step 5: Train the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.\n",
+      "Loading `train_dataloader` to estimate number of stepping batches.\n",
+      "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n",
+      "\n",
+      "  | Name            | Type               | Params\n",
+      "-------------------------------------------------------\n",
+      "0 | message_passing | BondMessagePassing | 227 K \n",
+      "1 | agg             | MeanAggregation    | 0     \n",
+      "2 | bn              | BatchNorm1d        | 600   \n",
+      "3 | predictor       | RegressionFFN      | 90.6 K\n",
+      "4 | X_d_transform   | Identity           | 0     \n",
+      "-------------------------------------------------------\n",
+      "318 K     Trainable params\n",
+      "0         Non-trainable params\n",
+      "318 K     Total params\n",
+      "1.276     Total estimated model params size (MB)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  1.90it/s, train_loss=0.293]"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "`Trainer.fit` stopped: `max_epochs=1` reached.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  1.90it/s, train_loss=0.293]\n"
+     ]
+    }
+   ],
+   "source": [
+    "trainer.fit(chemprop_model, dataloader)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
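+    "Step 6: Use the model to make predictions. The dataloader is rebuilt with `shuffle=False` so that the predictions are returned in the same order as the datapoints in the dataset."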
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  5.97it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "dataloader = data.build_dataloader(dataset, shuffle=False)\n",
+    "preds = trainer.predict(chemprop_model, dataloader)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[tensor([[-0.0043],\n",
+       "         [-0.0097],\n",
+       "         [-0.0117],\n",
+       "         [-0.0121],\n",
+       "         [-0.0144]])]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "preds"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "chemprop",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}