In [1]:
!pip install rdkit --pre deepchem torch_geometric triton

Collecting rdkit
  Downloading rdkit-2024.9.6-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.0 kB)
Collecting deepchem
  Downloading deepchem-2.8.1.dev20250415010951-py3-none-any.whl.metadata (2.0 kB)
Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting numpy (from rdkit)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m500.5 kB/s[0m eta [36m0:00:00[0m
Downloading rdkit-2024.9.6-cp311-cp311-manylinux_2_28_x86_64.whl (34.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.3/34.3 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading deepchem-2.8.1.dev20250415010951-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [1]:
import deepchem as dc
import numpy as np
import torch
import time
import matplotlib.pyplot as plt
from torch.profiler import profile, record_function, ProfilerActivity
from deepchem.models.torch_models import DMPNN
from deepchem.models.optimizers import Adam
import deepchem.models.losses as losses
import os
import datetime

from torch_geometric.data import Batch
from torch_geometric.data import Data

Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


In [2]:
# Step 1: Load PCBA dataset
print("Loading Tox21 dataset...")
tox21_tasks, tox21_datasets, transformers = dc.molnet.load_tox21(featurizer=dc.feat.DMPNNFeaturizer(), splitter='random')
train_dataset, valid_dataset, test_dataset = tox21_datasets

Loading Tox21 dataset...


[02:08:14] Explicit valence for atom # 8 Al, 6, is greater than permitted
    rdkit.Chem.rdmolfiles.CanonicalRankAtoms(NoneType)
did not match C++ signature:
    CanonicalRankAtoms(RDKit::ROMol mol, bool breakTies=True, bool includeChirality=True, bool includeIsotopes=True, bool includeAtomMaps=True, bool includeChiralPresence=False)
[02:08:15] Explicit valence for atom # 3 Al, 6, is greater than permitted
    rdkit.Chem.rdmolfiles.CanonicalRankAtoms(NoneType)
did not match C++ signature:
    CanonicalRankAtoms(RDKit::ROMol mol, bool breakTies=True, bool includeChirality=True, bool includeIsotopes=True, bool includeAtomMaps=True, bool includeChiralPresence=False)
[02:08:15] Explicit valence for atom # 4 Al, 6, is greater than permitted
    rdkit.Chem.rdmolfiles.CanonicalRankAtoms(NoneType)
did not match C++ signature:
    CanonicalRankAtoms(RDKit::ROMol mol, bool breakTies=True, bool includeChirality=True, bool includeIsotopes=True, bool includeAtomMaps=True, bool includeChiralPresence

In [3]:
def time_torch_function(fn):
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    result = fn()
    end.record()
    torch.cuda.synchronize()
    return result, start.elapsed_time(end) / 1000

track_dict = {}
prev_time_dict = {}

def get_time_track_callback(track_dict, track_name, track_interval):
    track_dict[track_name] = []
    prev_time_dict[track_name] = datetime.datetime.now()
    def callback(model, step):
        if step % track_interval == 0:
            elapsed_time = datetime.datetime.now() - prev_time_dict[track_name]
            track_dict[track_name].append(elapsed_time.total_seconds())
            prev_time_dict[track_name] = datetime.datetime.now()
    return callback

In [6]:
# Initialize models with correct number of tasks
model = dc.models.DMPNNModel(n_tasks=len(tox21_tasks))
model_compiled = dc.models.DMPNNModel(n_tasks=len(tox21_tasks))

model_compiled.compile(mode='reduce-overhead')

In [7]:
track_interval = 20
eager_dict_name = "eager_train"
compiled_dict_name = "compiled_train"

eager_train_callback = get_time_track_callback(track_dict, eager_dict_name, track_interval)
model.fit(train_dataset, nb_epoch=10, callbacks=[eager_train_callback])

compiled_train_callback = get_time_track_callback(track_dict, compiled_dict_name, track_interval)
model_compiled.fit(train_dataset, nb_epoch=10, callbacks=[compiled_train_callback])

W0418 02:15:34.750000 613 torch/_dynamo/exc.py:304] [0/0] Backend compiler failed with a fake tensor exception at 
W0418 02:15:34.750000 613 torch/_dynamo/exc.py:304] [0/0]   File "/usr/local/lib/python3.11/dist-packages/deepchem/models/torch_models/dmpnn.py", line 449, in forward
W0418 02:15:34.750000 613 torch/_dynamo/exc.py:304] [0/0]     return final_output
W0418 02:15:34.750000 613 torch/_dynamo/exc.py:304] [0/0] Adding a graph break.
W0418 02:15:34.939000 613 torch/_dynamo/exc.py:304] [0/0_1] Backend compiler failed with a fake tensor exception at 
W0418 02:15:34.939000 613 torch/_dynamo/exc.py:304] [0/0_1]   File "/usr/local/lib/python3.11/dist-packages/deepchem/models/torch_models/dmpnn.py", line 449, in forward
W0418 02:15:34.939000 613 torch/_dynamo/exc.py:304] [0/0_1]     return final_output
W0418 02:15:34.939000 613 torch/_dynamo/exc.py:304] [0/0_1] Adding a graph break.
W0418 02:15:40.713000 613 torch/_inductor/utils.py:1137] [2/0] Not enough SMs to use max_autotune_gemm m

RuntimeError: TODO: graph recording observed an input tensor deallocate during graph  recording that did not occur during replay. Please file an issue.