In [1]:
import pandas as pd
import deepchem as dc
import numpy as np
from tqdm import tqdm
import torch

  from .autonotebook import tqdm as notebook_tqdm
2024-05-08 03:43:37.441801: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


In [2]:
# Project-local helper module, aliased as HIVLoader for the rest of the notebook.
import data_utils as HIVLoader

# Load the main table once; later cells split and featurize `df`.
df = HIVLoader.load_main_data()

# Preview the first five rows as the cell's rich output.
df.head(n=5)

Unnamed: 0,smiles,activity,HIV_active
0,CCC1=[O+][Cu-3]2([O+]=C(CC)C1)[O+]=C(CC)CC(CC)...,CI,False
1,C(=Cc1ccccc1)C1=[O+][Cu-3]2([O+]=C(C=Cc3ccccc3...,CI,False
2,CC(=O)N1c2ccccc2Sc2c1ccc1ccccc21,CI,False
3,Nc1ccc(C=Cc2ccc(N)cc2S(=O)(=O)O)c(S(=O)(=O)O)c1,CI,False
4,O=S(=O)(O)CCS(=O)(=O)O,CI,False


In [3]:
# Split the table with the project helper, then print one class-balance line per
# partition (train first, then test).
train_ds, test_ds = HIVLoader.scaffold_split(df)
for split in (train_ds, test_ds):
    n_total = len(split)
    n_active = sum(split.y)
    # Element-wise comparison against False is intentional: it yields a mask of inactive rows.
    n_inactive = sum(split.y == False)  # noqa: E712
    print(f'total: {n_total:5d}, active: {n_active:4d}, inactive: {n_inactive:5d}')



total: 32901, active: 1219, inactive: 31682
total:  8226, active:  211, inactive:  8015


In [4]:
# Molecular-graph featurizer with edge features enabled, wrapped in a dataset
# transformer so it can be applied to whole DeepChem datasets.
featurizer = dc.feat.MolGraphConvFeaturizer(use_edges=True)
transformer = dc.trans.FeaturizationTransformer(featurizer=featurizer)

# Featurize both partitions with the same transformer (train first, then test).
train_feat, test_feat = (
    dataset.transform(transformer) for dataset in (train_ds, test_ds)
)




In [5]:
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score

In [6]:
# Train on the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs end-to-end on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = dc.models.GCNModel(mode='classification', n_tasks=1, learning_rate=0.001, device=device)

EPOCH = 10        # number of full passes over the training set
BATCH_SIZE = 64   # molecules per optimisation step
metrics = [dc.metrics.Metric(metric) for metric in [roc_auc_score, f1_score, accuracy_score]]

# tqdm expects an integer `total`; np.ceil returns a float, which is why the
# progress bar used to render as "515/515.0".
n_batches = int(np.ceil(len(train_feat) / BATCH_SIZE))

for epoch in range(EPOCH):
    # One pass over the training data, collecting the per-batch loss.
    losses = []
    for X, y, w, _ in tqdm(train_feat.iterbatches(BATCH_SIZE), total=n_batches):
        losses.append(model.fit_on_batch(X, y, w))

    # Score the held-out split after every epoch to monitor progress.
    # NOTE(review): `transformers` is kept exactly as before; a featurization
    # transformer presumably has no effect on label un-transforming here — confirm.
    result_metrics = model.evaluate(test_feat, metrics, transformers=[transformer])
    # Plain Python float keeps the printed dict free of NumPy scalar reprs.
    result_metrics['train_loss'] = float(np.mean(losses))
    result_metrics['epoch'] = epoch
    print(result_metrics)

100%|██████████| 515/515.0 [00:20<00:00, 24.57it/s]


{'roc_auc_score': 0.7624124494061786, 'f1_score': 0.09691629955947137, 'accuracy_score': 0.975079017748602, 'train_loss': 0.1251277401645496, 'epoch': 0}


100%|██████████| 515/515.0 [00:19<00:00, 26.90it/s]


{'roc_auc_score': 0.7670731418873971, 'f1_score': 0.11155378486055777, 'accuracy_score': 0.9728908339411622, 'train_loss': 0.08551204247486063, 'epoch': 1}


100%|██████████| 515/515.0 [00:23<00:00, 22.28it/s]


{'roc_auc_score': 0.7479713688492844, 'f1_score': 0.08620689655172414, 'accuracy_score': 0.9742280573790421, 'train_loss': 0.08223976806208527, 'epoch': 2}


100%|██████████| 515/515.0 [00:19<00:00, 26.85it/s]


{'roc_auc_score': 0.7658891947267121, 'f1_score': 0.12195121951219512, 'accuracy_score': 0.973741794310722, 'train_loss': 0.07889204232148754, 'epoch': 3}


100%|██████████| 515/515.0 [00:19<00:00, 26.08it/s]


{'roc_auc_score': 0.7534236753953636, 'f1_score': 0.009302325581395349, 'accuracy_score': 0.974106491611962, 'train_loss': 0.07778535717329715, 'epoch': 4}


100%|██████████| 515/515.0 [00:19<00:00, 25.83it/s]


{'roc_auc_score': 0.7407266588416861, 'f1_score': 0.06306306306306306, 'accuracy_score': 0.9747143204473621, 'train_loss': 0.07586140666016856, 'epoch': 5}


100%|██████████| 515/515.0 [00:20<00:00, 25.45it/s]


{'roc_auc_score': 0.7321954096732134, 'f1_score': 0.0, 'accuracy_score': 0.9742280573790421, 'train_loss': 0.07379446252818825, 'epoch': 6}


100%|██████████| 515/515.0 [00:19<00:00, 26.95it/s]


{'roc_auc_score': 0.7436327915963254, 'f1_score': 0.009433962264150943, 'accuracy_score': 0.9744711889132021, 'train_loss': 0.07257842422028986, 'epoch': 7}


100%|██████████| 515/515.0 [00:19<00:00, 26.30it/s]


{'roc_auc_score': 0.7379939272631588, 'f1_score': 0.044444444444444446, 'accuracy_score': 0.973863360077802, 'train_loss': 0.07102328974254049, 'epoch': 8}


100%|██████████| 515/515.0 [00:19<00:00, 26.16it/s]


{'roc_auc_score': 0.7470186232567491, 'f1_score': 0.338368580060423, 'accuracy_score': 0.9733770970094822, 'train_loss': 0.06947842299807093, 'epoch': 9}


In [7]:
# Final scoring of the trained model on the held-out split, using the same three
# scikit-learn metrics wrapped as DeepChem metrics (ROC-AUC, F1, accuracy).
metrics = [
    dc.metrics.Metric(roc_auc_score),
    dc.metrics.Metric(f1_score),
    dc.metrics.Metric(accuracy_score),
]
model.evaluate(test_feat, metrics, transformers=[transformer])

{'roc_auc_score': 0.7470186232567491,
 'f1_score': 0.338368580060423,
 'accuracy_score': 0.9733770970094822}