# Installation instructions

- `conda create --name=chemberta`
- `conda activate chemberta`
- `conda install pip`
- `pip install --pre deepchem`
- `git clone https://github.com/NVIDIA/apex`
- `pip install wandb`
- `pip install transformers==4.25.1 --no-cache-dir`
- `pip install simpletransformers --no-cache-dir`
- `pip install torch torchvision torchaudio`
- `pip install -v --no-cache-dir apex/`
- `conda deactivate`
- `python -m ipykernel install --user --name=chemberta`

Then I made the wandb folder manually: `mkdir home/jackievaleri8/.config/wandb/`. You may need to use root and change permissions of .config/wandb/ directory depending on your permissions structures. Finally, run `wandb login` and you should use your wandb authorization key by following instructions provided. This allows you to use deepchem with wandb.

- OS: Ubuntu 18.04.6 LTS (GNU/Linux 5.4.0-1104-gcp x86_64). 
- Machine type: n1-highmem-8
- CPU platform: Intel Haswell
- Architecture: x86/64
- GPUs: 1 x NVIDIA T4

In [1]:
import logging
import os

import deepchem as dc
import pandas as pd
import sklearn
import torch
from apex import amp
from rdkit import Chem
from simpletransformers.classification import ClassificationModel
from sklearn.metrics import auc, precision_recall_curve, roc_auc_score
from transformers import AutoModelWithLMHead, AutoTokenizer, pipeline

Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torch_geometric'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. cannot import name 'DMPNN' from 'deepchem.models.torch_models' (/home/jackievaleri8/anaconda3/lib/python3.8/site-packages/deepchem/models/torch_models/__init__.py)
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'pytorch_lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


In [2]:
!nvidia-smi # should show your nvidia output

Thu Jun  1 12:20:29 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   61C    P0    29W /  70W |      2MiB / 15360MiB |      7%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [3]:
torch.cuda.is_available()  # must be true

True

# Load in dataset to fine-tune ChemBERTa with

In [4]:
featurizer = (
    dc.feat.RDKitDescriptors()
)  # just use a random descriptor - we are featurizing this to make use of DeepChem's scaffold split function!
tasks = ["hit"]
input_file = "../data/data_prep_for_ml/data_prep_for_ml_pk_37k_screen/FULL_03_19_2022.csv"
loader = dc.data.CSVLoader(tasks=tasks, feature_field="SMILES", featurizer=featurizer)
dataset = loader.create_dataset(input_file)
dataset

<DiskDataset X.shape: (38680, 200), y.shape: (38680, 1), w.shape: (38680, 1), task_names: ['hit']>

# Helper functions

In [5]:
def rework_data_for_chemberta(dataset):
    df = dataset.to_dataframe()
    df = df[["ids", "y"]]
    df.columns = ["smiles", "label"]
    return df


# evaluate each prediction


def modeleval(y_true, y_pred, name=""):

    y_true = [int(x) for x in y_true]
    y_pred = [float(x) for x in y_pred]

    # compute auroc
    auroc = float(roc_auc_score(y_true, y_pred))

    # compute precision-Recall
    precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
    pr = float(auc(recall, precision))

    return (auroc, pr)

# Train models

In [6]:
NUM_FOLDS = 20
NUM_EPOCHS = 5
save_dir = "../models/other_models/chemberta_pk_37k/random_split_chemberta_models/"

In [7]:
auprs = []
aurocs = []

for seed in list(range(NUM_FOLDS)):
    print("Seed: " + str(seed))
    model_dir = save_dir + str(seed) + "/"
    os.mkdir(model_dir)

    # get scaffold split dataset
    # splitter = dc.splits.ScaffoldSplitter() # instead of splitter = dc.splits.RandomSplitter()
    splitter = dc.splits.RandomSplitter()  # instead of splitter = dc.splits.ScaffoldSplitter()
    train, valid, test = splitter.train_valid_test_split(
        dataset, frac_train=0.8, frac_valid=0.1, frac_test=0.1, seed=seed
    )
    train_csv = rework_data_for_chemberta(train)
    test_csv = rework_data_for_chemberta(test)
    valid_csv = rework_data_for_chemberta(valid)

    # set up chemberta architecture
    model = AutoModelWithLMHead.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
    tokenizer = AutoTokenizer.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
    fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)

    # set up logger to monitor issues
    logging.basicConfig(level=logging.INFO)
    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.WARNING)

    # load pre-trained model from hugging face model-hub
    # args from https://simpletransformers.ai/docs/usage/#configuring-a-simple-transformers-model
    # must turn off multiprocessing or code hangs, according to https://github.com/ThilinaRajapakse/simpletransformers/issues/1402
    model = ClassificationModel(
        "roberta",
        "seyonec/ChemBERTa-zinc-base-v1",
        use_cuda=True,
        args={
            "num_train_epochs": NUM_EPOCHS,
            "auto_weights": True,
            "use_early_stopping": True,
            "early_stopping_metric": sklearn.metrics.roc_auc_score,
            "fp16": True,
            "train_batch_size": 50,
            "use_multiprocessing": False,
            "use_multiprocessing_for_evaluation": False,
            "not_saved_args": ["early_stopping_metric"],
        },
    )  # auto weights set to true because class imabalanced

    # train the model using training set and evaluating on validation set
    model.train_model(train_csv, output_dir=model_dir, num_labels=2, use_cuda=True, eval_df=valid_csv)

    # run model on test set
    result, model_outputs, wrong_predictions = model.eval_model(test_csv, acc=sklearn.metrics.accuracy_score)

    # in the order loss, then logits, where the 2nd val (logits) is called a positive > 0 and a negative < 0
    # compute test set performance
    model_output_vals = [x[1] for x in model_outputs]
    groundtruthvals = list(test_csv["label"])

    auroc, pr = modeleval(groundtruthvals, model_output_vals)
    print("Fold: " + str(seed))
    print("Test auROC: " + str(auroc))
    print("Test auPR: " + str(pr))
    auprs.append(pr)
    aurocs.append(auroc)

Seed: 0


Some weights of the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and a

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/0/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.37271048726985334, 'tp': 40, 'tn': 3690, 'fp': 33, 'fn': 105, 'auroc': 0.9078162771958098, 'auprc': 0.46490776457564753, 'acc': 0.9643226473629782, 'eval_loss': 0.127030973807593}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datas

Fold: 0
Test auROC: 0.9050200524234256
Test auPR: 0.4606870451841162
Seed: 1


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/1/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.36622957479551815, 'tp': 34, 'tn': 3718, 'fp': 36, 'fn': 80, 'auroc': 0.8652069371617643, 'auprc': 0.3464761662074834, 'acc': 0.9700103412616339, 'eval_loss': 0.12489294565536758}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datas

Fold: 1
Test auROC: 0.8690811672227985
Test auPR: 0.33447046583201445
Seed: 2


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/2/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4084672010819655, 'tp': 36, 'tn': 3728, 'fp': 31, 'fn': 73, 'auroc': 0.8791707242068577, 'auprc': 0.3747526504238062, 'acc': 0.9731127197518097, 'eval_loss': 0.11321900083013803}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datase

Fold: 2
Test auROC: 0.8732278006789821
Test auPR: 0.3735426870203861
Seed: 3


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/3/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.41646871257630363, 'tp': 37, 'tn': 3723, 'fp': 25, 'fn': 83, 'auroc': 0.8699884382781928, 'auprc': 0.4014741286060472, 'acc': 0.9720785935884177, 'eval_loss': 0.12369345599585328}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datas

Fold: 3
Test auROC: 0.8671280238349341
Test auPR: 0.39591215829509263
Seed: 4


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/4/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4044098154174005, 'tp': 33, 'tn': 3737, 'fp': 28, 'fn': 70, 'auroc': 0.9038435255740791, 'auprc': 0.37977385772597, 'acc': 0.9746639089968976, 'eval_loss': 0.10158972817757898}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datasets

Fold: 4
Test auROC: 0.9015188437189752
Test auPR: 0.37348241510872265
Seed: 5


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/5/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.46735984359102867, 'tp': 48, 'tn': 3708, 'fp': 27, 'fn': 85, 'auroc': 0.9045364415053698, 'auprc': 0.46882421221867315, 'acc': 0.9710444674250258, 'eval_loss': 0.1218781506658093}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datas

Fold: 5
Test auROC: 0.9095691034815956
Test auPR: 0.47197386463143676
Seed: 6


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/6/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4187279066285532, 'tp': 35, 'tn': 3731, 'fp': 23, 'fn': 79, 'auroc': 0.853150791202834, 'auprc': 0.39998356954838066, 'acc': 0.9736297828335057, 'eval_loss': 0.1182260497296152}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.dataset

Fold: 6
Test auROC: 0.8578113170512855
Test auPR: 0.40393295307677024
Seed: 7


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/7/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.5217426500740805, 'tp': 52, 'tn': 3715, 'fp': 19, 'fn': 82, 'auroc': 0.8953225303583849, 'auprc': 0.48217385118830186, 'acc': 0.9738883143743536, 'eval_loss': 0.12100001535386093}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datas

Fold: 7
Test auROC: 0.895372494783714
Test auPR: 0.48062816892410226
Seed: 8


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/8/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.44813073384658586, 'tp': 39, 'tn': 3728, 'fp': 22, 'fn': 79, 'auroc': 0.8675209039548022, 'auprc': 0.4142766151464967, 'acc': 0.9738883143743536, 'eval_loss': 0.11867867274717851}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datas

Fold: 8
Test auROC: 0.873161581920904
Test auPR: 0.4195666729358582
Seed: 9


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/9/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4673967948612176, 'tp': 44, 'tn': 3721, 'fp': 25, 'fn': 78, 'auroc': 0.8727713933113354, 'auprc': 0.4572327284765299, 'acc': 0.9733712512926577, 'eval_loss': 0.1170672443656882}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.dataset

Fold: 9
Test auROC: 0.8787132504179321
Test auPR: 0.45541780672205323
Seed: 10


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/10/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.43559637057573464, 'tp': 41, 'tn': 3721, 'fp': 31, 'fn': 75, 'auroc': 0.8963656164987869, 'auprc': 0.43674627452124487, 'acc': 0.9725956566701137, 'eval_loss': 0.11311010231287026}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.data

Fold: 10
Test auROC: 0.8974259705168737
Test auPR: 0.4360215327450321
Seed: 11


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/11/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.3796565568330954, 'tp': 31, 'tn': 3734, 'fp': 28, 'fn': 75, 'auroc': 0.8870708073786524, 'auprc': 0.40151392511288403, 'acc': 0.9733712512926577, 'eval_loss': 0.10602805209307631}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datas

Fold: 11
Test auROC: 0.8871874153651711
Test auPR: 0.4032219016739414
Seed: 12


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/12/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4256432562463128, 'tp': 47, 'tn': 3693, 'fp': 33, 'fn': 95, 'auroc': 0.8887546967257113, 'auprc': 0.4218884686743053, 'acc': 0.9669079627714581, 'eval_loss': 0.14106668134735637}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datase

Fold: 12
Test auROC: 0.8939617306630983
Test auPR: 0.4237721962893336
Seed: 13


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/13/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.525334763007242, 'tp': 57, 'tn': 3709, 'fp': 30, 'fn': 72, 'auroc': 0.8958588604091381, 'auprc': 0.5050964512728051, 'acc': 0.9736297828335057, 'eval_loss': 0.11434602284850168}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.dataset

Fold: 13
Test auROC: 0.9030157713271592
Test auPR: 0.5078429070222785
Seed: 14


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/14/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4738884932842304, 'tp': 45, 'tn': 3717, 'fp': 21, 'fn': 85, 'auroc': 0.8894657776680248, 'auprc': 0.4620739733496086, 'acc': 0.9725956566701137, 'eval_loss': 0.1219546432214335}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.dataset

Fold: 14
Test auROC: 0.8894729802033173
Test auPR: 0.4584819331598253
Seed: 15


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/15/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4026008633327667, 'tp': 38, 'tn': 3717, 'fp': 32, 'fn': 81, 'auroc': 0.8695024555567759, 'auprc': 0.4067537957944841, 'acc': 0.9707859358841778, 'eval_loss': 0.12517103592842077}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datase

Fold: 15
Test auROC: 0.8738498333449144
Test auPR: 0.4063852543390625
Seed: 16


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/16/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4249178062121436, 'tp': 34, 'tn': 3727, 'fp': 14, 'fn': 93, 'auroc': 0.8859677925183169, 'auprc': 0.43899534219201564, 'acc': 0.9723371251292657, 'eval_loss': 0.1227904306955574}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datase

Fold: 16
Test auROC: 0.8860782939422067
Test auPR: 0.436081147332958
Seed: 17


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/17/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4763536218686626, 'tp': 38, 'tn': 3741, 'fp': 18, 'fn': 71, 'auroc': 0.8913762932265317, 'auprc': 0.44007655360714926, 'acc': 0.9769906928645294, 'eval_loss': 0.10207897972715788}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datas

Fold: 17
Test auROC: 0.895153161464473
Test auPR: 0.44521354084241954
Seed: 18


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/18/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.47581534155702965, 'tp': 49, 'tn': 3701, 'fp': 19, 'fn': 99, 'auroc': 0.8840971011333915, 'auprc': 0.4872655036880028, 'acc': 0.9694932781799379, 'eval_loss': 0.13910995334509976}
INFO:deepchem.splits.splitters:Computing train/valid/test indices
INFO:deepchem.data.datasets:Constructing selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 1
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 1
INFO:deepchem.data.datasets:Constructing selection output shard 2
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 2
INFO:deepchem.data.datas

Fold: 18
Test auROC: 0.8812845103167684
Test auPR: 0.4840118593133442
Seed: 19


INFO:deepchem.data.datasets:Constructing selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 3
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection output shard 3
INFO:deepchem.data.datasets:Constructing selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 1/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 2/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 3/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 4/5 for selection output shard 4
INFO:deepchem.data.datasets:Selecting from input shard 5/5 for selection out

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/619 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to chemberta/random_split_chemberta_models/19/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_2


Running Evaluation:   0%|          | 0/484 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.4769502428255329, 'tp': 45, 'tn': 3722, 'fp': 25, 'fn': 76, 'auroc': 0.8890980553037471, 'auprc': 0.47972857005410213, 'acc': 0.9738883143743536, 'eval_loss': 0.10872633025542763}


Fold: 19
Test auROC: 0.8933935026809326
Test auPR: 0.47507379799196225


In [8]:
results_df = pd.DataFrame()
results_df["auPR"] = auprs
results_df["auROC"] = aurocs
results_df.to_csv(save_dir + "summary_results.csv", index=False)
results_df

Unnamed: 0,auPR,auROC
0,0.460687,0.90502
1,0.33447,0.869081
2,0.373543,0.873228
3,0.395912,0.867128
4,0.373482,0.901519
5,0.471974,0.909569
6,0.403933,0.857811
7,0.480628,0.895372
8,0.419567,0.873162
9,0.455418,0.878713
