# 🧪 Tox21 Dataset Exploration
This notebook downloads the Tox21 dataset and explores its structure using DeepChem and RDKit.

In [None]:
import deepchem as dc
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw

# Fetch Tox21 through DeepChem's molnet loader, requesting the "ECFP" featurizer.
tox21_tasks, datasets, transformers = dc.molnet.load_tox21(featurizer="ECFP")
train_dataset, valid_dataset, test_dataset = datasets

# Sanity-check the training split: sample count plus feature/label array shapes.
training_summary = (
    ("Number of training samples:", len(train_dataset)),
    ("Feature shape:", train_dataset.X.shape),
    ("Labels shape:", train_dataset.y.shape),
)
for caption, value in training_summary:
    print(caption, value)


In [None]:
# Parse the first few training-set IDs (used here as SMILES strings) into RDKit
# molecules and draw them side by side.
smiles = train_dataset.ids[:5]
mols = [Chem.MolFromSmiles(smi) for smi in smiles]

# Caption every structure with its SMILES so each drawing can be traced back to
# the dataset entry it came from. str() turns the array elements into plain
# Python strings before they are handed to RDKit.
legends = [str(smi) for smi in smiles]

# Keep the image as the cell's last expression so the notebook renders it.
Draw.MolsToGridImage(mols, molsPerRow=5, subImgSize=(200, 200), legends=legends)


In [None]:
# Tabulate the labels of the first five training samples, one column per task.
first_labels = train_dataset.y[:5]
df = pd.DataFrame(data=first_labels, columns=tox21_tasks)
df


📌 **Next steps**: Featurize with custom fingerprints, try a simple Random Forest model, and evaluate using ROC-AUC.