# ML-Fragment-Optimizer Quick Start

This notebook demonstrates basic usage of ML-Fragment-Optimizer for ADMET property prediction.

In [None]:
# Import required modules
import sys
from pathlib import Path
# Prepend `../src` to the module search path, presumably so the
# ml_fragment_optimizer imports below resolve from a source checkout
# without installing the package. The path is relative, so it is resolved
# against the kernel's current working directory.
sys.path.insert(0, str(Path('..') / 'src'))

import pandas as pd
import numpy as np
from ml_fragment_optimizer import ADMETPredictor, MolecularFeaturizer
from ml_fragment_optimizer.utils.featurizers import calculate_basic_properties

## 1. Molecular Featurization

In [None]:
# Configure a Morgan-fingerprint featurizer (radius 2, 2048 bits)
featurizer = MolecularFeaturizer(fingerprint_type="morgan", radius=2, n_bits=2048)

# Three small demo molecules, given as SMILES strings
smiles = ["CCO", "c1ccccc1", "CC(C)C"]

# Turn the SMILES list into features and report the resulting shape
features = featurizer.featurize(smiles)
print(f"Feature matrix shape: {features.shape}")

# For each molecule, list every basic property rounded to two decimals
for molecule in smiles:
    descriptors = calculate_basic_properties(molecule)
    print(f"\n{molecule}:")
    for name in descriptors:
        print(f"  {name}: {descriptors[name]:.2f}")

## 2. Training an ADMET Model

In [None]:
# Seeded generator so the synthetic targets below are identical on every
# Restart & Run All. Any randomness inside the predictor itself (e.g. the
# train/validation split) is not controlled by this seed.
SEED = 42
rng = np.random.default_rng(SEED)

# Toy training set: ten small molecules as SMILES strings
train_smiles = [
    "CCO", "c1ccccc1", "CC(C)C", "CCCC", "CCC(C)C",
    "c1ccc(O)cc1", "CCN", "CCCO", "c1cccnc1", "CC(=O)O"
]

# Synthetic (random) target values, one per training molecule. The count is
# derived from the SMILES list so the targets stay aligned if it is edited.
n_train = len(train_smiles)
train_properties = {
    "solubility": rng.standard_normal(n_train) * 2 - 1,
    "logp": rng.standard_normal(n_train) * 1.5 + 2
}

# Initialize a random-forest predictor (50 estimators) for both properties
predictor = ADMETPredictor(
    properties=["solubility", "logp"],
    model_type="random_forest",
    n_estimators=50
)

# Train; validation_split=0.2 presumably holds out 20% of the molecules
# for the reported metrics -- confirm against ADMETPredictor.fit
metrics = predictor.fit(
    smiles=train_smiles,
    properties_dict=train_properties,
    validation_split=0.2
)

# Report every metric returned for each property, three decimals each
print("\nValidation metrics:")
for prop, metric_dict in metrics.items():
    print(f"\n{prop}:")
    for metric_name, value in metric_dict.items():
        print(f"  {metric_name}: {value:.3f}")

## 3. Making Predictions

In [None]:
# Example molecules to score with the trained predictor
test_smiles = ["CCCCO", "c1ccc(N)cc1", "CC(C)CO"]

# Run the model; the result is indexed by property name below
predictions = predictor.predict(test_smiles)

# Assemble one table: the SMILES column followed by one column per property
results_df = pd.DataFrame(
    {
        "SMILES": test_smiles,
        **{prop: predictions[prop] for prop in ("solubility", "logp")},
    }
)

print("\nPredictions:")
print(results_df)

## 4. Uncertainty Estimation

In [None]:
# Ask the predictor for uncertainties alongside the predictions
predictions, uncertainties = predictor.predict(test_smiles, return_uncertainty=True)

# Build the table column by column: each property is followed directly by
# its "<property>_unc" uncertainty column
unc_columns = {"SMILES": test_smiles}
for prop in ("solubility", "logp"):
    unc_columns[prop] = predictions[prop]
    unc_columns[f"{prop}_unc"] = uncertainties[prop]
results_with_unc = pd.DataFrame(unc_columns)

print("\nPredictions with uncertainties:")
print(results_with_unc)

## 5. Save and Load Model

In [None]:
# Save model (create the target directory first so a fresh checkout works)
model_path = Path("../models/example_model.pkl")
model_path.parent.mkdir(parents=True, exist_ok=True)
predictor.save(model_path)
print(f"Model saved to {model_path}")

# Load model
# NOTE(review): the .pkl extension suggests a pickle-based format -- if so,
# only load model files from sources you trust.
loaded_predictor = ADMETPredictor.load(model_path)
print(f"Model loaded from {model_path}")

# Verify predictions match: score the same molecule with the in-memory and
# the reloaded predictor and fail loudly if any property differs.
check_smiles = ["CCO"]
original_predictions = predictor.predict(check_smiles)
new_predictions = loaded_predictor.predict(check_smiles)
for prop in ("solubility", "logp"):
    np.testing.assert_allclose(
        new_predictions[prop],
        original_predictions[prop],
        err_msg=f"Reloaded model disagrees with the original for '{prop}'",
    )
print(f"\nTest prediction: {new_predictions}")