# Build classification model

Note: before running this notebook, make sure you are logged in by running `dm login EMAIL` in a terminal.

In [None]:
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve
from tqdm import tqdm

from deepmirror.api import model_info, model_metadata, predict, train

In [None]:
# Data files are resolved relative to the parent of the current working
# directory, under its "data" sub-directory.
root = Path.cwd().parent
data_path_train = root.joinpath("data", "data-cls.csv")
data_path_test = root.joinpath("data", "data-cls-test.csv")

# Load the training and test tables in one pass, in that order.
df_train, df_test = map(pd.read_csv, (data_path_train, data_path_test))

In [None]:
# Preview the first five rows of the training table.
df_train.head(n=5)

In [None]:
# Settings for the training request: the training CSV, the columns holding
# the SMILES strings and the target values, and the classification flag.
train_kwargs = {
    "model_name": "test-cls-model",
    "csv_file": data_path_train,
    "smiles_column": "smiles",
    "value_column": "y",
    "classification": True,
}

# Keep the response; later cells read "model_id", "status" and "model_name"
# from it.
model = train(**train_kwargs)

In [None]:
# Display the response returned by the train() call above.
model

In [None]:
# Refresh the model record, then poll once per second until its status
# reads "completed". The progress bar has no total; each tick corresponds
# to one poll (roughly one second of waiting).
# NOTE(review): the only exit condition is status == "completed", so this
# loop does not terminate if the status never reaches that value — confirm
# which other terminal statuses the API can report.
model = model_info(model["model_id"])
with tqdm(desc="Training model", unit="sec") as progress:
    while True:
        if model["status"] == "completed":
            break
        time.sleep(1)
        model = model_info(model["model_id"])
        progress.set_postfix_str(f"Status: {model['status']}")
        progress.update(1)

In [None]:
# Look up and display the metadata for the model that was just polled.
trained_model_id = model["model_id"]
model_metadata(trained_model_id)

In [None]:
# Display the test table loaded from data-cls-test.csv.
df_test

In [None]:
# Request predictions for every SMILES string in the test table, passing
# the model's name as the first argument.
test_smiles = df_test["smiles"].tolist()
predictions = predict(model["model_name"], smiles=test_smiles)

In [None]:
# Scores returned by the prediction call.
# NOTE(review): presumably these are positive-class probabilities — confirm
# against the API documentation.
class_prob = predictions["prediction"]

# Hard labels at a 0.5 threshold (not used further in this cell) and the
# ground-truth labels from the test table.
y_pred = (np.asarray(class_prob) > 0.5).astype(int)
y_true = df_test["y"].astype(int)

# Area under the ROC curve, computed from the raw scores.
auc_roc = roc_auc_score(y_true, class_prob)
print(f"ROC AUC Score: {auc_roc:.4f}")

# ROC curve points; the thresholds are discarded.
fpr, tpr, _ = roc_curve(y_true, class_prob)

# Draw the ROC curve together with the chance diagonal.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color="blue", lw=2, label=f"ROC curve (AUC = {auc_roc:.4f})")
ax.plot([0, 1], [0, 1], color="gray", linestyle="--")
ax.set(
    xlim=(0.0, 1.0),
    ylim=(0.0, 1.05),
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="Receiver Operating Characteristic (ROC) Curve",
)
ax.legend(loc="lower right")
ax.grid(True)
plt.show()