This example demonstrates how to use Octopus to create a machine learning classification model.
We will use the breast cancer dataset that ships with scikit-learn for this purpose.
Please ensure your dataset is clean, with no missing values (`NaN`),
and that all features are numeric.

In [None]:
### Necessary imports for this example
import os

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
from octopus import OctoStudy

In [None]:
### Load and Preprocess Data
# Fetch the breast cancer dataset. as_frame=True is requested because the
# next cell reads the "frame" entry of the returned object as a DataFrame.
breast_cancer = load_breast_cancer(as_frame=True)

In [None]:
# Feature names with spaces replaced by underscores, so they match the
# renamed DataFrame columns built below.
features = [name.replace(" ", "_") for name in breast_cancer["feature_names"]]

# reset_index() turns the row index into an "index" column (passed to
# OctoStudy as sample_id); column labels get the same space -> underscore
# substitution as the feature names.
df = breast_cancer["frame"].reset_index()
df.columns = df.columns.str.replace(" ", "_")

In [None]:
### Create and run OctoStudy
# The study path is read from the STUDIES_PATH environment variable and
# defaults to "./studies" when that variable is not set.
study = OctoStudy(
    # Study identity and location
    name="basic_classification",
    path=os.getenv("STUDIES_PATH", "./studies"),
    # Task definition
    ml_type="classification",
    target_metric="AUCROC",
    # Column roles in the DataFrame handed to fit(); "index" is the column
    # produced by reset_index() during preprocessing.
    sample_id="index",
    feature_columns=features,
    target_columns=["target"],
    stratification_column="target",
)

In [None]:
# Run the configured study on the prepared DataFrame.
study.fit(data=df)

In [None]:
# End-of-example marker; when the cells run top to bottom this is only
# reached after study.fit() has returned.
print("Workflow completed")