In [1]:
# Load IPython's autoreload extension and use mode 2, which re-imports all
# modules before each cell executes so edits to local .py files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import wandb

from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

from sklearn.ensemble import RandomForestClassifier

In [3]:
# Authenticate this kernel with Weights & Biases before any sweep or run is created.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mfratambot[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# Loosen pandas' display limits so long sequences and wide/tall frames are
# rendered in full instead of being truncated in notebook output.
pd.set_option("display.max_seq_items", 2000)
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 800)
pd.set_option("display.max_colwidth", 200)
pd.set_option("display.width", 800)

In [5]:
# Notebook-wide settings shared by the cells below.
# Seed reused for data generation, the train/test split and the forest itself.
random_seed: int = 42
# W&B project under which the sweep and its runs are recorded.
project_name: str = "RF-clf-notebook"

In [6]:
# Synthetic binary classification problem: 1000 samples, 10 features of which
# 3 are informative and none are redundant or repeated. shuffle=False keeps
# make_classification from permuting samples and feature columns.
X, y = make_classification(
    n_samples=1000,
    n_classes=2,
    n_features=10,
    n_informative=3,
    n_redundant=0,
    n_repeated=0,
    shuffle=False,
    random_state=random_seed,
)

# Hold out a test set (scikit-learn's default split size), stratified on y so
# both partitions keep the same class balance; seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=random_seed,
)

In [7]:
%%wandb

def train_RF():
    """Train and evaluate one random forest for a single W&B sweep trial.

    Intended to be passed as the ``function`` argument of ``wandb.agent``.
    The function takes no arguments: hyperparameters (``n_estimators``,
    ``max_features``, ``max_depth``) are read from the config of the run it
    starts, and the data comes from the notebook-level ``X_train``,
    ``y_train``, ``X_test`` and ``y_test``. ``project_name`` and
    ``random_seed`` are also read from the notebook namespace.

    Logged to the run:
        f1_macro: macro-averaged F1 score on the test split (the metric the
            sweep optimises).
        confusion_matrix: the rendered confusion-matrix figure.

    Returns:
        None.
    """
    # The context manager guarantees the run is finished (and marked failed)
    # even if fitting or evaluation raises, so a broken trial cannot leave a
    # dangling run behind for the next trial of the sweep.
    with wandb.init(project=project_name) as run:
        # Read from this run's own config rather than the module-global
        # wandb.config, which only mirrors whichever run is currently active.
        config = run.config
        forest = RandomForestClassifier(
            n_estimators=config.n_estimators,
            max_features=config.max_features,
            max_depth=config.max_depth,
            random_state=random_seed,
        )
        pipe = make_pipeline(forest)
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)

        labels = ["0", "1"]
        class_report = classification_report(
            y_test, y_pred, target_names=labels, output_dict=True
        )

        cm = confusion_matrix(y_test, y_pred)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
        disp.plot(cmap="viridis")
        try:
            # One log call = one step: the metric and its plot describe the
            # same evaluation. Log the concrete Figure instead of the pyplot
            # module so the result does not depend on pyplot's global
            # "current figure".
            run.log({
                "f1_macro": class_report["macro avg"]["f1-score"],
                "confusion_matrix": disp.figure_,
            })
        finally:
            # Each trial creates a new figure; close it so a multi-trial
            # sweep does not accumulate open matplotlib figures.
            plt.close(disp.figure_)

In [8]:
# Search definition handed to wandb.sweep: Bayesian optimisation that
# maximises the "f1_macro" value logged by each trial.
sweep_configuration = dict(
    method="bayes",
    name=f"{project_name}-sweeps",
    metric=dict(
        goal="maximize",
        name="f1_macro",
    ),
    # NOTE(review): hyperband stops runs based on a metric logged over several
    # iterations; the training function in this notebook logs f1_macro once
    # per run, so this setting presumably never triggers. Confirm it is wanted.
    early_terminate=dict(
        type="hyperband",
        min_iter=3,
    ),
    # Hyperparameter search space: integer ranges for forest size and depth,
    # and a categorical choice for max_features.
    parameters=dict(
        n_estimators=dict(
            distribution="int_uniform",
            min=1,
            max=1000,
        ),
        max_features=dict(
            values=["sqrt", "log2"],
        ),
        max_depth=dict(
            distribution="int_uniform",
            min=1,
            max=1000,
        ),
    ),
)

In [9]:
# Register the sweep defined by sweep_configuration in the W&B project and keep
# the returned sweep ID; the agent below needs it to request trials.
sweep_id = wandb.sweep(sweep=sweep_configuration, project=project_name)
print(sweep_id)

Create sweep with ID: jpk4sr7d
Sweep URL: https://wandb.ai/fratambot/RF-clf-notebook/sweeps/jpk4sr7d
jpk4sr7d


In [10]:
# Start a sweep agent in this kernel: it calls train_RF once per trial with the
# hyperparameters chosen by the sweep, for at most 20 trials.
wandb.agent(sweep_id, function=train_RF, count=20)

[34m[1mwandb[0m: Agent Starting Run: wzunpotp with config:
[34m[1mwandb[0m: 	max_depth: 742
[34m[1mwandb[0m: 	max_features: sqrt
[34m[1mwandb[0m: 	n_estimators: 405
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


0,1
f1_macro,▁

0,1
f1_macro,0.948
