# Bonus: hyperparameter optimization using WandB Sweeps + Optuna

### !!! Warning
To run the code below, you need a W&B (Weights & Biases) account.

## Login to WandB

In [1]:
import inspect
import subprocess

import optuna
import wandb
import yaml
from wandb import CommError

In [2]:

# Log in to W&B for this session; every later cell talks to the W&B API or CLI.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33maaalex-lit[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# W&B project that holds the job, the artifact and the sweep used below.
PROJECT = "diabetes-prediction"

## Create a Launch Queue


In [4]:
# Register the launch queue that the agent and the sweep below refer to by name.
# A CommError from the API (e.g. the 409 conflict reported when the queue
# already exists) is printed instead of aborting the notebook.
config = {"label": "hyperparams-finetune-optuna"}
api = wandb.Api()
try:
    queue = api.create_run_queue(
        name="diabetes-prediction-queue",
        type="local-process",
        access="project",
        config=config,
    )
except CommError as err:
    print(err.message)

409 response executing GraphQL.
{"errors":[{"message":"project already has queue with name diabetes-prediction-queue","path":["createRunQueue"]}],"data":{"createRunQueue":null}}
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: project already has queue with name diabetes-prediction-queue (<Response [409]>)


project already has queue with name diabetes-prediction-queue (Error 409: Conflict)


## Create the training job

In [5]:
# Create (or update) the code-type launch job "xgb-classifier-diabetes" from the current directory.
# NOTE(review): -E presumably names the entry-point script; confirm with `wandb job create --help`.
!wandb job create -p $PROJECT -n "xgb-classifier-diabetes" code ./ -E "xgb_job.py"

[34m[1mwandb[0m: Creating launch job of type: code...
[34m[1mwandb[0m: Adding directory to artifact (./.)... Done. 0.3s
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: W&B sync reduced upload amount by 99.9%             
[34m[1mwandb[0m: Updated job: [33maaalex-lit/diabetes-prediction/xgb-classifier-diabetes:v3[0m, with alias: [33mlatest[0m
[34m[1mwandb[0m: View all jobs in project 'diabetes-prediction' here: [4mhttps://wandb.ai/aaalex-lit/diabetes-prediction/jobs[0m
[34m[1mwandb[0m: 


## Create a function to optimize


In [5]:
def objective(trial):
    """Sample one hyperparameter configuration from the search space.

    The source of this function is later written to a standalone file and
    uploaded as an artifact, so it must not rely on any name defined elsewhere
    in the notebook.

    Args:
        trial: Object exposing ``suggest_int`` and ``suggest_float``
            (an Optuna trial when run inside a study).

    Returns:
        Always ``-1``: no model is trained here, the function only declares
        the search space.
    """
    # Integer-valued hyperparameters
    n_estimators = trial.suggest_int('n_estimators', 10, 300)
    max_depth = trial.suggest_int('max_depth', 1, 20)

    # Real-valued hyperparameters; the learning rate is requested with log=True
    min_child_weight = trial.suggest_float('min_child_weight', 0, 1)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1, log=True)

    # Echo the sampled values so a dry run shows what was suggested
    print(f"{n_estimators=} {max_depth=} {min_child_weight=} {learning_rate=}")

    # Placeholder objective value: training is deliberately skipped
    return -1

### Test the objective function with a two-trial dry run

In [6]:
# Dry-run the search space locally: `objective` trains nothing, so two trials
# are enough to confirm that every hyperparameter can be sampled.
# The sampler is seeded so the printed suggestions are the same on every
# Restart & Run All (TPESampler is Optuna's default single-objective sampler).
# `optuna` is imported in the notebook's import cell at the top.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)

# Start the optimization process
study.optimize(objective, n_trials=2)


[I 2023-11-09 07:58:25,311] A new study created in memory with name: no-name-a2dc2b64-42d2-471e-ac1e-a1c6e7b12979
[I 2023-11-09 07:58:25,316] Trial 0 finished with value: -1.0 and parameters: {'n_estimators': 152, 'max_depth': 8, 'min_child_weight': 0.830067751170727, 'learning_rate': 0.031024817713560474}. Best is trial 0 with value: -1.0.
[I 2023-11-09 07:58:25,317] Trial 1 finished with value: -1.0 and parameters: {'n_estimators': 215, 'max_depth': 6, 'min_child_weight': 0.7215802763618397, 'learning_rate': 4.59420654424295e-05}. Best is trial 0 with value: -1.0.


n_estimators=152 max_depth=8 min_child_weight=0.830067751170727 learning_rate=0.031024817713560474
n_estimators=215 max_depth=6 min_child_weight=0.7215802763618397 learning_rate=4.59420654424295e-05


## Save the configuration to W&B as an artifact.

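Write the source of the `objective` function to a file and log that file to W&B as an artifact, so the sweep configuration below can reference it as its Optuna source.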

In [7]:
ARTIFACT_FILENAME = "optuna_diabetes_prediction.py"
ARTIFACT_NAME = "optuna-config-diabetes-prediction"

# Write the source code of `objective` to its own file so it can be uploaded.
function_lines = inspect.getsource(objective)
with open(ARTIFACT_FILENAME, 'w') as f:
    f.write(function_lines)

# Create a run and log the file as an artifact. Using the run as a context
# manager finishes it even if logging raises, so no run is left open.
# `artifact` stays in scope: the sweep configuration cell reads its name.
with wandb.init(project=PROJECT) as run:
    artifact = run.log_artifact(ARTIFACT_FILENAME, name=ARTIFACT_NAME, type='optuna')



VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded\r'), FloatProgress(value=0.10500500500500501, max=1.…

## Define a sweep configuration

In [9]:
# Sweep definition: maximize `custom_recall_score` over at most 10 runs of the
# training job, with an Optuna scheduler that loads its search space from the
# artifact logged above.
# NOTE(review): the recorded `wandb launch-sweep` run ended with
# "Error 404 ... project not found"; verify that the job / scheduler job /
# optuna_source references below are resolvable from your account.
config = {
    "metric": {"name": "custom_recall_score", "goal": "maximize"},
    "run_cap": 10,
    "job": "aaalex-lit/diabetes-prediction/xgb-classifier-diabetes:latest",
    "scheduler": {
        "job": "wandb/sweep-jobs/job-optuna-sweep-scheduler:latest",
        "num_workers": 2,
        "settings": {
            # wait() blocks until the artifact is committed, so its name is final
            "optuna_source": f"{PROJECT}/{artifact.wait().name}",
            "optuna_source_filename": ARTIFACT_FILENAME,
        }
    },
}

# Write the config to a file. The context manager closes (and flushes) the
# file before the `wandb launch-sweep` CLI reads it.
config_filename = "sweep-config.yaml"
with open(config_filename, "w") as config_file:
    yaml.dump(config, config_file)

## Launch the agent

In [10]:
# Start the launch agent as a background child process: Popen returns
# immediately, so the next cell can submit the sweep while the agent polls the
# queue. The handle is kept in `process` so the agent can be stopped later.
# `subprocess` is imported in the notebook's import cell at the top.
process = subprocess.Popen(["wandb", "launch-agent", "-q", "diabetes-prediction-queue"])

## Launch the sweep

In [11]:
# Submit the sweep described in sweep-config.yaml to the queue the agent is polling.
# NOTE(review): the entity (-e) and queue name (-q) are hard-coded; they must match your account and the queue created above.
! wandb launch-sweep sweep-config.yaml -e aaalex-lit -p $PROJECT -q diabetes-prediction-queue



wandb: Starting launch agent ✨


[34m[1mwandb[0m:   2 of 2 files downloaded.  


wandb: launch: agent knzt4mzz polling on queues diabetes-prediction-queue, running 0 out of a maximum of 1 jobs


[34m[1mwandb[0m:                                                                                
Problem at: /Users/alexlitvinov/anaconda3/envs/ml-zoomcamp-midterm-alex/lib/python3.10/site-packages/wandb/sdk/artifacts/artifact.py 1690 download
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to load job. It appears that you do not have permission to access the requested resource. Please reach out to the project owner to grant you access. If you have the correct permissions, verify that there are no issues with your networking setup.(Error 404: Not Found)


wandb: ERROR Error while calling W&B API: project not found (<Response [404]>)


## Stop the agent

In [12]:
# Ask the agent to exit first (terminate) so it has a chance to clean up, and
# wait for it so the child process is reaped. Fall back to a hard kill only if
# it has not exited within the timeout.
process.terminate()
try:
    process.wait(timeout=10)
except subprocess.TimeoutExpired:
    process.kill()
    process.wait()

