# 6_wandb_sweep

A notebookto explore the sweep feature of wandb

In [8]:
import random
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_squared_error
import wandb

evaluation_metrics = []

In [9]:
os.environ["WANDB_SILENT"] = "true" # make wandb less verbose

In [10]:
dfp_train = pd.read_csv('./data/rtu/model_train_data.csv')
dfp_train['date'] = pd.to_datetime(dfp_train['date'])

dfp_test = pd.read_csv('./data/rtu/model_test_data.csv')
dfp_test['date'] = pd.to_datetime(dfp_test['date'])

In [11]:
columns_weather = [ 't2m_min_bordeaux',
       't2m_bordeaux', 't2m_max_bordeaux', 'prectot_bordeaux', 't2m_min_lille',
       't2m_lille', 't2m_max_lille', 'prectot_lille', 't2m_min_paris',
       't2m_paris', 't2m_max_paris', 'prectot_paris', 't2m_min_rennes',
       't2m_rennes', 't2m_max_rennes', 'prectot_rennes', 't2m_min_nantes',
       't2m_nantes', 't2m_max_nantes', 'prectot_nantes', 't2m_min_toulouse',
       't2m_toulouse', 't2m_max_toulouse', 'prectot_toulouse',
       't2m_min_marseille', 't2m_marseille', 't2m_max_marseille',
       'prectot_marseille', 't2m_min_lyon', 't2m_lyon', 't2m_max_lyon',
       'prectot_lyon', 't2m_min_nice', 't2m_nice', 't2m_max_nice',
       'prectot_nice', 't2m_min_strasbourg', 't2m_strasbourg',
       't2m_max_strasbourg', 'prectot_strasbourg', 't2m_min_montpellier',
       't2m_montpellier', 't2m_max_montpellier', 'prectot_montpellier',
       'weighted_t2m', 'weighted_t2m_min', 'weighted_t2m_max',
       'weighted_prectot']

columns_features = ['weekday', 'month', 'week_number'] + columns_weather

In [12]:
# Keep it simple only date and consumption
column_target = 'daily_electrical_consumption'
X_train, y_train = dfp_train[columns_features], dfp_train[column_target]
X_test, y_test = dfp_test[columns_features], dfp_test[column_target]

In [13]:
sweep_config = {
    "name" : "sweep-bayes",
    "method" : "bayes",
    "metric" : {
        "name" : "rmse",
        "goal" : "minimise"
    },
    "parameters" : {
        "criterion" : {
            "values" : ['mse', 'mae']
    },
        "n_estimators" :{
            "min": 1,
            "max": 100,
            "q" : 1
    }, 
        "min_samples_split" :{
            "min": 0.1,
            "max": 1,
            "q" : 0.1
    },
        "max_features" : {
            "values" : ['auto', 'sqrt', 'log2']
        },
        "max_depth" : {
            "min" : 2,
            "max": 100,
            "q":5
        }
        
  }
}

def train_and_evaluate():
    with wandb.init() as run:
        config = wandb.config
        model = RandomForestRegressor( **config)
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        rmse = mean_squared_error(y_test, predictions, squared=False)
        wandb.log({'rmse' : rmse})

In [14]:
count = 100 # number of runs to execute
sweep_config['name'] = 'sweep-random'
sweep_config['method'] = 'random'
sweep_id = wandb.sweep(sweep_config, project='french_electrical_consumption', entity='jmdaignan')
wandb.agent(sweep_id, function=train_and_evaluate, count=count)

Create sweep with ID: oasxuvvr
Sweep URL: https://wandb.ai/jmdaignan/french_electrical_consumption/sweeps/oasxuvvr


[34m[1mwandb[0m: Agent Starting Run: jbesx7qt with config:
[34m[1mwandb[0m: 	criterion: mae
[34m[1mwandb[0m: 	max_depth: 77
[34m[1mwandb[0m: 	max_features: sqrt
[34m[1mwandb[0m: 	min_samples_split: 0.62448292692302
[34m[1mwandb[0m: 	n_estimators: 6


In [15]:
count = 100 # number of runs to execute
sweep_config['name'] = 'sweep-bayes'
sweep_config['method'] = 'bayes'
sweep_id = wandb.sweep(sweep_config, project='french_electrical_consumption', entity='jmdaignan')
wandb.agent(sweep_id, function=train_and_evaluate, count=count)

Create sweep with ID: k05shddu
Sweep URL: https://wandb.ai/jmdaignan/french_electrical_consumption/sweeps/k05shddu
