# 3_hyperopt

A notebook that runs a hyperparameter search using the hyperopt library.

In [28]:
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
from hyperopt import fmin, tpe, hp, anneal, Trials

import wandb
import pickle

# NOTE(review): this list is never read or appended to in the visible part of the
# notebook — confirm whether it is still needed.
evaluation_metrics = []

In [None]:
# Read the train and test CSV files and convert their 'date' column to datetimes.
dfp_train = pd.read_csv('./data/rtu/model_train_data.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
dfp_test = pd.read_csv('./data/rtu/model_test_data.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [None]:
# Weather columns: four measurements (t2m_min, t2m, t2m_max, prectot) for each
# location listed below, followed by the four 'weighted_*' columns.
columns_weather = [
    pattern.format(location)
    for location in (
        'bordeaux', 'lille', 'paris', 'rennes', 'nantes', 'toulouse',
        'marseille', 'lyon', 'nice', 'strasbourg', 'montpellier',
    )
    for pattern in ('t2m_min_{}', 't2m_{}', 't2m_max_{}', 'prectot_{}')
] + [
    'weighted_t2m',
    'weighted_t2m_min',
    'weighted_t2m_max',
    'weighted_prectot',
]

# Model inputs: weekday / month / week_number first, then every weather column.
columns_features = ['weekday', 'month', 'week_number'] + columns_weather

In [None]:
# Column used as the regression target.
column_target = 'daily_electrical_consumption'

# Feature frame and target series for each split.
X_train = dfp_train[columns_features]
y_train = dfp_train[column_target]
X_test = dfp_test[columns_features]
y_test = dfp_test[column_target]

In [None]:
def train_and_evaluate(params, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test):
    """Fit a RandomForestRegressor with the given hyperparameters and return its RMSE.

    This is the objective passed to ``fmin``: lower return values are better.

    Parameters
    ----------
    params : dict
        One sampled point of the search space. Must contain the keys
        'n_estimators', 'criterion', 'max_depth', 'min_samples_split'
        and 'max_features'.
    X_train, y_train
        Data the model is fitted on. The defaults are bound to the notebook
        variables of the same name at the time this function is defined.
    X_test, y_test
        Data the fitted model is scored on. Same default binding as above.

    Returns
    -------
    float
        Root mean squared error of the predictions on ``X_test``.

    Notes
    -----
    NOTE(review): with the default arguments the score is computed on
    X_test / y_test, so a search driven by this function is tuned against
    the test data. Consider scoring on a separate validation split.
    """
    max_depth = params['max_depth']
    model_params = {
        # hp.quniform samples floats (e.g. 42.0); the tree-count and depth
        # parameters must be integers for scikit-learn.
        'n_estimators': int(params['n_estimators']),
        'criterion': params['criterion'],
        # Keep None (unlimited depth) working for callers that pass it explicitly.
        'max_depth': None if max_depth is None else int(max_depth),
        'min_samples_split': params['min_samples_split'],
        'max_features': params['max_features'],
    }

    # Build and fit a random forest with the sampled hyperparameters.
    model = RandomForestRegressor(**model_params)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Take the square root of the MSE explicitly instead of passing
    # squared=False, which recent scikit-learn releases deprecate and remove.
    # The two are equivalent for a single target column.
    return float(np.sqrt(mean_squared_error(y_test, predictions)))

# Search space: one hyperopt distribution per RandomForestRegressor hyperparameter.
space = dict(
    n_estimators=hp.quniform('n_estimators', 2, 100, 1),
    criterion=hp.choice('criterion', ['mse', 'mae']),
    max_depth=hp.quniform('max_depth', 2, 100, 1),
    min_samples_split=hp.quniform('min_samples_split', 0.1, 1, 0.1),
    max_features=hp.choice('max_features', ['auto', 'sqrt', 'log2']),
)

# Run the search; every evaluation is recorded in `trials`.
trials = Trials()
best = fmin(
    fn=train_and_evaluate,  # objective to minimise
    space=space,
    algo=tpe.suggest,  # search algorithm; hyperopt picks its settings automatically
    max_evals=10,  # number of evaluations to run
    trials=trials,  # keeps the history of evaluated points
    rstate=np.random.RandomState(0),  # fixed seed so the search is reproducible
)

In [None]:
# Refit a model on the training data with the best point found by the search.
# `best` holds the hp.choice parameters as indices into their option lists,
# so those are mapped back to the actual option values here.
params = dict(
    n_estimators=int(best['n_estimators']),
    criterion=('mse', 'mae')[best['criterion']],
    max_depth=int(best['max_depth']),
    min_samples_split=best['min_samples_split'],
    max_features=('auto', 'sqrt', 'log2')[best['max_features']],
)

# Build the random forest from these parameters and fit it.
model = RandomForestRegressor(**params)
model.fit(X_train, y_train)

In [None]:
# Start a W&B run and log the regressor plots for the fitted model.
wandb.init(
    project='french_electrical_consumption',
    entity='jmdaignan',
)
wandb.sklearn.plot_regressor(
    model,
    X_train,
    X_test,
    y_train,
    y_test,
    model_name='best_hyperopt',
)

In [29]:
# Pickle the fitted model to disk and log the file to W&B as a model artifact.
run = wandb.init(
    project='french_electrical_consumption',
    entity='jmdaignan',
)

# Write the pickle first so the file exists when it is added to the artifact.
file_model = './data/model.pkl'
with open(file_model, 'wb') as file:
    pickle.dump(model, file)

trained_model_artifact = wandb.Artifact(
    'best_model_hyperopt',
    type='model',
    description='Best model from the hyperopt',
)
trained_model_artifact.add_file(file_model)

run.log_artifact(trained_model_artifact)

[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


<wandb.sdk.wandb_artifacts.Artifact at 0x7f2ed27e0d50>