# 3_hyperopt

A notebook to set a research of hyperparamter based on hyperopt libraries

In [1]:
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
from hyperopt import fmin, tpe, hp, anneal, Trials

evaluation_metrics = []

In [2]:
dfp_train = pd.read_csv('./data/rtu/model_train_data.csv')
dfp_train['date'] = pd.to_datetime(dfp_train['date'])

dfp_test = pd.read_csv('./data/rtu/model_test_data.csv')
dfp_test['date'] = pd.to_datetime(dfp_test['date'])

In [3]:
columns_weather = [ 't2m_min_bordeaux',
       't2m_bordeaux', 't2m_max_bordeaux', 'prectot_bordeaux', 't2m_min_lille',
       't2m_lille', 't2m_max_lille', 'prectot_lille', 't2m_min_paris',
       't2m_paris', 't2m_max_paris', 'prectot_paris', 't2m_min_rennes',
       't2m_rennes', 't2m_max_rennes', 'prectot_rennes', 't2m_min_nantes',
       't2m_nantes', 't2m_max_nantes', 'prectot_nantes', 't2m_min_toulouse',
       't2m_toulouse', 't2m_max_toulouse', 'prectot_toulouse',
       't2m_min_marseille', 't2m_marseille', 't2m_max_marseille',
       'prectot_marseille', 't2m_min_lyon', 't2m_lyon', 't2m_max_lyon',
       'prectot_lyon', 't2m_min_nice', 't2m_nice', 't2m_max_nice',
       'prectot_nice', 't2m_min_strasbourg', 't2m_strasbourg',
       't2m_max_strasbourg', 'prectot_strasbourg', 't2m_min_montpellier',
       't2m_montpellier', 't2m_max_montpellier', 'prectot_montpellier',
       'weighted_t2m', 'weighted_t2m_min', 'weighted_t2m_max',
       'weighted_prectot']

columns_features = ['weekday', 'month', 'week_number'] + columns_weather

In [4]:
column_target = 'daily_electrical_consumption'
X_train, y_train = dfp_train[columns_features], dfp_train[column_target]
X_test, y_test = dfp_test[columns_features], dfp_test[column_target]

In [10]:
def train_and_evaluate(params, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test):
    # the function gets a set of variable parameters in "param"
    params = {
        'n_estimators': int(params['n_estimators']),
        'criterion' : params['criterion'],
        'max_depth' : params['max_depth'],
        'min_samples_split' : params['min_samples_split'],
        'max_features' : params['max_features']
    }
    
    # we use this params to create a new LGBM Regressor
    model = RandomForestRegressor( **params)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    rmse = mean_squared_error(y_test, predictions, squared=False)
    return rmse

# Space definition
space={
    'n_estimators': hp.quniform('n_estimators', 2, 100, 1),
    'criterion' : hp.choice('criterion', ['mse', 'mae']),
    'max_depth' : hp.quniform('max_depth', 2, 100, 1),
    'min_samples_split' : hp.quniform('min_samples_split', 0.1, 1, 0.1),
    'max_features' : hp.choice('max_features', ['auto', 'sqrt', 'log2']),
}

# Trigger the search
trials = Trials()
best=fmin(fn=train_and_evaluate, # function to optimize
          space=space, 
          algo=tpe.suggest, # optimization algorithm, hyperotp will select its parameters automatically
          max_evals=100, # maximum number of iterations
          trials=trials, # logging
          rstate=np.random.RandomState(0) # fixing random state for the reproducibility
         )

100%|██████████| 100/100 [01:37<00:00,  1.02trial/s, best loss: 437670.39141869824]


In [13]:
best

{'criterion': 0,
 'max_depth': 47.0,
 'max_features': 0,
 'min_samples_split': 0.1,
 'n_estimators': 20.0}