<a href="https://colab.research.google.com/github/alonsocampana/fire-montesinho/blob/main/Hyperparameter_tuning_classificators_accuracy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Dependency (run once on a fresh environment such as Colab): %pip install scikit-optimize==0.8.1

Collecting scikit-optimize
  Downloading scikit_optimize-0.8.1-py2.py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 9.3 MB/s eta 0:00:01
Collecting pyaml>=16.9
  Downloading pyaml-21.8.3-py2.py3-none-any.whl (17 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-21.8.3 scikit-optimize-0.8.1


In [None]:
import sys
sys.path.insert(1, './imports')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from exploratory_analysis import *
from preprocessing import *
from model_selection import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
import skopt
pd.options.display.max_rows = 30
from skopt.space import Integer
from skopt.space import Real
from skopt.space import Categorical
from skopt.utils import use_named_args
from skopt import gp_minimize
from skopt import forest_minimize

# Searching the model space for the most accurate classifier (Jan–May)

In [None]:
# Hyperparameter space handed to the optimiser. The order here is the order in
# which the optimiser reports the best point: C, kernel, degree, gamma.
search_space = [
    Real(1e-6, 100.0, 'log-uniform', name='C'),
    Categorical(['rbf'], name='kernel'),
    # NOTE(review): scikit-learn documents `degree` as a polynomial-kernel
    # parameter; with 'rbf' as the only kernel choice this dimension
    # presumably has no effect on the score — confirm before relying on it.
    Integer(1, 5, name='degree'),
    Real(1e-6, 100.0, 'log-uniform', name='gamma'),
]


@use_named_args(search_space)
def evaluate_model(**params):
    """Objective for the optimiser: cross-validated error of an SVC.

    The keyword arguments are the named dimensions of ``search_space`` and are
    applied to a fresh ``SVC``. Accuracy is estimated with stratified 10-fold
    cross-validation repeated 5 times (fixed seed 3558), then averaged.

    ``X`` and ``y`` are read from the notebook's global namespace at call
    time, so they must be defined before the optimiser runs.

    Returns:
        ``1.0 - mean accuracy``, so that a minimiser maximises accuracy.
    """
    classifier = SVC().set_params(**params)
    splitter = RepeatedStratifiedKFold(n_splits=10, n_repeats=5, random_state=3558)
    fold_accuracies = cross_val_score(
        classifier, X, y, cv=splitter, n_jobs=-1, scoring='accuracy'
    )
    # The optimiser minimises, so turn the accuracy into an error rate.
    return 1.0 - fold_accuracies.mean()

In [None]:
# Load the fire records used in this notebook (the file name suggests the
# January–May subset; the path is relative to the working directory).
fires = pd.read_csv(filepath_or_buffer="fires_jan_may.csv")

In [None]:
# Classification data: predict the `area_bool` flag.
# The dropped columns are the two targets (`area`, `area_bool`) plus what look
# like leftover index columns from earlier CSV round-trips — TODO confirm.
# The original list named 'index' twice; each label is listed once here.
X = fires.drop(columns=["area", "area_bool", "index", "index.1", "index.2", 'Unnamed: 0'])
y = fires["area_bool"]

In [None]:
# Gaussian-process search over `search_space`, minimising `evaluate_model`
# (1 - mean CV accuracy). The optimiser is seeded so that Restart & Run All
# reproduces the same best point; the seed value matches the one already used
# for the cross-validation splitter inside `evaluate_model`.
result = gp_minimize(evaluate_model, search_space, random_state=3558)



In [None]:
# Best hyperparameters found (ordered as in `search_space`), followed by the
# objective value at that point (1 - mean CV accuracy), one per line.
print(result.x, result.fun, sep="\n")

[100.0, 'rbf', 5, 0.002652859881404326]
0.29055555555555557


# Searching for the regression model with the smallest loss

## Lasso

In [None]:
# Regression data: keep only rows flagged `area_bool == 1` and predict `area`.
# NOTE: this rebinds the global `X` and `y` that `evaluate_model` reads, so the
# classification search must not be re-run after this cell without first
# re-running the cell that builds the classification X/y.
filter_nonzero = fires["area_bool"] == 1
# The original drop list named 'index' twice; each label is listed once here.
X = fires.loc[filter_nonzero].drop(
    columns=["area", "area_bool", "index", "index.1", "index.2", 'Unnamed: 0']
)
# Single `.loc` lookup instead of chained indexing (`fires[mask]["area"]`).
y = fires.loc[filter_nonzero, "area"]

In [None]:
# Lasso grid over the listed polynomial degrees. `hyperpar_grid_lasso` is not
# defined in this notebook; it presumably comes from one of the star-imported
# project modules under ./imports — TODO confirm which one. The returned
# object is displayed in the next cell.
losses_lasso_df = hyperpar_grid_lasso(X, y, degrees = [1, 2, 3, 4])

In [None]:
# Show the Lasso grid. In the recorded output the row labels are 1-4 (matching
# the `degrees` passed in) and the 20 column labels run from 0.1 to 100;
# presumably these are regularisation strengths and the cells are losses —
# TODO confirm against hyperpar_grid_lasso.
# NOTE(review): the recorded table has identical rows for every degree, which
# suggests the degree is not influencing the fit — worth verifying.
losses_lasso_df

Unnamed: 0,0.100000,0.143845,0.206914,0.297635,0.428133,0.615848,0.885867,1.274275,1.832981,2.636651,3.792690,5.455595,7.847600,11.288379,16.237767,23.357215,33.598183,48.329302,69.519280,100.000000
1,280.532749,278.108662,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243
2,280.532749,278.108662,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243
3,280.532749,278.108662,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243
4,280.532749,278.108662,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243,277.926243


## Ridge

In [None]:
# Ridge grid over the listed polynomial degrees, using the X/y currently bound
# in the notebook. `hyperpar_grid_ridge` is not defined in this notebook; it
# presumably comes from one of the star-imported project modules under
# ./imports — TODO confirm which one.
losses_ridge_df = hyperpar_grid_ridge(X, y, degrees = [1, 2, 3, 4])

In [None]:
# Show the Ridge grid. In the recorded output the row labels are 1-4 (matching
# the `degrees` passed in) and the 20 column labels run from 0.1 to 100;
# presumably these are regularisation strengths and the cells are losses —
# TODO confirm against hyperpar_grid_ridge.
losses_ridge_df

Unnamed: 0,0.100000,0.143845,0.206914,0.297635,0.428133,0.615848,0.885867,1.274275,1.832981,2.636651,3.792690,5.455595,7.847600,11.288379,16.237767,23.357215,33.598183,48.329302,69.519280,100.000000
1,388.470804,320.782362,279.571537,256.553774,246.07419,243.858677,246.481037,251.286181,256.488693,261.150673,264.960824,267.946515,270.25665,272.049249,273.450901,274.551311,275.411837,276.076932,276.582586,276.960276
2,300.646985,295.885875,290.318361,284.21451,278.019636,272.28135,267.529248,264.158257,262.351762,262.048662,262.957732,264.637804,266.634457,268.605841,270.369173,271.868846,273.117626,274.150806,275.002338,275.697703
3,281.708523,281.211956,280.532915,279.623202,278.4381,276.951131,275.175381,273.186299,271.134916,269.238423,267.741554,266.854679,266.686652,267.20068,268.223895,269.516716,270.862365,272.121267,273.232425,274.185145
4,274.99959,274.899425,274.758012,274.559954,274.285666,273.911752,273.413048,272.767552,271.965307,271.021133,269.988411,268.967631,268.101407,267.549417,267.443389,267.835155,268.666347,269.787318,271.019644,272.219692


## Gradient boosting regressor with polynomial features

In [None]:
# Hyperparameter search for the gradient-boosting regressor on the current
# X/y. `hyper_opt_gbr` is not defined in this notebook (presumably provided by
# a star-imported project module — TODO confirm). The recorded output is a
# dict with the keys 'deg', 'loss', 'lr' and 'n_estimators'.
hyper_opt_gbr(X, y)

{'deg': 3, 'loss': 227.0846601173799, 'lr': 0.1875, 'n_estimators': 20}