In [1]:
import io
import random
import requests
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.svm import SVR
from sklearn.svm import LinearSVR
from scipy.stats import loguniform, uniform
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV

In [2]:
import warnings
# Blanket suppression: every warning raised by later cells (including
# scikit-learn's) is hidden for the rest of the session.
warnings.filterwarnings("ignore")

# Download the CSV and parse it into `df`.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00514/Bias_correction_ucl.csv"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as CSV.
response = requests.get(url, timeout=30)
response.raise_for_status()
s = response.content
df = pd.read_csv(io.StringIO(s.decode('utf-8')))

In [4]:
# Seed the stdlib RNG; the random.uniform() draws that build the search grids
# in later cells depend on it.
random.seed(42)

# Rebind `df` instead of mutating it in place. With errors='ignore' the cell
# can be re-run on an already-cleaned frame without raising KeyError.
df = df.drop(columns=['Next_Tmin', 'Date'], errors='ignore').dropna()

SC = StandardScaler()
# Positional slice 22:23 keeps the target as a one-column DataFrame (n, 1).
# NOTE(review): presumably this column is Next_Tmax -- confirm against the CSV header.
targets = df.iloc[:, 22:23]
# The first 22 columns are the features, standardised with SC.
# NOTE(review): the scaler is fit on all rows before cross-validation, so the
# validation folds influence the scaling statistics; a Pipeline would avoid that.
X = SC.fit_transform(df.iloc[:, 0:22].values)

print('X:', X.shape)
print('targets:', targets.shape)

X: (7588, 22)
targets: (7588, 1)


In [7]:
# Linear-regression baseline scored with cross-validated RMSE.
results = {}
results['lr'] = {}
# Use `targets`, the name defined in the preprocessing cell; the previous `y`
# is not defined anywhere in the notebook and fails on a fresh kernel.
validation = cross_val_score(LinearRegression(), X, targets, scoring='neg_root_mean_squared_error')
# Scores are negated RMSE, so flip the sign and keep the lowest (best) fold.
# The key spelling 'deafult' is kept as is in case other cells read it.
results['lr']['deafult'] = np.round(np.min(-validation), 3)  # best result

print('Linear Regression:', results['lr']['deafult'])


Linear Regression: 1.454


### MLP 

In [5]:
from sklearn.neural_network import MLPRegressor

In [7]:
# Candidate values for MLPRegressor's hidden_layer_sizes: each candidate is a
# single integer, from 5 to 23 in steps of 3.
MLP_params = {'hidden_layer_sizes': tuple(range(5, 24, 3))}

In [8]:
# Randomised search over MLP_params.
# NOTE(review): no `scoring` is passed, so candidates are ranked with the
# estimator's own score() (R^2 for scikit-learn regressors), while the baseline
# below is RMSE -- the two printed numbers are not on the same scale.
# NOTE(review): the grid has 7 candidates, fewer than n_iter=10.
rnd_search = RandomizedSearchCV(
    estimator=MLPRegressor(),
    param_distributions=MLP_params,
    n_iter=10,
)
search = rnd_search.fit(X, targets)

# Baseline: an MLPRegressor with default settings, cross-validated on negated RMSE.
validacao_MLP = cross_val_score(
    estimator=MLPRegressor(),
    X=X,
    y=targets,
    scoring='neg_root_mean_squared_error',
)


In [10]:
# Report the best configuration found by the search, its search score, and the
# best (lowest) fold RMSE of the default model.
# The "Melhor k" label prints the whole best_params_ dict, not a single k.
print(f"Melhor k {rnd_search.best_params_!s}")
print(f"Melhor score {rnd_search.best_score_!s}")
print(f"default_score {np.round(np.min(-validacao_MLP), 3)!s}")

Melhor k {'hidden_layer_sizes': 23}
Melhor score 0.3900987189951331
default_score 1.899


### Árvore de decisão

In [12]:
from sklearn.tree import DecisionTreeRegressor

In [13]:
# Ten candidate ccp_alpha values, each drawn uniformly from [0.0, 0.4] with the
# stdlib RNG.
dtree_params = dict(
    ccp_alpha=[random.uniform(0.0, 0.4) for _ in range(10)],
)

In [17]:
# Randomised search over dtree_params.
# NOTE(review): no `scoring` is passed, so candidates are ranked with the
# estimator's own score() (R^2 for scikit-learn regressors), while the baseline
# below is RMSE -- the two printed numbers are not on the same scale.
rnd_search = RandomizedSearchCV(
    estimator=DecisionTreeRegressor(),
    param_distributions=dtree_params,
    n_iter=10,
)
search = rnd_search.fit(X, targets)

# Baseline: a DecisionTreeRegressor with default settings, cross-validated on negated RMSE.
validacao_DT = cross_val_score(
    estimator=DecisionTreeRegressor(),
    X=X,
    y=targets,
    scoring='neg_root_mean_squared_error',
)


In [18]:
# Report the best configuration found by the search, its search score, and the
# best (lowest) fold RMSE of the default tree.
# The "Melhor k" label prints the whole best_params_ dict, not a single k.
print(f"Melhor k {rnd_search.best_params_!s}")
print(f"Melhor score {rnd_search.best_score_!s}")
print(f"default_score {np.round(np.min(-validacao_DT), 3)!s}")

Melhor k {'ccp_alpha': 0.034775533051766463}
Melhor score 0.6171919580564847
default_score 2.162


In [19]:
### Random Forest

In [21]:
from sklearn.ensemble import RandomForestRegressor

In [22]:
# Random-forest search grid: three ensemble sizes crossed with three
# max_features settings (9 combinations).
rf_params = dict(
    n_estimators=[10, 100, 1000],
    max_features=[5, 10, 22],
)

In [None]:
# Randomised search over rf_params.
# NOTE(review): no `scoring` is passed, so candidates are ranked with the
# estimator's own score() (R^2 for scikit-learn regressors), while the baseline
# below is RMSE -- the two printed numbers are not on the same scale.
# NOTE(review): the grid has 9 combinations, fewer than n_iter=10.
rnd_search = RandomizedSearchCV(
    estimator=RandomForestRegressor(),
    param_distributions=rf_params,
    n_iter=10,
)
search = rnd_search.fit(X, targets)

# Baseline: a RandomForestRegressor with default settings, cross-validated on negated RMSE.
validacao_RF = cross_val_score(
    estimator=RandomForestRegressor(),
    X=X,
    y=targets,
    scoring='neg_root_mean_squared_error',
)


In [None]:
# Report the best configuration found by the search, its search score, and the
# best (lowest) fold RMSE of the default forest.
# The "Melhor k" label prints the whole best_params_ dict, not a single k.
print(f"Melhor k {rnd_search.best_params_!s}")
print(f"Melhor score {rnd_search.best_score_!s}")
print(f"default_score {np.round(np.min(-validacao_RF), 3)!s}")

### GBM

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

In [None]:
# Gradient-boosting search grid.
gbm_params = {
    # Ten integers in [5, 100). They come from a locally seeded generator
    # because NumPy's global RNG is never seeded in this notebook (only the
    # stdlib `random` module is), so the grid used to change on every run.
    # The seed matches the notebook's random.seed(42).
    'n_estimators': np.random.RandomState(42).randint(5, 100, 10),
    # Ten floats drawn uniformly from [0.01, 0.3] with the stdlib RNG.
    'max_features': [random.uniform(0.01, 0.3) for _ in range(10)],
    'max_depth': [2, 3],
}

In [None]:
# Randomised search over the gradient-boosting grid. This cell previously
# passed rf_params (the random-forest grid), so gbm_params was never used.
# NOTE(review): no `scoring` is passed, so candidates are ranked with the
# estimator's own score() (R^2 for scikit-learn regressors), while the baseline
# below is RMSE -- the two printed numbers are not on the same scale.
rnd_search = RandomizedSearchCV(GradientBoostingRegressor(), gbm_params, n_iter=10)
search = rnd_search.fit(X, targets)

# Baseline: a GradientBoostingRegressor with default settings, cross-validated on negated RMSE.
validacao_GBM = cross_val_score(GradientBoostingRegressor(), X, targets, scoring='neg_root_mean_squared_error')


In [None]:
# Report the best configuration found by the search, its search score, and the
# best (lowest) fold RMSE of the default model.
# The "Melhor k" label prints the whole best_params_ dict, not a single k.
print(f"Melhor k {rnd_search.best_params_!s}")
print(f"Melhor score {rnd_search.best_score_!s}")
print(f"default_score {np.round(np.min(-validacao_GBM), 3)!s}")