<a href="https://colab.research.google.com/github/italomarcelogit/auto-sklearn_regressao/blob/main/auto_sklearn_temperatura_minima_australia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Libs**

In [1]:
import pandas as pd
import numpy as np
import sklearn.metrics
from datetime import datetime
!pip install --upgrade -q pip
!pip install --upgrade -q scipy==1.7.1
!pip install -q auto-sklearn
import plotly.graph_objects as go



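**IMPORTANTE:**
Após as importações e instalações anteriores, reinicie o ambiente de execução e execute o notebook novamente desde o início (a célula de importações precisa ser executada de novo após o reinício).

**IMPORTANT:**
After the imports and installs above, restart the runtime and run the notebook again from the top (the import cell must be re-executed after the restart).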

In [2]:
import autosklearn.regression

# **Dataset**
*A fonte dos dados é creditada ao Australian Bureau of Meteorology.*

*The source of the data is credited to the Australian Bureau of Meteorology.*

In [3]:
# Daily minimum temperatures CSV.
# The source of the data is credited to the Australian Bureau of Meteorology.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv'
# Read the file and relabel its two columns in one chain:
# 'ds' holds the date string, 'y' the temperature value.
df = pd.read_csv(url).set_axis(['ds', 'y'], axis='columns')
df.head()

Unnamed: 0,ds,y
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8


# **Pre-Processing**

In [4]:
# Parse the date strings once, vectorised, instead of constructing a
# one-element DatetimeIndex per row inside .apply() (twice).
datas = pd.to_datetime(df['ds'])

# Build the feature frame without mutating `df`, so this cell can be re-run
# safely. Resulting column order: y, diaAno, ano.
df2 = (
    df
    .drop(columns='ds')  # the raw date string is replaced by the derived columns
    .assign(
        # diaAno == day of year from ds
        diaAno=datas.dt.dayofyear.astype('int64'),
        # ano == year from ds
        ano=datas.dt.year.astype('int64'),
    )
)
df2.head()

Unnamed: 0,y,diaAno,ano
0,20.7,1,1981
1,17.9,2,1981
2,18.8,3,1981
3,14.6,4,1981
4,15.8,5,1981


In [5]:
# First and last year present in the data, shown as a (min, max) tuple.
(df2['ano'].min(), df2['ano'].max())

(1981, 1990)

In [6]:
# Temporal split: rows before `ano` train the model, rows from `ano` onward test it.
ano = 1990
colunas_X = ['diaAno', 'ano']
mascara_treino = df2['ano'] < ano
mascara_teste = df2['ano'] >= ano

Xtreino = df2.loc[mascara_treino, colunas_X]
ytreino = df2.loc[mascara_treino, 'y']
Xteste = df2.loc[mascara_teste, colunas_X]
yteste = df2.loc[mascara_teste, 'y']

# **Model Auto-Sklearn**

In [7]:
# Search settings gathered in one place so they are easy to tune.
automl_params = {
    'time_left_for_this_task': 120,           # overall search budget (seconds, per auto-sklearn docs)
    'per_run_time_limit': 30,                 # budget for a single model run (seconds, per auto-sklearn docs)
    'tmp_folder': '/content/your_directory',  # working folder handed to auto-sklearn
}
automl = autosklearn.regression.AutoSklearnRegressor(**automl_params)

In [8]:
# Run the model search on the training split; the cell output is the estimator's repr.
automl.fit(X=Xtreino, y=ytreino)

AutoSklearnRegressor(per_run_time_limit=30, time_left_for_this_task=120,
                     tmp_folder='/content/your_directory')

In [9]:
# Tabulate the models found by the search (rank, ensemble weight, type, cost, duration).
automl.leaderboard()

Unnamed: 0_level_0,rank,ensemble_weight,type,cost,duration
model_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14,1,0.52,extra_trees,0.382564,2.652869
11,2,0.28,random_forest,0.388911,3.964741
8,3,0.16,mlp,0.487689,2.602429
10,4,0.04,sgd,0.932299,0.703084


In [10]:
# Dump the weighted pipelines in the ensemble together with their hyperparameters.
automl.show_models()

"[(0.520000, SimpleRegressionPipeline({'data_preprocessing:categorical_transformer:categorical_encoding:__choice__': 'no_encoding', 'data_preprocessing:categorical_transformer:category_coalescence:__choice__': 'no_coalescense', 'data_preprocessing:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessing:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:__choice__': 'polynomial', 'regressor:__choice__': 'extra_trees', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'regressor:extra_trees:bootstrap': 'False', 'regressor:extra_trees:criterion': 'mse', 'regressor:extra_trees:max_depth': 'None', 'regressor:extra_trees:max_features': 0.9797793053686011, 'regressor:extra_trees:max_leaf_nodes': 'None', 'regressor:extra_trees:min_impurity_decrease': 0.0, 'regressor:extra_trees:min_samples_leaf': 1, 'regressor:extra_trees:min_samples

# **Prediction**

In [11]:
# Predict on both splits with the fitted model: training first, then test.
label_treino, label_teste = [automl.predict(X) for X in (Xtreino, Xteste)]

In [12]:
# Coefficient of determination on each split; a large gap between the two
# values indicates the model fits the training years better than the test rows.
r2_treino = sklearn.metrics.r2_score(ytreino, label_treino)
r2_teste = sklearn.metrics.r2_score(yteste, label_teste)
print(f"R2 score - treino: {r2_treino}",
      f"\nR2 score - teste: {r2_teste}")

R2 score - treino: 0.8081553262474542 
R2 score - teste: 0.529400122822655


# **Dataviz**

In [14]:
# Rebuild calendar dates for the test rows from (ano, diaAno) with vectorised
# date arithmetic: 1 January of the year plus (day-of-year - 1) days.
# This replaces a per-row .loc lookup and strptime round-trip.
datas_teste = (
    pd.to_datetime(Xteste['ano'].astype(int).astype(str), format='%Y')
    + pd.to_timedelta(Xteste['diaAno'].astype(int) - 1, unit='D')
)
# Same "dd-mm-YYYY" labels as before, kept as a plain list.
vdc = datas_teste.dt.strftime('%d-%m-%Y').tolist()

# Build the comparison frame in one step. Plain lists are used so the frame
# gets a fresh 0..n-1 index instead of inheriting the test rows' index.
previsao = pd.DataFrame({
    'Realizado': yteste.to_list(),      # observed values
    'Previsão': label_teste.tolist(),   # model predictions
    'Período': vdc,                     # date label for each row
})

In [22]:
# Observed values as bars and predictions as a line, sharing the same x labels.
periodo = previsao['Período']

barras_realizado = go.Bar(
    x=periodo,
    y=previsao['Realizado'],
    name='Realizado',
    marker_color='lightsalmon',
)
linha_previsao = go.Scatter(
    x=periodo,
    y=previsao['Previsão'],
    mode='lines+markers',
    name='Previsão',
)

# Traces are passed in the order the original added them: bars, then line.
fig = go.Figure(data=[barras_realizado, linha_previsao])
fig.update_layout(
    title='AutoML - Prever temperatura mínima',
    xaxis_title='Período',
    yaxis_title='Realizado',
    template='plotly_white',
    xaxis={'tickangle': 80},  # tilt the date labels
)
fig.show()