# Random Forest

### Imports

In [346]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder


### dataset

In [347]:
# Load the raw weather observations. `data` is never modified below, so the
# original values stay available for inspection.
data = pd.read_csv('weather.csv')

# Work on a copy that keeps the dtypes pandas inferred (rebuilding the frame
# from `data.values` would coerce every column to object dtype).
df = data.copy()

# Encode each categorical feature with its OWN LabelEncoder and keep the fitted
# encoders. A single shared encoder is overwritten on every fit, which makes it
# impossible to map the integer codes back to the original category labels.
# After this loop, encoders[col].classes_ lists the labels for column `col`
# in code order.
encoders = {}
for col in ["outlook", "temperature", "humidity", "windy"]:
    label_encoder = LabelEncoder()
    df[col] = label_encoder.fit_transform(df[col])
    encoders[col] = label_encoder

# Separate the target variable from the features.
x = df.drop(columns=["play"])
y = df["play"]

df


Unnamed: 0,outlook,temperature,humidity,windy,play
0,2,1,0,0,no
1,2,1,0,1,no
2,0,1,0,0,yes
3,1,2,0,0,yes
4,1,0,1,0,yes
5,1,0,1,1,no
6,0,0,1,1,yes
7,2,2,0,0,no
8,2,0,1,0,yes
9,1,2,1,0,yes


### divide data 

In [348]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# partition identical across runs.
splits = train_test_split(x, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

X_train


Unnamed: 0,outlook,temperature,humidity,windy
12,0,1,1,0
5,1,0,1,1
8,2,0,1,0
2,0,1,0,0
1,2,1,0,1
13,1,2,0,1
4,1,0,1,0
7,2,2,0,0
10,2,2,1,1
3,1,2,0,0


### Build Random Forest Model with hyperparameters

In [349]:
# Hyperparameter search space to be optimised with GridSearchCV:
# every combination of the values below is evaluated.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [350]:

# Base Random Forest estimator; the fixed seed makes each fit reproducible.
random_forest = RandomForestClassifier(random_state=42)

# Search param_grid for the best hyperparameters, scoring each candidate
# by accuracy under 4-fold cross-validation on the training set.
grid_search = GridSearchCV(
    estimator=random_forest,
    param_grid=param_grid,
    scoring='accuracy',
    cv=4,
)
grid_search.fit(X_train, y_train)


### Build model with best hyperparameters

In [351]:
# Report the best hyperparameter combination found by the grid search.
best_params = grid_search.best_params_
print("Best parameters: ")
print(best_params)

# GridSearchCV uses refit=True by default: after the search it re-trains the
# base estimator (random_state=42) with the best parameters on the full
# training set. Reuse that fitted model instead of constructing and fitting an
# identical forest a second time.
best_random_forest = grid_search.best_estimator_

# Display the fitted estimator as the cell output.
best_random_forest


Best parameters: 
{'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 50}


### Predict

In [352]:
# One already-encoded sample, given as a DataFrame with the same columns the
# model was trained on (values follow the column order of X_train). Passing a
# bare nested list to a model fitted on a DataFrame triggers scikit-learn's
# "X does not have valid feature names" warning.
sample = pd.DataFrame([[1, 0, 1, 1]], columns=X_train.columns)
y_pred = best_random_forest.predict(sample)

# predict() returns an array with one label per input row. Format the single
# label explicitly: adding a str to the array only works for object-dtype
# arrays and prints a list repr instead of the label.
print(f'prevision:{y_pred[0]}')

['prevision:yes']


