# Modelo de Machine Learning - Random Forest 

### Import the libraries

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, accuracy_score

### Leitura do dataset 

In [None]:
# Load the cleaned dataset from its CSV file. The path is relative, so it is
# resolved against the notebook's current working directory.
df = pd.read_csv(
    filepath_or_buffer='../../../datasets/parte1/dataset_cleaned.csv',
)

### Criação da variável que queremos prever

In [None]:
# Separate the prediction target from the predictors: y is the
# 'EncodedRainTomorrow' column, X is every remaining column of df.
y = df['EncodedRainTomorrow']
X = df.drop(columns=['EncodedRainTomorrow'])

### Criação da Random Forest

In [None]:
# Hold out 30% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.
# stratify=y preserves the class proportions of the target in both the train
# and the test partition, so the hold-out metrics are computed on a class mix
# that matches the training data (the cross-validation inside GridSearchCV is
# already stratified for classifiers).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2023,
    stratify=y,
)

In [None]:
# Base random forest classifier with default hyperparameters. The fixed
# random_state makes training reproducible. This instance is only used as the
# template estimator for the grid search; `model` is later rebound to the best
# estimator found by that search.
model = RandomForestClassifier(random_state=2023)

In [None]:
# Hyperparameter grid explored exhaustively by the grid search:
# 3 * 3 * 3 * 3 * 2 = 162 candidate combinations.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'class_weight': [None, 'balanced'],
}

# Each candidate is scored by cross-validated F1. The number of folds is
# pinned explicitly (cv=5) so the search does not depend on the library's
# default, and the 162 * 5 = 810 fits are distributed over all available CPU
# cores (n_jobs=-1). Because the estimator has a fixed random_state, running
# the fits in parallel does not change the scores or the selected parameters.
# refit=True retrains the best configuration on the whole training set.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring="f1",
    cv=5,
    n_jobs=-1,
    refit=True,
    verbose=2,
)
grid_search.fit(X_train, y_train)

# From here on `model` is the refitted best estimator, not the untuned one.
model = grid_search.best_estimator_

In [None]:
# Display the hyperparameter combination that obtained the best
# cross-validated F1 score during the grid search.
grid_search.best_params_

### Cálculo das predictions

In [None]:
# Predict the class labels of the held-out test set with the tuned model
# (the best estimator selected by the grid search).
predictions = model.predict(X_test)

### Accuracy do modelo 

In [None]:
# Fraction of test samples whose predicted label matches the true label,
# printed with two decimal places.
accuracy = accuracy_score(y_test, predictions)
print("%0.2f accuracy" % accuracy)

### Classification Report

In [None]:
# Build the text report for the test-set predictions, then print it.
report = classification_report(y_test, predictions)
print(report)

### Matriz de Confusão

In [None]:
# Draw the confusion matrix of true test labels against predicted labels,
# then render the matplotlib figure.
ConfusionMatrixDisplay.from_predictions(y_test, predictions)
plt.show()