In [2]:
# ---------------------------------------------------------
# Modelagem Explicativa
# ---------------------------------------------------------
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# Load the pre-processed air-quality table and report its dimensions.
df = pd.read_csv("../data/processed/air_quality_clean.csv")
print(f"✅ Dataset carregado: {df.shape}")

# Predictors and target. Month is expanded into indicator columns, with the
# first level dropped so that it serves as the baseline category.
X = pd.get_dummies(
    df[['Temperature', 'Humidity', 'Wind Speed', 'Month']],
    columns=['Month'],
    drop_first=True,
)
y = df['Pollution_Index']

# Linear regression (OLS) with an explicit intercept column; the design
# matrix is cast to float before fitting.
X_const = sm.add_constant(X)
model = sm.OLS(endog=y, exog=X_const.astype(float)).fit()
print(model.summary())

# Random forest fitted on 80% of the rows and scored on the remaining 20%;
# the split and the forest both use seed 42.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
rf = RandomForestRegressor(n_estimators=200, random_state=42).fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Hold-out metrics for the forest, each computed right before it is printed.
print(f"\n🌳 Random Forest:")
rf_r2 = r2_score(y_test, y_pred)
print(f"R²: {rf_r2:.3f}")
rf_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rf_rmse:.3f}")


✅ Dataset carregado: (5203, 16)
                            OLS Regression Results                            
Dep. Variable:        Pollution_Index   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.5980
Date:                Tue, 21 Oct 2025   Prob (F-statistic):              0.869
Time:                        17:39:20   Log-Likelihood:                -21725.
No. Observations:                5203   AIC:                         4.348e+04
Df Residuals:                    5188   BIC:                         4.358e+04
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          61.