In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Load the dataset
df = pd.read_csv("home_temperature_and_humidity_smoothed_filled.csv")
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

def create_features(df):
    df = df.copy()
    df['hour'] = df.index.hour
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['dayofmonth'] = df.index.day
    df['weekofyear'] = df.index.isocalendar().week
    return df

df = create_features(df)

# Creating lag features for temperature and humidity of interest
df['temp_lag1'] = df['temperature_exterieur'].shift(1)
df['temp_lag2'] = df['temperature_exterieur'].shift(2)
df['humidity_lag1'] = df['humidity_exterieur'].shift(1)
df['humidity_lag2'] = df['humidity_exterieur'].shift(2)

df.dropna(inplace=True)


In [5]:
# Define features and target
features = ['temp_lag1', 'temp_lag2', 'humidity_lag1', 'humidity_lag2', 'hour', 'dayofweek', 'month', 'year', 'dayofyear', 'dayofmonth', 'weekofyear']
target_temp = 'temperature_exterieur'
target_humidity = 'humidity_exterieur'



X = df[features]
y_temp = df[target_temp]
y_humidity = df[target_humidity]



X_train, X_test, y_temp_train, y_temp_test = train_test_split(X, y_temp, test_size=0.2, shuffle=False)
X_train, X_test, y_humidity_train, y_humidity_test = train_test_split(X, y_humidity, test_size=0.2, shuffle=False)

# Initialize and train the Random Forest Regressor
rf_temp = RandomForestRegressor(n_estimators=100, random_state=42)
rf_humidity = RandomForestRegressor(n_estimators=100, random_state=42)

rf_temp.fit(X_train, y_temp_train)
rf_humidity.fit(X_train, y_humidity_train)

# Train SARIMA Model for Temperature
sarima_temp = SARIMAX(df['temperature_exterieur'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 7))
sarima_temp_fitted = sarima_temp.fit(disp=False)

# Train SARIMA Model for Humidity
sarima_humidity = SARIMAX(df['humidity_exterieur'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 7))
sarima_humidity_fitted = sarima_humidity.fit(disp=False)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [11]:
# Evaluate the Random Forest model
y_temp_pred = rf_temp.predict(X_test)
y_humidity_pred = rf_humidity.predict(X_test)

mae_temp = mean_absolute_error(y_temp_test, y_temp_pred)
mse_temp = mean_squared_error(y_temp_test, y_temp_pred)
mae_humidity = mean_absolute_error(y_humidity_test, y_humidity_pred)
mse_humidity = mean_squared_error(y_humidity_test, y_humidity_pred)

print(f"Random Forest - Temperature MAE: {mae_temp:.2f}, MSE: {mse_temp:.2f}")
print(f"Random Forest - Humidity MAE: {mae_humidity:.2f}, MSE: {mse_humidity:.2f}")

# Evaluate the SARIMA model
sarima_temp_pred = sarima_temp_fitted.get_forecast(steps=len(X_test)).predicted_mean
sarima_humidity_pred = sarima_humidity_fitted.get_forecast(steps=len(X_test)).predicted_mean

mae_sarima_temp = mean_absolute_error(y_temp_test, sarima_temp_pred)
mse_sarima_temp = mean_squared_error(y_temp_test, sarima_temp_pred)
mae_sarima_humidity = mean_absolute_error(y_humidity_test, sarima_humidity_pred)
mse_sarima_humidity = mean_squared_error(y_humidity_test, sarima_humidity_pred)

print(f"SARIMA - Temperature MAE: {mae_sarima_temp:.2f}, MSE: {mse_sarima_temp:.2f}")
print(f"SARIMA - Humidity MAE: {mae_sarima_humidity:.2f}, MSE: {mse_sarima_humidity:.2f}")


Random Forest - Temperature MAE: 0.06, MSE: 0.01
Random Forest - Humidity MAE: 0.34, MSE: 0.20
SARIMA - Temperature MAE: 17.16, MSE: 309.06
SARIMA - Humidity MAE: 38.43, MSE: 1613.50


In [8]:
# Predict next day's temperature and humidity using Random Forest
latest_data = df[features].iloc[-1].values.reshape(1, -1)

next_day_temp_rf = rf_temp.predict(latest_data)
next_day_humidity_rf = rf_humidity.predict(latest_data)

print(f'Random Forest - Predicted Temperature for Next Day: {next_day_temp_rf[0]:.2f}')
print(f'Random Forest - Predicted Humidity for Next Day: {next_day_humidity_rf[0]:.2f}')

# Predict next day's temperature and humidity using SARIMA
next_day_temp_sarima = sarima_temp_fitted.get_forecast(steps=1).predicted_mean
next_day_humidity_sarima = sarima_humidity_fitted.get_forecast(steps=1).predicted_mean

print(f'SARIMA - Predicted Temperature for Next Day: {next_day_temp_sarima.iloc[-1]:.2f}')
print(f'SARIMA - Predicted Humidity for Next Day: {next_day_humidity_sarima.iloc[-1]:.2f}')


Random Forest - Predicted Temperature for Next Day: 9.51
Random Forest - Predicted Humidity for Next Day: 75.10
SARIMA - Predicted Temperature for Next Day: 9.27
SARIMA - Predicted Humidity for Next Day: 76.89




### **CODE FOR RF MODEL SAVING**

In [12]:
import joblib

# Save the Random Forest models
joblib.dump(rf_temp, 'rf_temp_model.pkl')
joblib.dump(rf_humidity, 'rf_humidity_model.pkl')

print("Models saved successfully.")


Models saved successfully.


### **CODE FOR LOADING THE RF SAVED MODEL**

In [10]:
import joblib

# Load the Random Forest models
rf_temp = joblib.load('rf_temp_model.pkl')
rf_humidity = joblib.load('rf_humidity_model.pkl')

print("Models loaded successfully.")


Models loaded successfully.
