# Random Forest

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from datetime import datetime

In [2]:
# Read the CSV into a DataFrame (the path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer="data/interpolated_co2.csv")

In [3]:

# Parse the 'Date' column into datetime values, then order rows chronologically
df = df.assign(Date=pd.to_datetime(df['Date'])).sort_values(by='Date')

In [5]:
# Name of the column the model is trained to predict
target_col = "CO2"

In [8]:
# Derive calendar features from 'Date'. Each callable receives the frame being
# assigned to, so all columns are computed from the same 'Date' values.
df = df.assign(
    year=lambda frame: frame['Date'].dt.year,
    month=lambda frame: frame['Date'].dt.month,
    day=lambda frame: frame['Date'].dt.day,
    dayofyear=lambda frame: frame['Date'].dt.dayofyear,
    week=lambda frame: frame['Date'].dt.isocalendar().week,  # ISO week number
    weekday=lambda frame: frame['Date'].dt.weekday,
)

In [9]:
# Columns used as model inputs
features = [
    'year',
    'month',
    'day',
    'dayofyear',
    'week',
    'weekday',
]

In [10]:

# Train-test split: chronological hold-out, no shuffling.
#   train: every row dated before 2025-01-01
#   test : rows dated 2025-01-01 through 2025-04-13 (inclusive)
#
# Half-open bounds (`< next day`) are used on purpose. A date string such as
# '2024-12-31' compares as midnight of that day, so an inclusive `<=` bound
# would drop any row carrying a time-of-day on the boundary date from BOTH
# splits. For midnight-only data the selection is unchanged.
train_df = df[df['Date'] < '2025-01-01']
test_df = df[(df['Date'] >= '2025-01-01') & (df['Date'] < '2025-04-14')]

# Fail early with a clear message instead of a less obvious error from
# fit()/predict() when the CSV does not cover one of the two periods.
if train_df.empty:
    raise ValueError("Training split is empty: no rows dated before 2025-01-01.")
if test_df.empty:
    raise ValueError("Test split is empty: no rows dated 2025-01-01 to 2025-04-13.")

# Feature matrices and target vectors for each split
X_train = train_df[features]
y_train = train_df[target_col]

X_test = test_df[features]
y_test = test_df[target_col]

In [11]:
# Build a 100-tree random forest with a fixed seed and fit it on the training split
model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict the target for the held-out rows
# NOTE(review): the test rows are dated 2025 while training stops at 2024, so the
# 'year' feature is outside the trained range — confirm the forest's predictions
# are meaningful for this horizon.
y_pred = model.predict(X_test)

In [16]:
# Score the predictions against the held-out actual values
mae = mean_absolute_error(y_test, y_pred)            # mean absolute error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))   # root of the mean squared error

# "Accuracy" here is 100 minus the mean absolute percentage error
abs_pct_error = np.abs((y_test - y_pred) / y_test)   # per-row |error| relative to the actual value
accuracy = 100 - (np.mean(abs_pct_error) * 100)

print(
    f"RMSE: {rmse:.4f}",
    f"MAE: {mae:.4f}",
    f"Accuracy: {accuracy:.2f}%",
    sep="\n",
)

RMSE: 3.2161
MAE: 3.0381
Accuracy: 99.29%


In [15]:
# Assemble one row per test date: formatted date, actual value, predicted value.
# The frame keeps test_df's index; the arrays are attached by position.
pred_df = (
    test_df['Date']
    .dt.strftime('%Y-%m-%d')
    .to_frame(name='Date')
    .assign(Actual=y_test.values, Predicted=y_pred)
)

# Write the table to the Predictions folder, creating the folder if needed
os.makedirs("Predictions", exist_ok=True)
pred_df.to_csv("Predictions/RandomForest_Predictions.csv", index=False)