In [None]:
# Model Training

import json
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load dataset: 'timestamp' becomes the index and is parsed as dates.
data = pd.read_csv('../data/energy_consumption_features.csv', index_col='timestamp', parse_dates=True)

# Features and labels: every column except the target is used as a feature.
X = data.drop('energy_consumption', axis=1)
y = data['energy_consumption']

# Train-test split: shuffle=False keeps the original row order, so the last
# 20% of rows form the test set.
# NOTE(review): this is a chronological hold-out only if the CSV is sorted by
# timestamp -- confirm upstream.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Model: a fixed random_state makes the saved model and metrics reproducible
# across runs (the forest is otherwise seeded differently on every fit).
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out rows.
y_pred = model.predict(X_test)

# Evaluate
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is derived from MSE instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, whereas the square
# root of the MSE works on every version for a single-target regression.
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

# Output locations. Parent directories are created up front so that a missing
# folder cannot make the run fail after the model has already been trained.
metrics_file = '../outputs/metrics/model_performance.json'
model_file = '../models/final_model.pkl'
plot_file = '../outputs/plots/prediction_vs_actual.png'
for output_file in (metrics_file, model_file, plot_file):
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

# Save metrics (cast to built-in float so json can serialize them regardless
# of the scalar type returned by the metric functions).
metrics = {'MAE': float(mae), 'MSE': float(mse), 'RMSE': float(rmse), 'R2': float(r2)}
with open(metrics_file, 'w') as f:
    json.dump(metrics, f)

# Save model
joblib.dump(model, model_file)

# Plot predicted vs actual over the test-set index.
plt.figure(figsize=(15,5))
plt.plot(y_test.index, y_test, label='Actual')
plt.plot(y_test.index, y_pred, label='Predicted')
plt.legend()
plt.title('Actual vs Predicted Energy Consumption')
# Save before show(): show() may leave an empty figure behind once closed.
plt.savefig(plot_file)
plt.show()