In [None]:

# 1. Import Required Libraries

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

import joblib


In [None]:
# Mount Google Drive at /content/drive so the dataset stored under MyDrive
# can be read by file path in the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# 2. Load the Dataset

df = pd.read_csv("/content/drive/MyDrive/DATA SCIENCE/energy_data.csv")

# A CSV written by DataFrame.to_csv() with its row index comes back with that
# index as a column named "Unnamed: 0". It is a row counter, not a measurement,
# so it must not reach the feature matrix built later.
# errors="ignore" makes this a no-op when the file has no such column.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

# View first few rows
df.head()


Unnamed: 0,Month,Temperature_C,Rainfall_mm,Household_Size,Appliance_Count,Electricity_Consumption_kWh
0,Jan,30,12,4,15,245
1,Feb,32,5,4,16,260
2,Mar,31,18,3,14,230
3,Apr,29,35,3,13,210
4,May,27,55,5,18,300


In [None]:

# 3. Data Preprocessing

# Encode Month cyclically (sin/cos on a 12-month circle) so that December and
# January end up adjacent instead of 11 units apart.
#
# The month number comes from an explicit calendar lookup rather than
# pd.factorize: factorize numbers labels in order of first appearance, so it
# only matches the calendar when the file starts in January and lists the
# months in order.
MONTH_NUMBER = {
    "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
    "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}

# Guarded so that re-running this cell after "Month" has already been replaced
# is a no-op instead of a KeyError.
if "Month" in df.columns:
    # Normalise labels such as " jan", "JAN" or "January" to the "Jan" form.
    month_key = df["Month"].astype(str).str.strip().str[:3].str.title()
    month_num = month_key.map(MONTH_NUMBER)

    # Fail loudly on labels the lookup does not know, rather than silently
    # assigning them an arbitrary position on the circle.
    unknown = df.loc[month_num.isna(), "Month"].unique()
    if len(unknown) > 0:
        raise ValueError(f"Unrecognised Month values: {list(unknown)}")

    angle = 2 * np.pi * month_num / 12

    # Build a new frame (no inplace mutation): append the two encoded columns,
    # then drop the original Month column.
    df = df.assign(
        Month_sin=np.sin(angle),
        Month_cos=np.cos(angle),
    ).drop(columns=["Month"])


In [None]:

# 4. Define Features and Target

# Target: the electricity-consumption column.
Y = df["Electricity_Consumption_kWh"]

# Features: every other column of df.
X = df.drop("Electricity_Consumption_kWh", axis=1)


In [None]:

# 5. Feature Scaling

# Learn each feature's minimum and maximum from X, then rescale X with them.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)


In [None]:

# 6. Train-Test Split

# Split the raw features first, then fit the scaler on the training rows only.
# A MinMaxScaler fitted on the full dataset lets the held-out rows influence
# the min/max used for scaling (data leakage), so the test score stops being
# an honest estimate of performance on unseen data.
# The split depends only on the number of rows, test_size and random_state, so
# the same rows land in train/test as when splitting an already-scaled array.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.35,
    random_state=42
)

# Re-bind `scaler` to the train-fitted instance so anything that reuses or
# persists it later applies exactly the transform the model was trained with.
scaler = MinMaxScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
# Test rows are only transformed, never fitted on; values may fall slightly
# outside [0, 1], which is expected.
X_test = scaler.transform(X_test_raw)


In [None]:

# 7. Build and Tune Random Forest Model

# Fixed seed so the fitted forest is reproducible between runs.
rf = RandomForestRegressor(random_state=42)

# 2 * 3 * 2 * 2 = 24 hyper-parameter combinations to evaluate.
param_grid = {
    "n_estimators": [200, 300],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5],
    "min_samples_leaf": [1, 2]
}

# Rank candidates by (negated) mean squared error rather than R².
# R² is undefined for a validation fold with fewer than two samples, which is
# what 5-fold CV produces on a very small training set. The recorded run of
# this cell reported NaN scores for the candidates, which leaves
# best_estimator_ without a meaningful ranking behind it. Squared error is
# defined for folds of any size, so every candidate gets a finite score.
grid = GridSearchCV(
    rf,
    param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1  # use all available CPU cores
)

# Train model
grid.fit(X_train, Y_train)

# Best tuned model (refit on the whole training set by GridSearchCV)
best_model = grid.best_estimator_


 nan nan nan nan nan nan]


In [None]:

# 8. Model Evaluation

# Predict on the held-out rows only.
Y_pred = best_model.predict(X_test)

# R²: share of the variance in Y_test explained by the predictions.
# 1.0 is perfect, 0.0 is no better than always predicting the mean, and the
# value can be negative for a model that does worse than that.
r2 = r2_score(Y_test, Y_pred)

# RMSE is expressed in the target's own unit (kWh), which makes the size of a
# typical error easier to interpret than R² alone.
rmse = float(np.sqrt(np.mean((np.asarray(Y_test) - Y_pred) ** 2)))

print(f"R-squared (R²): {r2:.4f}")
# R² is not an accuracy (it is unbounded below), so report it as the share of
# variance explained instead of labelling it "accuracy".
print(f"Variance explained: {r2 * 100:.2f}%")
print(f"RMSE: {rmse:.2f} kWh")


R-squared (R²): 0.8730
Model Accuracy: 87.30%
