# In [None]:
# ============================================================
# ✅ ENERGY USAGE PREDICTION MODEL TRAINING (GOOGLE DRIVE)
# Dataset from Drive → Train ML Model → Save Model in Drive
# ============================================================

# ✅ STEP 1: Mount Google Drive
# Exposes the Drive contents under /content/drive, which is the prefix of the
# dataset and model paths used later in this script.
from google.colab import drive

drive.mount("/content/drive")

# ============================================================

# ✅ STEP 2: Import Required Libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import joblib

# ============================================================

# ✅ STEP 3: Load Dataset from Google Drive

# 🔥 Change this path according to where your file is saved
dataset_path = "/content/drive/MyDrive/P2BL_ML/Chennai_Home_Energy_Usage_Dataset.csv"

# Read the whole CSV into a DataFrame. Later steps read the "Timestamp" and
# "TotalEnergy_kWh" columns from it, so both must be present in the file.
df = pd.read_csv(dataset_path)

# Status output (mis-encoded emoji in the message repaired to valid UTF-8).
print("✅ Dataset Loaded Successfully!")
print("Dataset Shape:", df.shape)

# ============================================================

# ✅ STEP 4: Convert Timestamp into Features

# Parse the raw column into datetimes so the .dt accessor is available.
df["Timestamp"] = pd.to_datetime(df["Timestamp"])

# Break the timestamp into integer calendar parts that become model features.
df["Hour"] = df["Timestamp"].dt.hour
df["Day"] = df["Timestamp"].dt.day  # day of the month, not day of the week
df["Month"] = df["Timestamp"].dt.month

# Drop Timestamp column: only the derived parts above are kept as features.
df = df.drop(columns=["Timestamp"])

# Status output (mis-encoded emoji in the message repaired to valid UTF-8).
print("\n✅ Timestamp Converted into Hour, Day, Month")

# ============================================================

# ✅ STEP 5: Define Input Features (X) and Target (y)

# Every column except the target is used as an input feature.
# NOTE(review): assumes all remaining columns are numeric — confirm against
# the dataset before training.
X = df.drop(columns=["TotalEnergy_kWh"])
y = df["TotalEnergy_kWh"]

# Status output (mis-encoded emoji in the message repaired to valid UTF-8).
print("\n✅ Features and Target Defined Successfully!")

# ============================================================

# ✅ STEP 6: Split Dataset into Train and Test

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)

# Status output (mis-encoded emoji in the message repaired to valid UTF-8).
print("\n✅ Data Split Done!")
print("Training Samples:", X_train.shape[0])
print("Testing Samples :", X_test.shape[0])

# ============================================================

# ✅ STEP 7: Train Random Forest Regressor Model

# 200 trees with a fixed seed so repeated runs build the same forest.
model = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
)

# Fit on the training portion only; the test portion is kept for evaluation.
model.fit(X_train, y_train)

# Status output (mis-encoded emoji in the message repaired to valid UTF-8).
print("\n✅ Model Training Completed!")

# ============================================================

# ✅ STEP 8: Model Prediction

# Predict the target for the held-out rows; compared against y_test below.
y_pred = model.predict(X_test)

# Status output (mis-encoded emoji in the message repaired to valid UTF-8).
print("\n✅ Prediction Done!")

# ============================================================

# ✅ STEP 9: Evaluate Model Performance

# Error metrics on the held-out test set.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE = sqrt(MSE)
r2 = r2_score(y_test, y_pred)

# Report (mis-encoded emoji and the "R²" label repaired to valid UTF-8).
print("\n📌 MODEL EVALUATION RESULTS")
print("===================================")
print("MAE  :", mae)
print("RMSE :", rmse)
print("R² Score:", r2)
print("===================================")

# ============================================================

# ✅ STEP 10: Save Trained Model into Google Drive

model_path = "/content/drive/MyDrive/P2BL_ML/Energy_Usage_Prediction_Model.pkl"

# Persist the fitted estimator to Drive with joblib.
# NOTE: joblib files are pickle-based, so only load this file back from a
# location you trust.
joblib.dump(model, model_path)

# Status output (mis-encoded emoji in the messages repaired to valid UTF-8).
print("\n✅ Model Saved Successfully in Google Drive!")
print("📌 Saved Location:", model_path)

# ============================================================

print("\n🎉 ALL DONE SUCCESSFULLY! Your Dataset is Trained and Model is Stored in Drive.")
