In [1]:
import os

# Peek at any model files left over from a previous run. The "../models"
# directory is only created further down (os.makedirs in the config cell), so
# on a fresh checkout it may not exist yet; show an empty list in that case
# instead of failing with FileNotFoundError.
existing_models = os.listdir("../models") if os.path.isdir("../models") else []
existing_models


['clean_model.pkl']

In [2]:
import pandas as pd
import os
import joblib
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


In [3]:
# Project-relative locations used throughout the notebook.
FEATURE_DIR = "../data/processed_features"  # input: *_features.csv files are read from here
MODEL_DIR = "../models"                     # output: fitted models are written here
RESULTS_DIR = "../results"                  # output: evaluation report is written here

# Only the output locations are created; exist_ok makes re-running harmless.
for output_dir in (MODEL_DIR, RESULTS_DIR):
    os.makedirs(output_dir, exist_ok=True)


In [4]:
# Collect the feature tables: every entry in FEATURE_DIR whose name ends with
# "_features.csv". os.listdir() returns entries in arbitrary, platform-dependent
# order, so the names are sorted to keep the preview below and the order of the
# training log reproducible across machines.
# NOTE(review): the recorded run also picked up a "clean_features.csv" entry and
# trained it as if it were a state — confirm that file belongs in this directory.
feature_files = sorted(
    f for f in os.listdir(FEATURE_DIR)
    if f.endswith("_features.csv")
)

# Number of matching files plus a short sample of their names.
len(feature_files), feature_files[:5]


(34,
 ['Andhra Pradesh_features.csv',
  'Arunachal Pradesh_features.csv',
  'Assam_features.csv',
  'Bihar_features.csv',
  'Chandigarh_features.csv'])

In [7]:
# One {"State", "RMSE"} record is appended per feature file; the list is reset
# here so re-running the cell does not accumulate duplicates.
results = []

for file in feature_files:
    # The label is the file name with the "_features.csv" marker removed.
    state_name = file.replace("_features.csv", "")
    file_path = os.path.join(FEATURE_DIR, file)
    df = pd.read_csv(file_path)

    # Predictors are every column except "date" and "load"; "load" is the target.
    X = df.drop(columns=["date", "load"])
    y = df["load"]

    # Positional hold-out: the first 80% of rows train, the remaining rows test.
    # NOTE(review): this is only a time-based split if the CSV rows are already
    # in chronological order — nothing here sorts by "date"; confirm upstream.
    split_idx = int(len(df) * 0.8)
    train_rows = slice(None, split_idx)
    test_rows = slice(split_idx, None)
    X_train, y_train = X.iloc[train_rows], y.iloc[train_rows]
    X_test, y_test = X.iloc[test_rows], y.iloc[test_rows]

    # Fit a forest with a fixed seed, using all available cores.
    model = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)

    # Score on the held-out tail: RMSE is the square root of the MSE.
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    # Persist the fitted model as "<state>_model.pkl" (overwrites any earlier file).
    model_path = os.path.join(MODEL_DIR, f"{state_name}_model.pkl")
    joblib.dump(model, model_path)

    results.append(dict(State=state_name, RMSE=rmse))

    print(f"✅ {state_name} trained | RMSE: {rmse:.2f}")


✅ Andhra Pradesh trained | RMSE: 8.57
✅ Arunachal Pradesh trained | RMSE: 0.18
✅ Assam trained | RMSE: 3.01
✅ Bihar trained | RMSE: 9.78
✅ Chandigarh trained | RMSE: 0.59
✅ Chhattisgarh trained | RMSE: 5.60
✅ clean trained | RMSE: 9.63
✅ Dadra and Nagar Haveli trained | RMSE: 1.48
✅ Delhi trained | RMSE: 10.31
✅ Goa trained | RMSE: 1.07
✅ Gujarat trained | RMSE: 21.96
✅ Haryana trained | RMSE: 19.50
✅ Himachal Pradesh trained | RMSE: 2.74
✅ Jammu and Kashmir trained | RMSE: 5.31
✅ Jharkhand trained | RMSE: 1.49
✅ Karnataka trained | RMSE: 13.58
✅ Kerala trained | RMSE: 3.56
✅ Madhya Pradesh trained | RMSE: 17.33
✅ Maharashtra trained | RMSE: 20.14
✅ Manipur trained | RMSE: 0.17
✅ Meghalaya trained | RMSE: 0.44
✅ Mizoram trained | RMSE: 0.14
✅ Nagaland trained | RMSE: 0.13
✅ Odisha trained | RMSE: 6.98
✅ Puducherry trained | RMSE: 0.69
✅ Punjab trained | RMSE: 19.08
✅ Rajasthan trained | RMSE: 18.67
✅ Sikkim trained | RMSE: 0.23
✅ Tamil Nadu trained | RMSE: 20.69
✅ Telangana trained | R

In [8]:
# Rank the per-state scores from lowest to highest RMSE and renumber the rows.
# The columns are named explicitly so this cell still works when `results` is
# empty (no feature files were found): a bare pd.DataFrame([]) has no "RMSE"
# column and sort_values("RMSE") would raise KeyError.
# Chained calls replace the inplace=True mutations; the resulting frame is the same.
results_df = (
    pd.DataFrame(results, columns=["State", "RMSE"])
    .sort_values("RMSE")
    .reset_index(drop=True)
)

results_df


Unnamed: 0,State,RMSE
0,Nagaland,0.134266
1,Mizoram,0.142264
2,Manipur,0.166
3,Arunachal Pradesh,0.178267
4,Sikkim,0.227369
5,Meghalaya,0.440135
6,Tripura,0.529516
7,Chandigarh,0.588478
8,Puducherry,0.69109
9,Goa,1.069062


In [9]:
# Write the ranked RMSE table to the results directory; the row index is
# omitted, so the file contains only the frame's own columns.
performance_csv = os.path.join(RESULTS_DIR, "model_performance.csv")
results_df.to_csv(performance_csv, index=False)


In [10]:
# List what is now present in the model directory to confirm the
# "<state>_model.pkl" files were written.
saved_models = os.listdir(MODEL_DIR)
saved_models


['Andhra Pradesh_model.pkl',
 'Arunachal Pradesh_model.pkl',
 'Assam_model.pkl',
 'Bihar_model.pkl',
 'Chandigarh_model.pkl',
 'Chhattisgarh_model.pkl',
 'clean_model.pkl',
 'Dadra and Nagar Haveli_model.pkl',
 'Delhi_model.pkl',
 'Goa_model.pkl',
 'Gujarat_model.pkl',
 'Haryana_model.pkl',
 'Himachal Pradesh_model.pkl',
 'Jammu and Kashmir_model.pkl',
 'Jharkhand_model.pkl',
 'Karnataka_model.pkl',
 'Kerala_model.pkl',
 'Madhya Pradesh_model.pkl',
 'Maharashtra_model.pkl',
 'Manipur_model.pkl',
 'Meghalaya_model.pkl',
 'Mizoram_model.pkl',
 'Nagaland_model.pkl',
 'Odisha_model.pkl',
 'Puducherry_model.pkl',
 'Punjab_model.pkl',
 'Rajasthan_model.pkl',
 'Sikkim_model.pkl',
 'Tamil Nadu_model.pkl',
 'Telangana_model.pkl',
 'Tripura_model.pkl',
 'Uttar Pradesh_model.pkl',
 'Uttarakhand_model.pkl',
 'West Bengal_model.pkl']

In [11]:
# Read the saved report back as a sanity check. The path is built from
# RESULTS_DIR, the same way the to_csv call builds it, so this always opens
# the file that was just written even if the results directory is reconfigured.
pd.read_csv(os.path.join(RESULTS_DIR, "model_performance.csv")).head()

Unnamed: 0,State,RMSE
0,Nagaland,0.134266
1,Mizoram,0.142264
2,Manipur,0.166
3,Arunachal Pradesh,0.178267
4,Sikkim,0.227369
