In [1]:
import numpy as np
import pandas as pd

np.random.seed(42)
n = 10000

fans = np.random.poisson(2, n) + np.random.randint(0, 3, n).clip(0, 5)
lights = np.random.poisson(8, n) + np.random.randint(0, 5, n).clip(0, 15)
fridge = np.random.choice([0, 1, 2], n, p=[0.1, 0.8, 0.1])
tv = np.random.choice([0, 1, 2], n, p=[0.2, 0.7, 0.1])
ac = np.random.choice([0, 1, 2], n, p=[0.6, 0.35, 0.05])
water_heater = np.random.choice([0, 1], n, p=[0.7, 0.3])
washing_machine = np.random.choice([0, 1], n, p=[0.6, 0.4])
microwave = np.random.choice([0, 1], n, p=[0.75, 0.25])

num_family_members = np.random.randint(1, 8, size=n)
house_size = np.random.randint(300, 4000, size=n)
num_rooms = np.random.randint(1, 10, size=n)

voltage = (
    220
    + 0.5 * (fans + lights)
    + 2 * (fridge + tv)
    + 5 * (ac + water_heater)
    + np.random.normal(0, 8, n)
).clip(200, 250)

base_bill = 250
kwh_per_month = (
    10 * fans +
    8 * lights +
    40 * fridge +
    20 * tv +
    400 * ac +
    200 * water_heater +
    40 * washing_machine +
    15 * microwave
)

bill = base_bill + kwh_per_month * 7 + np.random.normal(0, 30, n)
bill = bill.clip(250, None)

df = pd.DataFrame({
    "fans": fans,
    "lights": lights,
    "fridge": fridge,
    "tv": tv,
    "ac": ac,
    "water_heater": water_heater,
    "washing_machine": washing_machine,
    "microwave": microwave,
    "num_family_members": num_family_members,
    "house_size": house_size,
    "num_rooms": num_rooms,
    "voltage": voltage,
    "electricity_bill": bill,
})

csv_path = r"C:\Users\ANTO CHARLES\Downloads\appliance_dataset_fixed.csv"
df.to_csv(csv_path, index=False)

print("Saved:", csv_path)
basic = df[(df["ac"] == 0) & (df["water_heater"] == 0)]
print("Basic (no AC/geyser) avg bill:", basic["electricity_bill"].mean())


Saved: C:\Users\ANTO CHARLES\Downloads\appliance_dataset_fixed.csv
Basic (no AC/geyser) avg bill: 1568.6598292680908


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib, os

csv_path = r"C:\Users\ANTO CHARLES\Downloads\appliance_dataset_fixed.csv"
df = pd.read_csv(csv_path)

feature_cols = [
    "fans", "lights", "fridge", "tv", "ac", "water_heater",
    "washing_machine", "microwave", "num_family_members",
    "house_size", "num_rooms",
]

X = df[feature_cols]
y_voltage = df["voltage"]
y_bill = df["electricity_bill"]

X_train_v, X_test_v, y_train_v, y_test_v = train_test_split(
    X, y_voltage, test_size=0.2, random_state=42
)
model_voltage = RandomForestRegressor(n_estimators=200, max_depth=15, random_state=42)
model_voltage.fit(X_train_v, y_train_v)

X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(
    X, y_bill, test_size=0.2, random_state=42
)
model_bill = RandomForestRegressor(n_estimators=200, max_depth=15, random_state=42)
model_bill.fit(X_train_b, y_train_b)

models_dir = r"C:\Users\ANTO CHARLES\models"
os.makedirs(models_dir, exist_ok=True)

voltage_model_path = os.path.join(models_dir, "fixed_voltage_model.pkl")
bill_model_path = os.path.join(models_dir, "fixed_bill_model.pkl")

joblib.dump(model_voltage, voltage_model_path)
joblib.dump(model_bill, bill_model_path)

print("Voltage model:", voltage_model_path)
print("Bill model:   ", bill_model_path)


Voltage model: C:\Users\ANTO CHARLES\models\fixed_voltage_model.pkl
Bill model:    C:\Users\ANTO CHARLES\models\fixed_bill_model.pkl


In [14]:
# ================== ACCURACY METRICS ==================

from sklearn.metrics import mean_absolute_error

# Voltage model predictions
y_pred_v = model_voltage.predict(X_test_v)

r2_v = r2_score(y_test_v, y_pred_v)
rmse_v = mean_squared_error(y_test_v, y_pred_v, squared=False)
mae_v = mean_absolute_error(y_test_v, y_pred_v)

# Bill model predictions
y_pred_b = model_bill.predict(X_test_b)

r2_b = r2_score(y_test_b, y_pred_b)
rmse_b = mean_squared_error(y_test_b, y_pred_b, squared=False)
mae_b = mean_absolute_error(y_test_b, y_pred_b)

print("\n" + "="*60)
print("SMARTBILL MODEL ACCURACY RESULTS")
print("="*60)


print(f"\nBill Model (test size = {len(X_test_b)}):")
print(f"  R² Score : {r2_b:.4f}")
print(f"  RMSE     : ₹{rmse_b:.2f}")
print(f"  MAE      : ₹{mae_b:.2f}")



SMARTBILL MODEL ACCURACY RESULTS

Bill Model (test size = 2000):
  R² Score : 0.9986
  RMSE     : ₹67.20
  MAE      : ₹48.25
