In [None]:
import pandas as pd
import numpy as np

# Location of the raw electricity-bill CSV on GitHub.
url = "https://raw.githubusercontent.com/sharad00004/AI-ML-HACKATHON-Datasets/main/electricity_bill_dataset.csv"
# Read the remote CSV into the working frame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)

# DATA PRE-PROCESSING

Checking for NULL values

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
df.isna().sum()

Checking for duplicates

In [None]:
# Number of rows that exactly repeat an earlier row.
df.duplicated(keep='first').sum()

# Data Visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Appliance-count columns whose distributions are drawn on a 2x3 grid.
appliances = ['Fan', 'Refrigerator', 'AirConditioner', 'Television', 'Monitor', 'MotorPump']
df.hist(column=appliances, bins=15, figsize=(12, 8), layout=(2, 3))
plt.suptitle('Distribution of Appliance Counts (Raw Features)')
plt.tight_layout()
plt.show()

In [None]:
# One box per city showing the spread of electricity bills.
fig, ax = plt.subplots(figsize=(12, 5))
sns.boxplot(x='City', y='ElectricityBill', data=df, ax=ax)
ax.set_title('Electricity Bill Distribution by City')
# Tilt the city names so they stay readable.
plt.xticks(rotation=45)
plt.show()

In [None]:
# One box per month showing the spread of electricity bills.
fig, ax = plt.subplots(figsize=(10, 5))
sns.boxplot(x='Month', y='ElectricityBill', data=df, ax=ax)
ax.set(
    title='Monthly Electricity Bill Trend',
    xlabel='Month',
    ylabel='Electricity Bill',
)
plt.show()

In [None]:
# Mean electricity bill for every city, as a two-column frame
# (City, AvgElectricityBill).
city_avg = (
    df.groupby('City')['ElectricityBill']
    .mean()
    .rename('AvgElectricityBill')
    .reset_index()
)

# Preview the first few city averages.
display(city_avg.head())

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import json

# Load the GeoJSON with the Indian state boundaries.
# JSON is defined as UTF-8, so the encoding is set explicitly instead of relying
# on the platform default.
# NOTE(review): the path is an absolute Colab-style location; the file must be
# uploaded there before this cell runs.
with open("/content/states_india.geojson", "r", encoding="utf-8") as f:
    india_states = json.load(f)

# Every state name found in the GeoJSON (the key used to join with our data).
all_states = [feature['properties']['st_nm'] for feature in india_states['features']]
full_state_df = pd.DataFrame({'State': all_states})

# city → state mapping
city_to_state = {
    'Hyderabad': 'Telangana',
    'Vadodara': 'Gujarat',
    'Shimla': 'Himachal Pradesh',
    'Mumbai': 'Maharashtra',
    'Ratnagiri': 'Maharashtra',
    'New Delhi': 'Delhi',
    'Dahej': 'Gujarat',
    'Ahmedabad': 'Gujarat',
    'Noida': 'Uttar Pradesh',
    'Nagpur': 'Maharashtra',
    'Chennai': 'Tamil Nadu',
    'Faridabad': 'Haryana',
    'Kolkata': 'West Bengal',
    'Pune': 'Maharashtra',
    'Gurgaon': 'Haryana',
    'Navi Mumbai': 'Maharashtra'
}

# Attach a State column to the working frame (NaN for cities not in the mapping).
df['State'] = df['City'].map(city_to_state)

# Rows whose city has no mapping would silently vanish from the state averages,
# so report them instead of hiding the gap.
unmapped_cities = sorted(df.loc[df['State'].isna(), 'City'].dropna().astype(str).unique())
if unmapped_cities:
    print(f"Warning: no state mapping for cities: {', '.join(unmapped_cities)}")

# Average electricity bill per state.
state_avg = df.groupby('State')['ElectricityBill'].mean().reset_index()
state_avg.columns = ['State', 'AvgElectricityBill']

# Left-join onto the full state list so states without data are kept.
merged_df = full_state_df.merge(state_avg, on='State', how='left')

# Record which states really have data BEFORE filling the gaps: deriving this
# from "value > 0" after the fill would mislabel a genuine zero average.
merged_df['HasData'] = merged_df['AvgElectricityBill'].notna()

# States without data get 0 so the column stays numeric
# (create_map_with_px relies on this 0 placeholder).
merged_df['AvgElectricityBill'] = merged_df['AvgElectricityBill'].fillna(0)

# Hover text: the average bill where known, a "no data" note otherwise.
merged_df['HoverText'] = merged_df.apply(
    lambda row: f"{row['State']}<br>Avg Bill: ₹{row['AvgElectricityBill']:.2f}"
    if row['HasData'] else f"{row['State']}<br>No data available",
    axis=1
)

# Build the choropleth from two traces: coloured states and gray states.
fig = go.Figure()

# Trace 1: states with data, coloured by their average bill.
states_with_data = merged_df[merged_df['HasData']]
if not states_with_data.empty:
    fig.add_trace(go.Choropleth(
        geojson=india_states,
        featureidkey="properties.st_nm",
        locations=states_with_data['State'],
        z=states_with_data['AvgElectricityBill'],
        colorscale="YlOrRd",
        showscale=True,
        colorbar=dict(title="Avg Electricity Bill (₹)"),
        hovertemplate="%{hovertext}<extra></extra>",
        hovertext=states_with_data['HoverText'],
        marker_line_color='black',
        marker_line_width=0.6,
        name="States with Data"
    ))

# Trace 2: states without data, drawn in a flat gray.
states_without_data = merged_df[~merged_df['HasData']]
if not states_without_data.empty:
    fig.add_trace(go.Choropleth(
        geojson=india_states,
        featureidkey="properties.st_nm",
        locations=states_without_data['State'],
        z=[1] * len(states_without_data),  # Use constant value for gray coloring
        colorscale=[[0, 'lightgray'], [1, 'lightgray']],
        showscale=False,
        hovertemplate="%{hovertext}<extra></extra>",
        hovertext=states_without_data['HoverText'],
        marker_line_color='black',
        marker_line_width=0.6,
        name="States without Data"
    ))

# Zoom to the plotted states and hide the base map.
fig.update_geos(
    fitbounds="locations",
    visible=False,
    projection_type="natural earth"
)

fig.update_layout(
    title={
        'text': "Average Electricity Bill by Indian State",
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 16}
    },
    margin={"r": 0, "t": 60, "l": 0, "b": 0},
    showlegend=False,
    width=800,
    height=600
)

fig.show()

# Optional: Print summary statistics
print("Summary Statistics:")
print(f"Total states in India: {len(all_states)}")
print(f"States with data: {len(states_with_data)}")
print(f"States without data: {len(states_without_data)}")
print(f"\nStates with data: {', '.join(states_with_data['State'].tolist())}")
print(f"\nStates without data: {', '.join(states_without_data['State'].tolist())}")

# Alternative approach using plotly express with custom color handling
# This is a simpler approach if you prefer px over go

def create_map_with_px():
    """Build the state choropleth with plotly express instead of graph_objects.

    Works on a copy of the global ``merged_df``. States whose average bill is 0
    (the "no data" placeholder) are given a tenth of the smallest positive
    average so they land in the gray band at the bottom of the colour scale.

    Returns:
        The plotly express choropleth figure (not shown; call ``.show()``).
    """
    px_frame = merged_df.copy()

    # Smallest real average, used to derive the placeholder for empty states.
    has_bill = px_frame['AvgElectricityBill'] > 0
    smallest_bill = px_frame.loc[has_bill, 'AvgElectricityBill'].min()
    no_bill = px_frame['AvgElectricityBill'] == 0
    px_frame.loc[no_bill, 'AvgElectricityBill'] = smallest_bill * 0.1

    # The lowest 10% of the colour scale is gray; the rest runs light to dark red.
    gray_then_red = [(0, 'lightgray'), (0.1, 'lightgray'), (0.1, '#FFF5F0'), (1, '#8B0000')]
    fig_px = px.choropleth(
        px_frame,
        geojson=india_states,
        featureidkey="properties.st_nm",
        locations='State',
        color='AvgElectricityBill',
        color_continuous_scale=gray_then_red,
        title="Average Electricity Bill by Indian State (Alternative Approach)",
        hover_name='State',
        hover_data={'AvgElectricityBill': ':,.2f'},
    )

    fig_px.update_geos(fitbounds="locations", visible=False)
    fig_px.update_traces(marker_line_width=0.6, marker_line_color='black')
    fig_px.update_layout(margin={"r": 0, "t": 50, "l": 0, "b": 0})

    return fig_px

# Uncomment the line below to see the alternative approach
# create_map_with_px().show()

## One-Hot Encoding



In [None]:
# Preview the first five rows (now including the State column).
df.head(n=5)

# Feature Engineering

In [None]:
# Total appliance count: row-wise sum over the six appliance columns.
df['TotalAppliances'] = df[
    ['Fan', 'Refrigerator', 'AirConditioner', 'Television', 'Monitor', 'MotorPump']
].sum(axis='columns')

# Cooling load = fans + air conditioners.
df['CoolingLoad'] = df['Fan'].add(df['AirConditioner'])

# Entertainment load = televisions + monitors.
df['EntertainmentLoad'] = df['Television'].add(df['Monitor'])

# Essential load = refrigerators + motor pumps.
df['EssentialLoad'] = df['Refrigerator'].add(df['MotorPump'])


In [None]:
# Cyclical encoding of the month: placing it on a circle keeps December and
# January adjacent, so the model does not treat a larger month number as
# "more" of anything.
df['Month_sin'] = np.sin(df['Month'].mul(2 * np.pi).div(12))
df['Month_cos'] = np.cos(df['Month'].mul(2 * np.pi).div(12))

Data Visualisation after feature engineering

In [None]:
# Preview the first five rows with the engineered feature columns added.
df.head(n=5)

In [None]:
# DataFrame.info() writes its summary to stdout and returns None, so wrapping
# it in print() only appended a stray "None" line to the output.
df.info()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Histogram (with KDE overlay) of how bill amounts are distributed.
ax = sns.histplot(
    df['ElectricityBill'],
    bins=50,
    kde=True,
    color='Purple',
    edgecolor='black',
)
ax.set(
    title='Distribution of Electricity bills across money range',
    xlabel='Bill Amount',
    ylabel='Frequency',
)
ax.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Electricity bill against total appliance count, coloured by cooling load.
ax = sns.scatterplot(
    x='TotalAppliances',
    y='ElectricityBill',
    hue='CoolingLoad',
    palette='coolwarm',
    data=df,
)
ax.set_title('Bill  vs Total Appliances')
ax.set_xlabel('Total Appliances')
ax.set_ylabel('Electricity Bill')

plt.show()

In [None]:
# Spread of electricity bills for each calendar month.
ax = sns.boxplot(data=df, x='Month', y='ElectricityBill', palette='Spectral')
ax.set(
    title='Electricity Bill over the months',
    xlabel='Month(1-Jan,12-Dec)',
    ylabel='Bill amount',
)
plt.show()

In [None]:
# Average bill per city (top 10).
# The 'City' column is still present here because one-hot encoding happens later.
# The result is a Series, so it gets its own name: reusing `city_avg` would
# replace the DataFrame built earlier with an object of a different type and
# break that earlier cell when cells are re-run out of order.
top_city_avg = (
    df.groupby('City')['ElectricityBill']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

sns.barplot(x=top_city_avg.values, y=top_city_avg.index, palette='viridis')
plt.title('Average electricity bill per city(Top 10)')
plt.xlabel("Average Bill")
plt.ylabel('City')
plt.show()

In [None]:
# Mean bill for each month, drawn as a line with month-name tick labels.
monthly_avg = df.groupby('Month')['ElectricityBill'].mean()

ax = sns.lineplot(
    x=monthly_avg.index,
    y=monthly_avg.values,
    marker='o',
    color='#16A085',
    linewidth=3,
)
ax.set(xlabel='Month', ylabel='Avg bill', title='Average electricity bill by month')
plt.xticks(
    ticks=range(1, 13),
    labels=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
)

plt.show()

# Training the model

In [None]:
# Preview the first five rows before the categorical columns are encoded.
df.head(n=5)

In [None]:
# One-hot encode the City, Company and State columns (first level dropped).
# The cell overwrites `df`, so a second run used to fail with a KeyError because
# the source columns were already consumed. Skip the encoding only when ALL of
# them are gone; a partially missing set still raises, since that points to a
# real problem upstream.
categorical_cols = ['City', 'Company', 'State']
if any(col in df.columns for col in categorical_cols):
    df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Convert boolean columns (the dummies, on pandas versions that emit bool) to 0/1 ints.
bool_cols = df.select_dtypes('bool').columns
if len(bool_cols) > 0:
    df[bool_cols] = df[bool_cols].astype(int)

In [None]:

# Print column names, non-null counts and dtypes of the frame after encoding.
df.info()

In [None]:
# Separate the target variable from the rest of the table.
y = df['ElectricityBill']
X = df.drop(columns='ElectricityBill')

In [None]:
# Split the dataset into training and test partitions for the models.
from sklearn.model_selection import train_test_split

# test_size=0.25 is what train_test_split uses when no size is given; it is
# spelled out here so the split ratio is visible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

Model 1: Linear Regression

In [None]:
# Train the linear regression model.
from sklearn.linear_model import LinearRegression

linreg = LinearRegression()
# Fit on the training partition; the fitted estimator is the cell's output.
linreg.fit(X=X_train, y=y_train)

In [None]:
# Predict bills for the held-out test partition.
y_pred_lin = linreg.predict(X=X_test)

In [None]:
# Evaluate the Linear Regression predictions on the test partition.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

print("Regression Metrics - Linear Regression")

# Error metrics; RMSE is the square root of the MSE.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred_lin)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred_lin)
rmse = np.sqrt(mse)
r2 = r2_score(y_true=y_test, y_pred=y_pred_lin)

# Report every metric rounded to two decimals, one per line.
print(
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R-squared (R2): {r2:.2f}",
    sep="\n",
)

Model 2: Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# A random forest bootstraps rows and samples features at random, so without a
# fixed seed the reported metrics change on every run. Seed 0 matches the
# random_state used for the train/test split.
rf = RandomForestRegressor(random_state=0)
# Fit on the training partition; the fitted estimator is the cell's output.
rf.fit(X_train, y_train)


In [None]:
# Evaluate the RandomForestRegressor predictions on the test partition.
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import numpy as np

y_pred_rf = rf.predict(X=X_test)
print('Regression metrics - RandomForest')

# Error metrics; RMSE is the square root of the MSE.
mae_rf = mean_absolute_error(y_true=y_test, y_pred=y_pred_rf)
mse_rf = mean_squared_error(y_true=y_test, y_pred=y_pred_rf)
rmse_rf = np.sqrt(mse_rf)
r2_rf = r2_score(y_true=y_test, y_pred=y_pred_rf)

print("Mean absolute error:", mae_rf)
print("Mean squares error:", mse_rf)
print("Root mean sqaure:", rmse_rf)
print('r2 score:', r2_rf)

Model 3: XGBoost

In [None]:
from xgboost import XGBRegressor

# Gradient-boosted regressor with default hyper-parameters.
xgb = XGBRegressor()
# Fit on the training partition; the fitted estimator is the cell's output.
xgb.fit(X=X_train, y=y_train)

In [None]:
# Evaluate the XGBRegressor predictions on the test partition.
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import numpy as np

y_pred_xgb=xgb.predict(X_test)
print('Regression metrics -XGBoost')
mae_xgb=mean_absolute_error(y_test,y_pred_xgb)
mse_xgb=mean_squared_error(y_test,y_pred_xgb)
rmse_xgb=np.sqrt(mse_xgb)
# Only r2_xgb is assigned: the previous chained form `r2_xgb=r2=...` also
# overwrote `r2`, the Linear Regression score computed in an earlier cell.
r2_xgb=r2_score(y_test,y_pred_xgb)
print("Mean absolute error:",mae_xgb)
print('Mean square error:',mse_xgb)
print('Root mean sqaure error:',rmse_xgb)
print('r2_score:',r2_xgb)

Input from user

In [None]:


import ipywidgets as widgets #for sliding option, buttons,dropdowns etc
from IPython.display import display, clear_output
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Set matplotlib style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# File holding the serialized model used by the "Predict Bill" button.
MODEL_PATH = 'bill_predictor.pkl'

# Load the trained model. Nothing visible in this notebook writes the file, so
# on a fresh environment it has to be created first; fail with an actionable
# message rather than a bare "No such file" error.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model files from a trusted source.
try:
    loaded_model = joblib.load(MODEL_PATH)
except FileNotFoundError as exc:
    raise FileNotFoundError(
        f"Model file '{MODEL_PATH}' not found. Save a trained model first, e.g. "
        f"joblib.dump(rf, '{MODEL_PATH}'), then re-run this cell."
    ) from exc

# Create interactive widgets
print("🏠 Electricity Bill Predictor with Model Comparison")
print("=" * 60)

# Appliance widgets (sliders)
def _appliance_slider(label, default, upper):
    """Return an integer slider for one appliance count, ranging from 0 to ``upper``.

    Every slider shares the same step, label style and width; only the label,
    the starting value and the maximum differ.
    """
    return widgets.IntSlider(
        value=default,
        min=0,
        max=upper,
        step=1,
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='400px'),
    )


fans_slider = _appliance_slider('Fans:', 3, 23)
refrigerator_slider = _appliance_slider('Refrigerators:', 1, 23)
ac_slider = _appliance_slider('Air Conditioners:', 1, 3)
tv_slider = _appliance_slider('Televisions:', 2, 22)
monitor_slider = _appliance_slider('Monitors:', 1, 12)
pump_slider = _appliance_slider('Motor Pumps:', 1, 5)

# Usage and billing widgets

# Month selector: shows the month name, stores its number (1-12); starts on June.
month_dropdown = widgets.Dropdown(
    options=[
        (month_name, month_number)
        for month_number, month_name in enumerate(
            ['January', 'February', 'March', 'April', 'May', 'June',
             'July', 'August', 'September', 'October', 'November', 'December'],
            start=1,
        )
    ],
    value=6,
    description='Month:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='400px'),
)

# Monthly usage hours: 50 to 1000 in steps of 10.
hours_slider = widgets.IntSlider(
    description='Monthly Hours:',
    value=300,
    min=50,
    max=1000,
    step=10,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='400px'),
)

# Tariff rate: 2.0 to 15.0 in steps of 0.1.
tariff_slider = widgets.FloatSlider(
    description='Tariff Rate (₹/kWh):',
    value=6.0,
    min=2.0,
    max=15.0,
    step=0.1,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='400px'),
)

# City dropdown: the choices are listed first, then the widget is built.
city_options = [
    'Hyderabad', 'Vadodara', 'Shimla', 'Mumbai', 'Ratnagiri', 'New Delhi',
    'Dahej', 'Ahmedabad', 'Noida', 'Nagpur', 'Chennai', 'Faridabad',
    'Kolkata', 'Pune', 'Gurgaon', 'Navi Mumbai',
]
city_dropdown = widgets.Dropdown(
    description='City:',
    options=city_options,
    value='Hyderabad',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='400px'),
)

# Company dropdown: electricity companies the user can choose from.
company_options = [
    'Tata Power Company Ltd.',
    'NHPC',
    'Jyoti Structure',
    'Power Grid Corp',
    'Ratnagiri Gas and Power Pvt. Ltd. (RGPPL)',
    'Adani Power Ltd.',
    'Kalpataru Power',
    'Orient Green',
    'Sterlite Power Transmission Ltd',
    'Neueon Towers / Sujana Towers Ltd.',
    'BESCOM',
    'Unitech Power Transmission Ltd.',
    'Bonfiglioli Transmission Pvt. Ltd.',
    'SJVN Ltd.',
    'Maha Transco – Maharashtra State Electricity Transmission Co, Ltd.',
    'L&T Transmission & Distribution',
    'Guj Ind Power',
    'Torrent Power Ltd.',
    'KEC International',
    'Indowind Energy',
    'Reliance Energy',
    'GE T&D India Limited',
    'NTPC Pvt. Ltd.',
    'Optibelt Power Transmission India Private Limited',
    'CESC',
    'Ringfeder Power Transmission India Pvt. Ltd.',
    'Reliance Power',
    'JSW Energy Ltd.',
    'Sunil Hitech Eng',
    'Toshiba Transmission & Distribution Systems (India) Pvt. Ltd.',
    'Jaiprakash Power TransRail',
    'Lighting',
]
company_dropdown = widgets.Dropdown(
    description='Company:',
    options=company_options,
    value='Tata Power Company Ltd.',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='400px'),
)

# Action buttons: one runs a single prediction, the other the model comparison.
predict_button = widgets.Button(
    layout=widgets.Layout(width='200px', height='40px'),
    button_style='success',
    description='🔮 Predict Bill',
)

compare_button = widgets.Button(
    layout=widgets.Layout(width='200px', height='40px'),
    button_style='info',
    description='📊 Compare Models',
)

# Output areas that capture what each button handler prints or plots.
output, comparison_output = widgets.Output(), widgets.Output()

# City to State mapping, built from (city, state) pairs in the original order.
city_to_state = dict([
    ('Hyderabad', 'Telangana'),
    ('Vadodara', 'Gujarat'),
    ('Shimla', 'Himachal Pradesh'),
    ('Mumbai', 'Maharashtra'),
    ('Ratnagiri', 'Maharashtra'),
    ('New Delhi', 'Delhi'),
    ('Dahej', 'Gujarat'),
    ('Ahmedabad', 'Gujarat'),
    ('Noida', 'Uttar Pradesh'),
    ('Nagpur', 'Maharashtra'),
    ('Chennai', 'Tamil Nadu'),
    ('Faridabad', 'Haryana'),
    ('Kolkata', 'West Bengal'),
    ('Pune', 'Maharashtra'),
    ('Gurgaon', 'Haryana'),
    ('Navi Mumbai', 'Maharashtra'),
])

def prepare_input_data():
    """Turn the current widget values into one model-ready feature row.

    Reads the sliders and dropdowns, recomputes the engineered features used in
    training, one-hot encodes city, company and state against the dummy columns
    present in the global ``X``, and aligns the row to ``X.columns``.

    Returns:
        tuple: ``(input_df, user_input)`` where ``input_df`` is a single-row
        DataFrame with exactly the columns of ``X`` (in that order) and
        ``user_input`` is the dict of raw widget values.

    Raises:
        ValueError: if the global training frame ``X`` is not defined.
    """
    user_input = dict(
        Fan=int(fans_slider.value),
        Refrigerator=int(refrigerator_slider.value),
        AirConditioner=int(ac_slider.value),
        Television=int(tv_slider.value),
        Monitor=int(monitor_slider.value),
        MotorPump=int(pump_slider.value),
        Month=int(month_dropdown.value),
        MonthlyHours=int(hours_slider.value),
        TariffRate=float(tariff_slider.value),
        City=city_dropdown.value,
        Company=company_dropdown.value,
    )

    # Single-row frame carrying the raw inputs.
    row = pd.DataFrame([user_input])

    # Same engineered features as the training data.
    appliance_cols = ['Fan', 'Refrigerator', 'AirConditioner',
                      'Television', 'Monitor', 'MotorPump']
    row['TotalAppliances'] = row[appliance_cols].sum(axis=1)
    row['CoolingLoad'] = row['Fan'] + row['AirConditioner']
    row['EntertainmentLoad'] = row['Television'] + row['Monitor']
    row['EssentialLoad'] = row['Refrigerator'] + row['MotorPump']
    row['Month_sin'] = np.sin(2 * np.pi * row['Month'] / 12)
    row['Month_cos'] = np.cos(2 * np.pi * row['Month'] / 12)

    # The training columns define which dummy columns exist.
    if 'X' not in globals():
        raise ValueError("Training data 'X' not found in global variables")

    # Names of the dummy columns that should be 1 for this input.
    selected_city = user_input['City']
    active_dummies = {f'City_{selected_city}', f"Company_{user_input['Company']}"}
    if selected_city in city_to_state:
        active_dummies.add(f'State_{city_to_state[selected_city]}')

    # Fill every City_/Company_/State_ column known to X: 1 if selected, else 0.
    # A selection with no matching column (the level dropped by drop_first)
    # simply leaves all of its group at 0.
    for column in X.columns:
        if column.startswith(('City_', 'Company_', 'State_')):
            row[column] = int(column in active_dummies)

    # Remove the raw categorical columns.
    row = row.drop(columns=['City', 'Company'], errors='ignore')

    # Match the training layout: absent columns become 0, extras are dropped,
    # and the order follows X.columns.
    input_df = row.reindex(columns=X.columns, fill_value=0)

    return input_df, user_input

def predict_bill(button):
    """Click handler: predict the bill for the current inputs with ``loaded_model``.

    Everything is printed inside the ``output`` widget, which is cleared first.
    Any exception is caught and reported there together with its traceback.

    Args:
        button: the widget that fired the click (unused).
    """
    with output:
        clear_output()

        try:
            input_df, user_input = prepare_input_data()

            # Echo the raw values collected from the widgets.
            print("📋 Collected Input:")
            for field in user_input:
                print(f"  {field}: {user_input[field]}")
            print()

            print("✅ Feature engineering completed")
            print(f"📊 Input shape: {input_df.shape}")

            # Run the loaded model on the single prepared row.
            predicted_bill = loaded_model.predict(input_df)

            print("\n🎯 PREDICTION RESULT")
            print("=" * 30)
            print(f"💰 Predicted Electricity Bill: ₹{predicted_bill[0]:.2f}")
            print("\n📊 INPUT SUMMARY:")
            chosen_city = user_input['City']
            print(f"📍 Location: {chosen_city}, {city_to_state.get(chosen_city, 'Unknown')}")
            print(f"🏢 Company: {user_input['Company']}")

            # Reverse lookup: first dropdown label whose value is the selected month.
            month_label = "Unknown"
            for option_label, option_value in month_dropdown.options:
                if option_value == month_dropdown.value:
                    month_label = option_label
                    break
            print(f"📅 Month: {month_label}")

            print(f"⏰ Monthly Hours: {hours_slider.value}")
            print(f"💡 Tariff Rate: ₹{tariff_slider.value}/kWh")
            print(f"🏠 Total Appliances: {int(input_df['TotalAppliances'].iloc[0])}")

        except Exception as e:
            # Show the failure in the output area instead of losing it.
            print(f"❌ Error making prediction: {e!s}")
            print(f"Error type: {type(e).__name__}")
            import traceback
            print("Full traceback:")
            traceback.print_exc()

def compare_models(button):
    """Click handler: retrain several regressors and compare them side by side.

    Splits the global ``X``/``y`` (80/20, seed 42), fits Linear Regression,
    Random Forest and XGBoost (or a second Random Forest when xgboost cannot be
    imported), then draws a 2x2 grid of bar charts (MAE, R², prediction for the
    current widget input, RMSE) and prints a textual summary. All output goes
    to ``comparison_output``; exceptions are caught and printed there.

    Args:
        button: the widget that fired the click (unused).
    """
    bar_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1']

    def _bar_panel(ax, labels, heights, title, ylabel, label_fmt):
        """Draw one bar chart on ``ax`` and write each value just above its bar."""
        ax.bar(labels, heights, color=bar_colors)
        ax.set_title(title, fontweight='bold')
        ax.set_ylabel(ylabel)
        ax.tick_params(axis='x', rotation=45)
        # Labels sit 1% of the tallest bar above each bar top.
        lift = max(heights) * 0.01
        for position, height in enumerate(heights):
            ax.text(position, height + lift, label_fmt.format(height), ha='center')

    with comparison_output:
        clear_output()

        try:
            # Both the feature matrix and the target must already exist.
            training_data_ready = 'X' in globals() and 'y' in globals()
            if not training_data_ready:
                print("❌ Error: Training data 'X' and 'y' not found in global variables")
                print("Please make sure you have run the training code and both 'X' and 'y' are available")
                return

            print("🔄 Training multiple models for comparison...")
            print("This may take a few moments...")

            # Feature row for the values currently set in the widgets.
            input_df, user_input = prepare_input_data()

            # Fresh 80/20 split used only inside this comparison.
            from sklearn.model_selection import train_test_split
            X_fit, X_eval, y_fit, y_eval = train_test_split(X, y, test_size=0.2, random_state=42)

            # Candidate models; the XGBoost slot is filled below when possible.
            models = {
                'Linear Regression': LinearRegression(),
                'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
                'XGBoost': None,
            }

            try:
                import xgboost as xgb_lib
                models['XGBoost'] = xgb_lib.XGBRegressor(n_estimators=100, random_state=42)
            except ImportError:
                print("⚠️ XGBoost not available. Using Random Forest instead.")
                models['XGBoost (RF)'] = RandomForestRegressor(n_estimators=150, random_state=42)

            # Fit each available model, score it and predict the current input.
            results = {}
            predictions_current = {}

            for name, model in models.items():
                if model is None:
                    continue

                print(f"Training {name}...")
                model.fit(X_fit, y_fit)
                eval_pred = model.predict(X_eval)

                abs_err = mean_absolute_error(y_eval, eval_pred)
                sq_err = mean_squared_error(y_eval, eval_pred)
                results[name] = {
                    'MAE': abs_err,
                    'MSE': sq_err,
                    'RMSE': np.sqrt(sq_err),
                    'R²': r2_score(y_eval, eval_pred),
                }

                predictions_current[name] = model.predict(input_df)[0]

            # 2x2 grid of comparison charts.
            fig, axes = plt.subplots(2, 2, figsize=(15, 12))
            fig.suptitle('Model Performance Comparison', fontsize=16, fontweight='bold')

            # One row per model, one column per metric.
            metrics_df = pd.DataFrame(results).T

            _bar_panel(axes[0, 0], metrics_df.index, metrics_df['MAE'],
                       'Mean Absolute Error (MAE)', 'MAE', '{:.2f}')

            _bar_panel(axes[0, 1], metrics_df.index, metrics_df['R²'],
                       'R² Score (Higher is Better)', 'R² Score', '{:.3f}')

            pred_df = pd.DataFrame(list(predictions_current.items()), columns=['Model', 'Prediction'])
            _bar_panel(axes[1, 0], pred_df['Model'], pred_df['Prediction'],
                       'Predictions for Current Input', 'Predicted Bill (₹)', '₹{:.2f}')

            _bar_panel(axes[1, 1], metrics_df.index, metrics_df['RMSE'],
                       'Root Mean Square Error (RMSE)', 'RMSE', '{:.2f}')

            plt.tight_layout()
            plt.show()

            # Textual version of the same comparison.
            print("\n📊 DETAILED MODEL COMPARISON")
            print("=" * 50)

            for name in results:
                scores = results[name]
                print(f"\n🔸 {name}:")
                print(f"   MAE:  {scores['MAE']:.2f}")
                print(f"   RMSE: {scores['RMSE']:.2f}")
                print(f"   R²:   {scores['R²']:.3f}")
                print(f"   Current Prediction: ₹{predictions_current[name]:.2f}")

            # Winners by highest R² and by lowest MAE.
            top_r2_name = max(results, key=lambda model_name: results[model_name]['R²'])
            top_mae_name = min(results, key=lambda model_name: results[model_name]['MAE'])

            print("\n🏆 BEST PERFORMERS:")
            print(f"   Highest R² Score: {top_r2_name} ({results[top_r2_name]['R²']:.3f})")
            print(f"   Lowest MAE: {top_mae_name} ({results[top_mae_name]['MAE']:.2f})")

            # Recap of the input that the predictions refer to.
            print("\n📋 CURRENT INPUT SUMMARY:")
            month_label = "Unknown"
            for option_label, option_value in month_dropdown.options:
                if option_value == month_dropdown.value:
                    month_label = option_label
                    break
            appliance_total = sum(
                user_input[appliance]
                for appliance in ('Fan', 'Refrigerator', 'AirConditioner',
                                  'Television', 'Monitor', 'MotorPump')
            )
            print(f"   📍 Location: {user_input['City']}, {city_to_state.get(user_input['City'], 'Unknown')}")
            print(f"   📅 Month: {month_label}")
            print(f"   🏠 Total Appliances: {appliance_total}")
            print(f"   ⏰ Monthly Hours: {user_input['MonthlyHours']}")
            print(f"   💡 Tariff Rate: ₹{user_input['TariffRate']}/kWh")

        except Exception as e:
            # Show the failure in the comparison area instead of losing it.
            print(f"❌ Error in model comparison: {e!s}")
            print(f"Error type: {type(e).__name__}")
            import traceback
            traceback.print_exc()

# Wire each button to its click handler.
predict_button.on_click(predict_bill)
compare_button.on_click(compare_models)


def _spacer():
    """Return a new HTML line-break widget used as vertical spacing."""
    return widgets.HTML("<br>")


# Group the controls into titled sections.
appliance_section = widgets.VBox(
    [widgets.HTML("<h3>🏠 Home Appliances</h3>")]
    + [fans_slider, refrigerator_slider, ac_slider, tv_slider, monitor_slider, pump_slider]
)

usage_section = widgets.VBox(
    [widgets.HTML("<h3>📊 Usage & Billing Details</h3>")]
    + [month_dropdown, hours_slider, tariff_slider]
)

location_section = widgets.VBox(
    [widgets.HTML("<h3>📍 Location & Company</h3>")]
    + [city_dropdown, company_dropdown]
)

# Both buttons side by side, centred.
button_section = widgets.HBox(
    [predict_button, compare_button],
    layout=widgets.Layout(justify_content='center'),
)

# Full page: title, rule, the three sections, the buttons, then both output areas.
main_interface = widgets.VBox([
    widgets.HTML("<h1 style='text-align: center; color: #2E86AB;'>⚡ Electricity Bill Predictor</h1>"),
    widgets.HTML("<hr>"),
    appliance_section,
    _spacer(),
    usage_section,
    _spacer(),
    location_section,
    _spacer(),
    button_section,
    _spacer(),
    output,
    _spacer(),
    comparison_output,
])

# Display the interface
display(main_interface)

# Usage instructions printed below the interface, framed by two rules.
print("\n" + "=" * 60)
print(
    "📋 INSTRUCTIONS:",
    "1. Adjust the sliders to set the number of appliances",
    "2. Select your city and electricity company from dropdowns",
    "3. Set the month and usage hours",
    "4. Adjust the tariff rate",
    "5. Click 'Predict Bill' to get your estimate",
    "6. Click 'Compare Models' to see performance comparison",
    sep="\n",
)
print("=" * 60)
