# Energy Consumption Prediction
A comprehensive system for predicting and managing household energy consumption using machine learning and time series forecasting techniques.

In [None]:
# Energy Consumption Prediction

import json
import os
import pickle
import warnings
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
import lightgbm as lgb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from prophet import Prophet
import requests
import joblib

# Silence library warnings so cell output stays readable
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(42)

# Display / plotting defaults used throughout the notebook
pd.set_option('display.max_columns', None)
sns.set_style('whitegrid')
plt.style.use('ggplot')

# Make sure the output folders for models, results and data exist
for folder in ('models', 'results', 'data'):
    os.makedirs(folder, exist_ok=True)


## Step 1: Data Collection

In [None]:
# =====================================================================

def fetch_smart_meter_data(start_date='2023-01-01', end_date='2023-12-31', interval='1h'):
    """
    Simulate fetching smart meter data from an API or database.
    In a real scenario, this would connect to an actual data source.

    The synthetic signal is the sum of a yearly, a weekly and a daily sine
    wave plus Gaussian noise, min-max scaled to the range [0.2, 2.0] kWh.
    About 5% of the readings are then blanked out (NaN) and about 1% are
    multiplied by a factor in [3, 5) to act as outliers.

    Parameters
    ----------
    start_date, end_date : str
        Bounds passed to ``pd.date_range``.
    interval : str
        Sampling frequency passed to ``pd.date_range``.

    Returns
    -------
    pd.DataFrame
        Columns ``timestamp`` and ``energy_consumption``. Empty (but with
        both columns) when the requested range contains no samples.
    """
    print(f"Fetching smart meter data from {start_date} to {end_date}...")

    # Create a date range with the specified interval
    date_range = pd.date_range(start=start_date, end=end_date, freq=interval)
    n_samples = len(date_range)
    if n_samples == 0:
        return pd.DataFrame({
            'timestamp': date_range,
            'energy_consumption': np.array([], dtype=float)
        })

    # Drive every cycle from real elapsed time so that the periods stay
    # correct for any date range and any sampling interval. (Deriving the
    # phase from the sample count made the "daily" wave complete one full
    # cycle per sample, which aliases to ~0.)
    elapsed_hours = np.asarray((date_range - date_range[0]) / pd.Timedelta(hours=1), dtype=float)

    yearly_pattern = 0.5 + 0.3 * np.sin(2 * np.pi * elapsed_hours / (24 * 365.25))  # Yearly cycle
    daily_pattern = 0.2 * np.sin(2 * np.pi * elapsed_hours / 24)                    # Daily cycle
    weekly_pattern = 0.1 * np.sin(2 * np.pi * elapsed_hours / (24 * 7))             # Weekly cycle
    random_variation = 0.1 * np.random.randn(n_samples)                             # Random noise

    # Combine patterns
    consumption = yearly_pattern + daily_pattern + weekly_pattern + random_variation

    # Scale to realistic kWh values (between 0.2 and 2 kWh per sample)
    span = consumption.max() - consumption.min()
    if span > 0:
        consumption = 0.2 + 1.8 * (consumption - consumption.min()) / span
    else:
        # A single sample (or a constant signal) has no range to scale over
        consumption = np.full(n_samples, 0.2)

    # Create DataFrame
    df = pd.DataFrame({
        'timestamp': date_range,
        'energy_consumption': consumption
    })

    # Add some missing values (5% of data)
    mask = np.random.choice([True, False], size=len(df), p=[0.05, 0.95])
    df.loc[mask, 'energy_consumption'] = np.nan

    # Add outliers (1% of data); rows already set to NaN stay NaN
    outlier_mask = np.random.choice([True, False], size=len(df), p=[0.01, 0.99])
    outlier_factors = np.random.uniform(3, 5, size=int(outlier_mask.sum()))
    df.loc[outlier_mask, 'energy_consumption'] = df.loc[outlier_mask, 'energy_consumption'] * outlier_factors

    return df

def fetch_weather_data(location='New York', start_date='2023-01-01', end_date='2023-12-31'):
    """
    Simulate fetching weather data from an API.
    In a real scenario, this would connect to a weather API like OpenWeatherMap or DarkSky.

    Parameters
    ----------
    location : str
        Only used in the progress message; the synthetic data does not
        depend on it.
    start_date, end_date : str
        Bounds passed to ``pd.date_range`` (hourly frequency).

    Returns
    -------
    pd.DataFrame
        Hourly rows with columns ``timestamp``, ``temperature``,
        ``humidity``, ``wind_speed``, ``cloud_cover`` and ``precipitation``.
    """
    print(f"Fetching weather data for {location} from {start_date} to {end_date}...")

    # Create a date range with hourly interval
    date_range = pd.date_range(start=start_date, end=end_date, freq='1h')
    n_samples = len(date_range)

    # Hours since the first sample; all periodic terms are expressed in real
    # time so their periods do not change with the length of the range.
    elapsed_hours = np.arange(n_samples, dtype=float)

    # Generate synthetic weather data
    # Temperature with seasonal pattern (°C): one cycle per 365 days, repeating
    seasonal_temp = 15 + 15 * np.sin(2 * np.pi * elapsed_hours / (365 * 24))

    # Daily temperature variation: one cycle per 24 hours
    daily_temp = 5 * np.sin(2 * np.pi * elapsed_hours / 24)

    # Random variations
    random_temp = 3 * np.random.randn(n_samples)

    # Combine patterns for temperature
    temperature = seasonal_temp + daily_temp + random_temp

    # Humidity (%): two cycles per day
    humidity = 60 + 20 * np.sin(2 * np.pi * elapsed_hours / 12) + 10 * np.random.randn(n_samples)
    humidity = np.clip(humidity, 0, 100)

    # Wind speed (m/s)
    wind_speed = 5 + 3 * np.random.randn(n_samples)
    wind_speed = np.clip(wind_speed, 0, 30)

    # Cloud cover (%): four cycles per day
    cloud_cover = 50 + 30 * np.sin(2 * np.pi * elapsed_hours / 6) + 20 * np.random.randn(n_samples)
    cloud_cover = np.clip(cloud_cover, 0, 100)

    # Precipitation (mm): exponential amounts, kept for roughly 20% of hours
    precipitation = np.random.exponential(scale=0.5, size=n_samples)
    precipitation_mask = np.random.choice([True, False], size=n_samples, p=[0.2, 0.8])
    precipitation = precipitation * precipitation_mask

    # Create DataFrame
    weather_df = pd.DataFrame({
        'timestamp': date_range,
        'temperature': temperature,
        'humidity': humidity,
        'wind_speed': wind_speed,
        'cloud_cover': cloud_cover,
        'precipitation': precipitation
    })

    return weather_df

# Pull the (simulated) raw energy and weather frames using the default date range
energy_df, weather_df = fetch_smart_meter_data(), fetch_weather_data()

In [None]:

# Show the first rows of each raw frame
for caption, frame in (
    ("\nEnergy Consumption Data (First 5 rows):", energy_df),
    ("\nWeather Data (First 5 rows):", weather_df),
):
    print(caption)
    print(frame.head())

# Persist the raw frames as CSV
for frame, csv_path in (
    (energy_df, 'data/raw_energy_data.csv'),
    (weather_df, 'data/raw_weather_data.csv'),
):
    frame.to_csv(csv_path, index=False)


## Step 2: Data Preprocessing

In [None]:
# =====================================================================

def preprocess_energy_data(df):
    """
    Preprocess energy consumption data:
    - Handle missing values (time-weighted interpolation, then ffill/bfill)
    - Cap outliers at the IQR fences
    - Extract datetime features
    - Add lag, rolling and seasonal-mean features

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``timestamp`` and ``energy_consumption`` columns.

    Returns
    -------
    pd.DataFrame
        A processed copy of ``df``. Rows that lack a full history for the
        longest lag/rolling window (168 samples) are dropped.
    """
    print("Preprocessing energy consumption data...")

    # Create a copy to avoid modifying the original data
    processed_df = df.copy()
    processed_df['timestamp'] = pd.to_datetime(processed_df['timestamp'])

    # Handle missing values: interpolate along the time axis, then forward/backward
    # fill whatever remains at the edges. method='time' needs a DatetimeIndex, so
    # the series is temporarily re-indexed by timestamp.
    print(f"Missing values before imputation: {processed_df['energy_consumption'].isna().sum()}")
    by_time = pd.Series(
        processed_df['energy_consumption'].to_numpy(),
        index=pd.DatetimeIndex(processed_df['timestamp'])
    )
    processed_df['energy_consumption'] = by_time.interpolate(method='time').ffill().bfill().to_numpy()
    print(f"Missing values after imputation: {processed_df['energy_consumption'].isna().sum()}")

    # Detect and handle outliers using IQR method
    Q1 = processed_df['energy_consumption'].quantile(0.25)
    Q3 = processed_df['energy_consumption'].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    outliers_mask = (processed_df['energy_consumption'] < lower_bound) | (processed_df['energy_consumption'] > upper_bound)
    print(f"Number of outliers detected: {outliers_mask.sum()}")

    # Cap outliers instead of removing them
    processed_df.loc[processed_df['energy_consumption'] < lower_bound, 'energy_consumption'] = lower_bound
    processed_df.loc[processed_df['energy_consumption'] > upper_bound, 'energy_consumption'] = upper_bound

    # Extract datetime features
    processed_df['hour'] = processed_df['timestamp'].dt.hour
    processed_df['day'] = processed_df['timestamp'].dt.day
    processed_df['day_of_week'] = processed_df['timestamp'].dt.dayofweek
    processed_df['month'] = processed_df['timestamp'].dt.month
    processed_df['year'] = processed_df['timestamp'].dt.year
    processed_df['is_weekend'] = processed_df['day_of_week'].isin([5, 6]).astype(int)

    # Add time-based flags
    processed_df['is_morning'] = ((processed_df['hour'] >= 6) & (processed_df['hour'] < 12)).astype(int)
    processed_df['is_afternoon'] = ((processed_df['hour'] >= 12) & (processed_df['hour'] < 18)).astype(int)
    processed_df['is_evening'] = ((processed_df['hour'] >= 18) & (processed_df['hour'] < 22)).astype(int)
    processed_df['is_night'] = ((processed_df['hour'] >= 22) | (processed_df['hour'] < 6)).astype(int)

    # Add lagged features
    processed_df['consumption_lag_1h'] = processed_df['energy_consumption'].shift(1)
    processed_df['consumption_lag_24h'] = processed_df['energy_consumption'].shift(24)
    processed_df['consumption_lag_168h'] = processed_df['energy_consumption'].shift(168)  # 1 week

    # Rolling statistics are computed over *past* values only (shifted by one
    # step). Including the current row would put the prediction target inside
    # its own features.
    past_consumption = processed_df['energy_consumption'].shift(1)

    # Add rolling mean features
    processed_df['rolling_mean_6h'] = past_consumption.rolling(window=6).mean()
    processed_df['rolling_mean_24h'] = past_consumption.rolling(window=24).mean()
    processed_df['rolling_mean_168h'] = past_consumption.rolling(window=168).mean()

    # Add rolling standard deviation
    processed_df['rolling_std_24h'] = past_consumption.rolling(window=24).std()

    # Calculate daily and weekly seasonality
    # NOTE(review): these means are taken over the whole frame, so they also
    # include rows that later end up in the validation/test splits.
    processed_df['daily_mean'] = processed_df.groupby(['hour'])['energy_consumption'].transform('mean')
    processed_df['weekly_mean'] = processed_df.groupby(['day_of_week', 'hour'])['energy_consumption'].transform('mean')

    # Drop rows with NaN values (caused by lag and rolling features)
    processed_df = processed_df.dropna()

    return processed_df

def preprocess_weather_data(df):
    """
    Preprocess weather data:
    - Handle missing values
    - Extract additional features

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``timestamp``, ``temperature``, ``humidity``,
        ``wind_speed`` and ``precipitation`` columns.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` with gaps in the numeric columns filled and the
        derived columns ``feels_like``, ``heat_index``, ``is_rainy``,
        ``is_windy``, ``is_humid``, ``is_cold`` and ``is_hot`` added.
    """
    print("Preprocessing weather data...")

    # Create a copy to avoid modifying the original data
    processed_df = df.copy()
    processed_df['timestamp'] = pd.to_datetime(processed_df['timestamp'])

    # Handle missing values. Time-weighted interpolation needs a DatetimeIndex,
    # so the numeric columns are interpolated on a timestamp-indexed view and
    # the filled values are written back positionally.
    numeric_cols = processed_df.select_dtypes(include='number').columns
    if len(numeric_cols) > 0:
        by_time = processed_df[numeric_cols].copy()
        by_time.index = pd.DatetimeIndex(processed_df['timestamp'])
        filled = by_time.interpolate(method='time').ffill().bfill()
        for col in numeric_cols:
            processed_df[col] = filled[col].to_numpy()

    # Create additional weather features
    processed_df['feels_like'] = processed_df['temperature'] - 0.4 * (processed_df['wind_speed'] / 10) * (33 - processed_df['temperature'])
    processed_df['heat_index'] = processed_df['temperature'] + 0.05 * processed_df['humidity']

    # Create weather condition categories
    processed_df['is_rainy'] = (processed_df['precipitation'] > 0.5).astype(int)
    processed_df['is_windy'] = (processed_df['wind_speed'] > 10).astype(int)
    processed_df['is_humid'] = (processed_df['humidity'] > 70).astype(int)
    processed_df['is_cold'] = (processed_df['temperature'] < 10).astype(int)
    processed_df['is_hot'] = (processed_df['temperature'] > 25).astype(int)

    return processed_df

# Clean both raw frames and derive their per-source features
processed_energy_df = preprocess_energy_data(df=energy_df)
processed_weather_df = preprocess_weather_data(df=weather_df)

## Step 3: Feature Engineering

In [None]:
# =====================================================================

def merge_data_and_engineer_features(energy_df, weather_df):
    """
    Merge energy and weather data and create additional features.

    Parameters
    ----------
    energy_df : pd.DataFrame
        Needs ``timestamp``, ``energy_consumption``, ``hour``, ``month`` and
        ``is_weekend`` columns.
    weather_df : pd.DataFrame
        Needs ``timestamp``, ``temperature``, ``humidity`` and ``wind_speed``
        columns.

    Returns
    -------
    pd.DataFrame
        Inner join of both frames on ``timestamp`` plus the engineered
        columns. ``season`` and ``time_block`` are returned one-hot encoded
        as integer 0/1 columns. The result is also written to
        ``data/processed_data.csv``.
    """
    print("Merging datasets and engineering features...")

    # Merge datasets on timestamp
    merged_df = pd.merge(energy_df, weather_df, on='timestamp', how='inner')

    # Create interaction features
    merged_df['temp_humid_interaction'] = merged_df['temperature'] * merged_df['humidity'] / 100
    merged_df['wind_temp_interaction'] = merged_df['wind_speed'] * merged_df['temperature'] / 10

    # Create heating/cooling degree days
    base_temp = 18  # Base temperature for HDD/CDD calculation (°C)
    merged_df['heating_degree'] = np.maximum(0, base_temp - merged_df['temperature'])
    merged_df['cooling_degree'] = np.maximum(0, merged_df['temperature'] - base_temp)

    # Create categorical season feature
    merged_df['season'] = merged_df['month'].apply(lambda x:
                                                   'Winter' if x in [12, 1, 2] else
                                                   'Spring' if x in [3, 4, 5] else
                                                   'Summer' if x in [6, 7, 8] else
                                                   'Fall')

    # Add holiday flag (simplified - just weekends for demo)
    merged_df['is_holiday'] = merged_df['is_weekend']

    # Create time block feature (left-closed bins: 0-5, 6-11, 12-17, 18-23)
    merged_df['time_block'] = pd.cut(merged_df['hour'],
                                     bins=[0, 6, 12, 18, 24],
                                     labels=['night', 'morning', 'afternoon', 'evening'],
                                     include_lowest=True, right=False)

    # Add peak/off-peak feature based on time of day (07-09 and 17-20 inclusive)
    morning_peak = (merged_df['hour'] >= 7) & (merged_df['hour'] <= 9)
    evening_peak = (merged_df['hour'] >= 17) & (merged_df['hour'] <= 20)
    merged_df['is_peak_time'] = (morning_peak | evening_peak).astype(int)

    # One-hot encode categorical features. dtype=int keeps the dummies as 0/1
    # integers on every pandas version (newer versions default to bool), in
    # line with the other binary flags in this frame.
    categorical_features = ['season', 'time_block']
    merged_df = pd.get_dummies(merged_df, columns=categorical_features, drop_first=False, dtype=int)

    # Calculate energy price based on time of day (simulated)
    peak_price = 0.22  # $/kWh during peak hours
    off_peak_price = 0.12  # $/kWh during off-peak hours

    merged_df['energy_price'] = np.where(merged_df['is_peak_time'] == 1, peak_price, off_peak_price)
    merged_df['energy_cost'] = merged_df['energy_consumption'] * merged_df['energy_price']

    # Save processed data
    os.makedirs('data', exist_ok=True)
    merged_df.to_csv('data/processed_data.csv', index=False)

    print(f"Final dataset shape: {merged_df.shape}")
    print(f"Features: {', '.join(merged_df.columns.tolist())}")

    return merged_df

# Join the two processed frames and add the combined features
final_df = merge_data_and_engineer_features(
    energy_df=processed_energy_df,
    weather_df=processed_weather_df,
)

# Summary statistics of the final modelling frame
final_summary = final_df.describe()
print("\nFinal Dataset Summary:")
print(final_summary)


## Step 4: Exploratory Data Analysis

In [None]:
# =====================================================================

def _save_mean_consumption_bars(df, group_col, title, xlabel, tick_positions, tick_labels, filename):
    """Bar-plot the mean energy consumption per `group_col` value and save it to results/<filename>."""
    grouped = df.groupby(group_col)['energy_consumption'].mean().reset_index()

    plt.figure(figsize=(12, 6))
    plt.bar(grouped[group_col], grouped['energy_consumption'])
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Avg Energy Consumption (kWh)')
    if tick_labels is None:
        plt.xticks(tick_positions)
    else:
        plt.xticks(tick_positions, tick_labels)
    plt.grid(True, axis='y')
    plt.tight_layout()
    plt.savefig(f'results/{filename}')


def perform_eda(df):
    """
    Perform exploratory data analysis on the dataset.

    Saves the following figures under ``results/``: the consumption time
    series, mean consumption by hour / day of week / month, consumption vs.
    temperature, a correlation heatmap, and the weekend vs. weekday hourly
    profile. Also prints per-season summary statistics and the strongest
    positive/negative correlations with ``energy_consumption``.

    Parameters
    ----------
    df : pd.DataFrame
        The merged, feature-engineered frame (with one-hot ``season_*``
        columns).

    Returns
    -------
    pd.Series
        Correlation of every numeric/boolean column with
        ``energy_consumption``, sorted in descending order.
    """
    print("\nPerforming Exploratory Data Analysis...")

    # Create figure for time series plot
    plt.figure(figsize=(14, 7))
    plt.plot(df['timestamp'], df['energy_consumption'])
    plt.title('Energy Consumption Over Time')
    plt.xlabel('Date')
    plt.ylabel('Energy Consumption (kWh)')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig('results/energy_consumption_time_series.png')

    # Daily, weekly and seasonal patterns
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    _save_mean_consumption_bars(df, 'hour', 'Average Energy Consumption by Hour of Day',
                                'Hour of Day', range(0, 24), None, 'hourly_energy_pattern.png')
    _save_mean_consumption_bars(df, 'day_of_week', 'Average Energy Consumption by Day of Week',
                                'Day of Week', range(7), days, 'weekly_energy_pattern.png')
    _save_mean_consumption_bars(df, 'month', 'Average Energy Consumption by Month',
                                'Month', range(1, 13), months, 'monthly_energy_pattern.png')

    # Correlation with weather
    plt.figure(figsize=(14, 7))
    plt.scatter(df['temperature'], df['energy_consumption'], alpha=0.5)
    plt.title('Energy Consumption vs. Temperature')
    plt.xlabel('Temperature (°C)')
    plt.ylabel('Energy Consumption (kWh)')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig('results/energy_vs_temperature.png')

    # Correlation matrix
    corr_cols = ['energy_consumption', 'temperature', 'humidity', 'wind_speed',
                 'cloud_cover', 'precipitation', 'hour', 'day_of_week',
                 'is_weekend', 'heating_degree', 'cooling_degree']

    corr_matrix = df[corr_cols].corr()

    plt.figure(figsize=(12, 10))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0)
    plt.title('Correlation Matrix')
    plt.tight_layout()
    plt.savefig('results/correlation_matrix.png')

    # Weekend vs Weekday comparison
    plt.figure(figsize=(14, 7))

    weekend_data = df[df['is_weekend'] == 1].groupby('hour')['energy_consumption'].mean()
    weekday_data = df[df['is_weekend'] == 0].groupby('hour')['energy_consumption'].mean()

    plt.plot(weekend_data.index, weekend_data.values, 'b-', linewidth=2, label='Weekend')
    plt.plot(weekday_data.index, weekday_data.values, 'r-', linewidth=2, label='Weekday')

    plt.title('Weekend vs Weekday Energy Consumption Pattern')
    plt.xlabel('Hour of Day')
    plt.ylabel('Average Energy Consumption (kWh)')
    plt.legend()
    plt.grid(True)
    plt.xticks(range(0, 24))
    plt.tight_layout()
    plt.savefig('results/weekend_vs_weekday.png')

    # Print summary stats per season. The season column is one-hot encoded at
    # this point, so rebuild a single label from the season_* dummies instead
    # of grouping on one dummy (which only separates Winter from the rest).
    print("\nSummary Statistics by Season:")
    season_cols = [col for col in df.columns if col.startswith('season_')]
    if season_cols:
        season_label = (
            df[season_cols].astype(int).idxmax(axis=1)
            .str.replace('season_', '', regex=False)
            .rename('season')
        )
        print(df.groupby(season_label)['energy_consumption'].describe())
    else:
        print("No season_* columns found.")

    # Restrict to numeric/boolean columns: correlating the raw frame fails on
    # the datetime 'timestamp' column with recent pandas versions.
    print("\nCorrelation with Energy Consumption:")
    numeric_df = df.select_dtypes(include=['number', 'bool'])
    correlations = numeric_df.corr()['energy_consumption'].sort_values(ascending=False)
    print(correlations.head(10))
    print(correlations.tail(10))

    return correlations

# Run the exploratory analysis; keeps the sorted correlations with the target
feature_correlations = perform_eda(df=final_df)


## Step 5: Feature Selection and Data Preparation

In [None]:
# =====================================================================

def prepare_data_for_modeling(df, target_col='energy_consumption', test_size=0.2, validation_size=0.25):
    """
    Prepare data for modeling:
    - Split chronologically into training, validation, and test sets
    - Scale numerical features (scaler fitted on the training split only)

    Parameters
    ----------
    df : pd.DataFrame
        Feature-engineered frame; rows are assumed to be in time order
        because the splits are taken without shuffling.
    target_col : str
        Name of the target column.
    test_size : float
        Fraction of all rows held out (from the end) as the test set.
    validation_size : float
        Fraction of the remaining rows held out (from the end) as the
        validation set.

    Returns
    -------
    tuple
        ``(X_train, X_train_scaled, y_train, X_val, X_val_scaled, y_val,
        X_test, X_test_scaled, y_test, features, numerical_features)``.
        The ``*_scaled`` frames contain only the numerical features.
    """
    print("\nPreparing data for modeling...")

    # Exclude columns that shouldn't be used for modeling
    # (energy_cost is derived directly from the target)
    exclude_cols = ['timestamp', 'energy_price', 'energy_cost']

    # Get list of potentially useful features
    features = [col for col in df.columns if col != target_col and col not in exclude_cols]

    # First split: training+validation vs test
    train_val_df, test_df = train_test_split(df, test_size=test_size, shuffle=False)

    # Second split: training vs validation
    train_df, val_df = train_test_split(train_val_df, test_size=validation_size, shuffle=False)

    # Get final feature list
    X_train = train_df[features]
    y_train = train_df[target_col]

    X_val = val_df[features]
    y_val = val_df[target_col]

    X_test = test_df[features]
    y_test = test_df[target_col]

    # Define numerical features. Select every numeric dtype: restricting to
    # int64/float64 silently drops columns such as the int32 values that the
    # .dt accessors return on recent pandas versions.
    numerical_features = X_train.select_dtypes(include='number').columns.tolist()

    # Create preprocessor
    preprocessor = StandardScaler()

    # Fit preprocessor on training data
    X_train_scaled = pd.DataFrame(
        preprocessor.fit_transform(X_train[numerical_features]),
        columns=numerical_features,
        index=X_train.index
    )

    # Transform validation and test data
    X_val_scaled = pd.DataFrame(
        preprocessor.transform(X_val[numerical_features]),
        columns=numerical_features,
        index=X_val.index
    )

    X_test_scaled = pd.DataFrame(
        preprocessor.transform(X_test[numerical_features]),
        columns=numerical_features,
        index=X_test.index
    )

    # Save scaler for later use
    joblib.dump(preprocessor, 'models/scaler.pkl')

    print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}")

    return X_train, X_train_scaled, y_train, X_val, X_val_scaled, y_val, X_test, X_test_scaled, y_test, features, numerical_features

# Build the chronological train/validation/test splits and their scaled counterparts
modeling_splits = prepare_data_for_modeling(final_df)
(X_train, X_train_scaled, y_train,
 X_val, X_val_scaled, y_val,
 X_test, X_test_scaled, y_test,
 features, numerical_features) = modeling_splits


## Step 6: Model Training and Evaluation

In [None]:
# =====================================================================

def evaluate_model(y_true, y_pred, model_name):
    """
    Evaluate model performance.

    Prints MSE, RMSE, MAE, R² and MAPE for the given predictions.

    Parameters
    ----------
    y_true : array-like
        Actual target values.
    y_pred : array-like
        Predicted values, same length as ``y_true``.
    model_name : str
        Label used in the printed report and in the returned dict.

    Returns
    -------
    dict
        Keys ``model_name``, ``mse``, ``rmse``, ``mae``, ``r2`` and ``mape``.
        ``mape`` is NaN when every actual value is zero.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    print(f"\n{model_name} Performance:")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"R² Score: {r2:.4f}")

    # Calculate MAPE on flat arrays, skipping zero actuals: the percentage
    # error is undefined there and would turn the mean into inf/NaN.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])) * 100
    else:
        mape = float('nan')
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

    return {
        'model_name': model_name,
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'r2': r2,
        'mape': mape
    }

def plot_predictions(y_true, y_pred, title, filename):
    """
    Draw actual and predicted consumption on one axis and save the figure.

    `y_true` supplies both the x positions (its index) and the actual
    values; `y_pred` is plotted against the same index. The image is
    written to results/<filename>.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    x_positions = y_true.index

    ax.plot(x_positions, y_true.values, 'b-', label='Actual')
    ax.plot(x_positions, y_pred, 'r-', label='Predicted')

    ax.set_title(title)
    ax.set_xlabel('Date')
    ax.set_ylabel('Energy Consumption (kWh)')
    ax.legend()
    ax.grid(True)

    fig.tight_layout()
    fig.savefig(f'results/{filename}')

# Function to create LSTM input sequences
def create_sequences(X, y, time_steps=24):
    """
    Create sequences for LSTM model.

    Builds sliding windows of ``time_steps`` consecutive rows of ``X`` and
    pairs each window with the value of ``y`` that immediately follows it.

    Parameters
    ----------
    X : array-like
        Feature rows; the first axis is the one that is windowed.
    y : array-like
        Targets aligned with ``X`` by position.
    time_steps : int
        Window length.

    Returns
    -------
    tuple of np.ndarray
        ``(Xs, ys)`` where ``Xs`` has shape
        ``(len(X) - time_steps, time_steps, ...)`` and ``ys`` has
        ``len(X) - time_steps`` entries. Both are empty (first dimension 0)
        when ``X`` has no more than ``time_steps`` rows.
    """
    # Work on plain arrays so indexing is always positional, even when a
    # pandas object with a non-default index is passed in.
    X_arr = np.asarray(X)
    y_arr = np.asarray(y)

    n_windows = len(X_arr) - time_steps
    if n_windows <= 0:
        empty_X = np.empty((0, time_steps) + X_arr.shape[1:], dtype=X_arr.dtype)
        empty_y = np.empty((0,) + y_arr.shape[1:], dtype=y_arr.dtype)
        return empty_X, empty_y

    Xs = [X_arr[i:i + time_steps] for i in range(n_windows)]
    ys = [y_arr[i + time_steps] for i in range(n_windows)]
    return np.array(Xs), np.array(ys)

# 1. Gradient Boosting Model
def train_gradient_boosting(X_train, y_train, X_val, y_val):
    """
    Train a Gradient Boosting Regressor.

    Tunes ``n_estimators``, ``learning_rate`` and ``max_depth`` with a grid
    search, evaluates the best estimator on the validation set, saves the
    prediction and feature-importance plots under ``results/`` and the
    model under ``models/``.

    Parameters
    ----------
    X_train, y_train : pd.DataFrame, pd.Series
        Training features and target, in time order.
    X_val, y_val : pd.DataFrame, pd.Series
        Validation features and target.

    Returns
    -------
    tuple
        ``(best_model, metrics)`` where ``metrics`` is the dict returned by
        ``evaluate_model``.
    """
    print("\nTraining Gradient Boosting model...")

    # Define parameter grid
    param_grid = {
        'n_estimators': [100, 200],
        'learning_rate': [0.05, 0.1],
        'max_depth': [3, 5]
    }

    # Create model
    gb_model = GradientBoostingRegressor(random_state=42)

    # Create grid search. TimeSeriesSplit keeps each validation fold strictly
    # after its training fold; a plain 3-fold split would score models on data
    # that precedes part of what they were trained on.
    grid_search = GridSearchCV(
        estimator=gb_model,
        param_grid=param_grid,
        cv=TimeSeriesSplit(n_splits=3),
        n_jobs=-1,
        scoring='neg_mean_squared_error',
        verbose=1
    )

    # Fit grid search
    grid_search.fit(X_train, y_train)

    # Get best model
    best_gb_model = grid_search.best_estimator_

    print(f"Best parameters: {grid_search.best_params_}")

    # Make predictions
    val_predictions = best_gb_model.predict(X_val)

    # Evaluate model
    metrics = evaluate_model(y_val, val_predictions, "Gradient Boosting")

    # Plot predictions
    plot_predictions(y_val, val_predictions, 'Gradient Boosting: Actual vs Predicted', 'gb_predictions.png')

    # Feature importance
    feature_importance = pd.DataFrame({
        'feature': X_train.columns,
        'importance': best_gb_model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\nFeature Importance (Gradient Boosting):")
    print(feature_importance.head(10))

    # Plot feature importance
    plt.figure(figsize=(12, 8))
    sns.barplot(x='importance', y='feature', data=feature_importance.head(15))
    plt.title('Feature Importance (Gradient Boosting)')
    plt.tight_layout()
    plt.savefig('results/gb_feature_importance.png')

    # Save model
    joblib.dump(best_gb_model, 'models/gradient_boosting_model.pkl')

    return best_gb_model, metrics

# 2. XGBoost Model
def train_xgboost(X_train, y_train, X_val, y_val):
    """
    Train an XGBoost model.

    Tunes ``n_estimators``, ``learning_rate``, ``max_depth`` and
    ``subsample`` with a grid search, evaluates the best estimator on the
    validation set, saves the prediction and feature-importance plots under
    ``results/`` and the model under ``models/``.

    Parameters
    ----------
    X_train, y_train : pd.DataFrame, pd.Series
        Training features and target, in time order.
    X_val, y_val : pd.DataFrame, pd.Series
        Validation features and target.

    Returns
    -------
    tuple
        ``(best_model, metrics)`` where ``metrics`` is the dict returned by
        ``evaluate_model``.
    """
    print("\nTraining XGBoost model...")

    # Define parameter grid
    param_grid = {
        'n_estimators': [100, 200],
        'learning_rate': [0.05, 0.1],
        'max_depth': [3, 5],
        'subsample': [0.8, 1.0]
    }

    # Create model
    xgb_model = xgb.XGBRegressor(random_state=42)

    # Create grid search. TimeSeriesSplit keeps each validation fold strictly
    # after its training fold, which a plain 3-fold split does not guarantee.
    grid_search = GridSearchCV(
        estimator=xgb_model,
        param_grid=param_grid,
        cv=TimeSeriesSplit(n_splits=3),
        n_jobs=-1,
        scoring='neg_mean_squared_error',
        verbose=1
    )

    # Fit grid search
    grid_search.fit(X_train, y_train)

    # Get best model
    best_xgb_model = grid_search.best_estimator_

    print(f"Best parameters: {grid_search.best_params_}")

    # Make predictions
    val_predictions = best_xgb_model.predict(X_val)

    # Evaluate model
    metrics = evaluate_model(y_val, val_predictions, "XGBoost")

    # Plot predictions
    plot_predictions(y_val, val_predictions, 'XGBoost: Actual vs Predicted', 'xgb_predictions.png')

    # Feature importance
    feature_importance = pd.DataFrame({
        'feature': X_train.columns,
        'importance': best_xgb_model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\nFeature Importance (XGBoost):")
    print(feature_importance.head(10))

    # Plot feature importance
    plt.figure(figsize=(12, 8))
    sns.barplot(x='importance', y='feature', data=feature_importance.head(15))
    plt.title('Feature Importance (XGBoost)')
    plt.tight_layout()
    plt.savefig('results/xgb_feature_importance.png')

    # Save model
    joblib.dump(best_xgb_model, 'models/xgboost_model.pkl')

    return best_xgb_model, metrics

# 3. LightGBM Model
def train_lightgbm(X_train, y_train, X_val, y_val):
    """
    Train a LightGBM model.

    Tunes ``n_estimators``, ``learning_rate``, ``num_leaves`` and
    ``subsample`` with a grid search, evaluates the best estimator on the
    validation set, saves the prediction plot under ``results/`` and the
    model under ``models/``.

    Parameters
    ----------
    X_train, y_train : pd.DataFrame, pd.Series
        Training features and target, in time order.
    X_val, y_val : pd.DataFrame, pd.Series
        Validation features and target.

    Returns
    -------
    tuple
        ``(best_model, metrics)`` where ``metrics`` is the dict returned by
        ``evaluate_model``.
    """
    print("\nTraining LightGBM model...")

    # Define parameter grid
    param_grid = {
        'n_estimators': [100, 200],
        'learning_rate': [0.05, 0.1],
        'num_leaves': [31, 63],
        'subsample': [0.8, 1.0]
    }

    # Create model. subsample_freq=1 turns row bagging on; with LightGBM's
    # default of 0 the 'subsample' values in the grid would have no effect.
    lgb_model = lgb.LGBMRegressor(random_state=42, subsample_freq=1)

    # Create grid search. TimeSeriesSplit keeps each validation fold strictly
    # after its training fold, which a plain 3-fold split does not guarantee.
    grid_search = GridSearchCV(
        estimator=lgb_model,
        param_grid=param_grid,
        cv=TimeSeriesSplit(n_splits=3),
        n_jobs=-1,
        scoring='neg_mean_squared_error',
        verbose=1
    )

    # Fit grid search
    grid_search.fit(X_train, y_train)

    # Get best model
    best_lgb_model = grid_search.best_estimator_

    print(f"Best parameters: {grid_search.best_params_}")

    # Make predictions
    val_predictions = best_lgb_model.predict(X_val)

    # Evaluate model
    metrics = evaluate_model(y_val, val_predictions, "LightGBM")

    # Plot predictions
    plot_predictions(y_val, val_predictions, 'LightGBM: Actual vs Predicted', 'lgb_predictions.png')

    # Feature importance
    feature_importance = pd.DataFrame({
        'feature': X_train.columns,
        'importance': best_lgb_model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\nFeature Importance (LightGBM):")
    print(feature_importance.head(10))

    # Save model
    joblib.dump(best_lgb_model, 'models/lightgbm_model.pkl')

    return best_lgb_model, metrics

# 4. LSTM Model

In [None]:
def train_lstm_model(X_train_scaled, y_train, X_val_scaled, y_val, time_steps=24):
    """
    Train an LSTM model for time series forecasting.

    Windows the scaled features into sequences of ``time_steps`` rows,
    trains a two-layer LSTM with early stopping on the validation loss,
    evaluates it on the validation sequences, and saves the training-history
    and prediction plots under ``results/`` plus the model and its sequence
    parameters under ``models/``.

    Parameters
    ----------
    X_train_scaled, X_val_scaled : pd.DataFrame
        Scaled feature frames, in time order.
    y_train, y_val : pd.Series
        Targets aligned with the feature frames.
    time_steps : int
        Number of consecutive rows per input sequence.

    Returns
    -------
    tuple
        ``(model, metrics)`` where ``metrics`` is the dict returned by
        ``evaluate_model``.

    Raises
    ------
    ValueError
        If either split has no more than ``time_steps`` rows, so that no
        sequence can be built from it.
    """
    print("\nTraining LSTM model...")

    # Create sequences for LSTM
    X_train_seq, y_train_seq = create_sequences(X_train_scaled.values, y_train.values, time_steps)
    X_val_seq, y_val_seq = create_sequences(X_val_scaled.values, y_val.values, time_steps)

    if len(X_train_seq) == 0 or len(X_val_seq) == 0:
        raise ValueError(
            f"Need more than time_steps={time_steps} rows in both the training "
            f"and validation sets to build LSTM sequences."
        )

    print(f"LSTM sequence shapes: X_train={X_train_seq.shape}, y_train={y_train_seq.shape}")

    # Build LSTM model
    input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])

    model = Sequential([
        LSTM(64, activation='relu', input_shape=input_shape, return_sequences=True),
        Dropout(0.2),
        LSTM(32, activation='relu'),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1)
    ])

    # Compile model
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Print model summary
    model.summary()

    # Early stopping
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    )

    # Train model
    history = model.fit(
        X_train_seq, y_train_seq,
        epochs=50,
        batch_size=32,
        validation_data=(X_val_seq, y_val_seq),
        callbacks=[early_stopping],
        verbose=1
    )

    # Plot training history
    plt.figure(figsize=(12, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('LSTM Training History')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig('results/lstm_training_history.png')

    # Make predictions
    val_predictions = model.predict(X_val_seq).flatten()

    # The first `time_steps` validation rows only serve as history for the
    # first window, so predictions line up with the rows after them.
    # .iloc keeps the slice positional regardless of the index type.
    y_val_aligned = y_val.iloc[time_steps:]

    # Evaluate model
    metrics = evaluate_model(y_val_aligned, val_predictions, "LSTM")

    # Plot predictions
    plot_predictions(y_val_aligned, val_predictions, 'LSTM: Actual vs Predicted', 'lstm_predictions.png')

    # Save model
    model.save('models/lstm_model.h5')

    # Save sequence parameters
    with open('models/lstm_params.json', 'w') as f:
        json.dump({
            'time_steps': time_steps,
            'features': X_train_scaled.columns.tolist()
        }, f)

    return model, metrics

# 5. Prophet Model
def train_prophet_model(df, target_col='energy_consumption', exogenous_features=None):
    """
    Train a Prophet model for time series forecasting.

    The first 80% of the rows are used for fitting; the remaining 20% form a
    hold-out set that is scored with ``evaluate_model``. The forecast plot,
    the component plot and the pickled model are written to ``results/`` and
    ``models/``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``reset_index()`` exposes a 'timestamp' column, and which
        contains ``target_col`` and every column in ``exogenous_features``.
        Rows are expected to be in chronological order.
    target_col : str
        Name of the column to forecast.
    exogenous_features : list of str, optional
        Columns registered with Prophet as extra regressors.

    Returns
    -------
    tuple
        ``(model, metrics)``: the fitted Prophet model and the value returned
        by ``evaluate_model`` for the hold-out rows.
    """
    # pickle is not part of the notebook's import cell, so import it locally.
    import pickle

    print("\nTraining Prophet model...")

    regressors = list(exogenous_features) if exogenous_features else []

    # Prophet expects the time column to be called 'ds' and the target 'y'
    prophet_df = df.reset_index()[['timestamp', target_col]].copy()
    prophet_df.columns = ['ds', 'y']
    for feature in regressors:
        prophet_df[feature] = df[feature].values

    # Chronological 80/20 split; copies keep the splits independent of prophet_df
    train_size = int(len(prophet_df) * 0.8)
    prophet_train = prophet_df.iloc[:train_size].copy()
    prophet_test = prophet_df.iloc[train_size:].copy()

    # Create model
    model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True,
        changepoint_prior_scale=0.05
    )
    for feature in regressors:
        model.add_regressor(feature)

    # Fit model
    model.fit(prophet_train)

    # Predict on the timestamps that are actually in the data instead of a
    # regenerated hourly grid: regressor values then stay aligned with their
    # own rows even if the series has gaps or a different sampling interval.
    future = prophet_df.drop(columns='y')
    forecast = model.predict(future)

    # Rows after the split point are the hold-out predictions
    test_predictions = forecast['yhat'].values[train_size:]

    # Evaluate model
    metrics = evaluate_model(prophet_test['y'].values, test_predictions, "Prophet")

    # Plot predictions
    fig = model.plot(forecast)
    plt.title('Prophet: Forecast vs Actual')
    fig.savefig('results/prophet_predictions.png')

    # Plot components
    fig = model.plot_components(forecast)
    fig.savefig('results/prophet_components.png')

    # Save model
    with open('models/prophet_model.pkl', 'wb') as f:
        pickle.dump(model, f)

    return model, metrics

# Train all models
# The gradient-boosting, XGBoost and LightGBM trainers receive X_train / X_val,
# whereas the LSTM trainer receives the X_train_scaled / X_val_scaled variants.
# Each call returns a (model, metrics) pair; the metrics are collected later
# for the model comparison.
gb_model, gb_metrics = train_gradient_boosting(X_train, y_train, X_val, y_val)
xgb_model, xgb_metrics = train_xgboost(X_train, y_train, X_val, y_val)
lgb_model, lgb_metrics = train_lightgbm(X_train, y_train, X_val, y_val)
lstm_model, lstm_metrics = train_lstm_model(X_train_scaled, y_train, X_val_scaled, y_val)

# Train Prophet model with some exogenous features
# These column names are read from final_df and passed to train_prophet_model
# as its exogenous_features argument.
exogenous_features = ['temperature', 'humidity', 'is_weekend']
prophet_model, prophet_metrics = train_prophet_model(final_df, exogenous_features=exogenous_features)


## Step 7: Model Comparison & Selection

In [None]:
# =====================================================================

def compare_models(metrics_list):
    """
    Compare different models and select the best one.

    Parameters
    ----------
    metrics_list : list of dict
        One metrics record per model. Each record must provide the keys
        'model_name', 'rmse', 'mae', 'r2' and 'mape'.

    Returns
    -------
    tuple
        ``(comparison_df, best_model_name)``: the metrics table indexed by
        model name, and the name of the model with the lowest RMSE.
    """
    print("\nModel Comparison:")

    # Create comparison table
    comparison_df = pd.DataFrame(metrics_list).set_index('model_name')
    print(comparison_df)

    # One bar chart per metric on a 2x2 grid: (column, title, y-axis label)
    panels = [
        ('rmse', 'RMSE Comparison', 'RMSE'),
        ('mae', 'MAE Comparison', 'MAE'),
        ('r2', 'R² Comparison', 'R²'),
        ('mape', 'MAPE Comparison', 'MAPE (%)'),
    ]

    plt.figure(figsize=(12, 10))
    for position, (column, title, ylabel) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        comparison_df[column].plot(kind='bar')
        plt.title(title)
        plt.ylabel(ylabel)
        plt.grid(axis='y')

    plt.tight_layout()
    plt.savefig('results/model_comparison.png')

    # Find best model based on RMSE (lower is better)
    best_model_name = comparison_df['rmse'].idxmin()
    print(f"\nThe best model based on RMSE is: {best_model_name}")

    return comparison_df, best_model_name

# Compare all models
# best_model_name (lowest RMSE among the five metric records) is reused by the
# evaluation, feature-importance and deployment steps that follow.
metrics_list = [gb_metrics, xgb_metrics, lgb_metrics, lstm_metrics, prophet_metrics]
comparison_df, best_model_name = compare_models(metrics_list)


## Step 8: Best Model Evaluation on Test Set

In [None]:
# =====================================================================

def evaluate_best_model_on_test_set(best_model_name):
    """
    Evaluate the best model on the test set.

    The saved model is reloaded from ``models/`` and scored against the
    notebook-level test split (``X_test`` / ``X_test_scaled`` / ``y_test``).

    Parameters
    ----------
    best_model_name : str
        One of "XGBoost", "LightGBM", "Gradient Boosting", "LSTM", "Prophet".

    Returns
    -------
    The value returned by ``evaluate_model`` for the test-set predictions.

    Raises
    ------
    ValueError
        If ``best_model_name`` is not a supported model, or if the Prophet
        model needs regressor columns that ``X_test`` does not contain.
    """
    print(f"\nEvaluating {best_model_name} on test set...")

    tree_model_files = {
        "XGBoost": 'models/xgboost_model.pkl',
        "LightGBM": 'models/lightgbm_model.pkl',
        "Gradient Boosting": 'models/gradient_boosting_model.pkl',
    }
    supported = list(tree_model_files) + ["LSTM", "Prophet"]
    if best_model_name not in supported:
        # Fail with a clear message instead of an UnboundLocalError further down
        raise ValueError(
            f"Unknown model name {best_model_name!r}; expected one of {supported}"
        )

    if best_model_name in tree_model_files:
        model = joblib.load(tree_model_files[best_model_name])
        test_predictions = model.predict(X_test)
        actual = y_test
    elif best_model_name == "LSTM":
        model = tf.keras.models.load_model('models/lstm_model.h5')
        # Load sequence parameters
        with open('models/lstm_params.json', 'r') as f:
            lstm_params = json.load(f)

        time_steps = lstm_params['time_steps']
        X_test_seq, y_test_seq = create_sequences(X_test_scaled.values, y_test.values, time_steps)
        test_predictions = model.predict(X_test_seq).flatten()
        # The first `time_steps` targets have no complete input window
        actual = y_test[time_steps:]
    else:  # Prophet
        # pickle is not part of the notebook's import cell, so import it locally.
        import pickle

        with open('models/prophet_model.pkl', 'rb') as f:
            model = pickle.load(f)

        if 'predictions' in prophet_metrics:
            test_predictions = prophet_metrics['predictions']
        else:
            # NOTE(review): assumes X_test is indexed by timestamp in
            # chronological order - confirm against the split cell.
            regressors = list(getattr(model, 'extra_regressors', {}))
            missing = [name for name in regressors if name not in X_test.columns]
            if missing:
                raise ValueError(f"X_test lacks the Prophet regressor columns {missing}")

            future = pd.DataFrame({'ds': pd.to_datetime(X_test.index)})
            for name in regressors:
                future[name] = X_test[name].values
            test_predictions = model.predict(future)['yhat'].values
        actual = y_test

    # Evaluate on test set
    test_metrics = evaluate_model(actual, test_predictions, f"{best_model_name} (Test Set)")

    # Plot test predictions
    plot_predictions(actual, test_predictions, f'{best_model_name}: Test Set Predictions', 'best_model_test_predictions.png')

    return test_metrics

# Evaluate the best model on the test set
# Uses the model name selected by compare_models; the saved model file is
# reloaded from disk inside the function.
best_model_test_metrics = evaluate_best_model_on_test_set(best_model_name)


## Step 9: Feature Importance Analysis

In [None]:
# =====================================================================

def analyze_feature_importance(best_model_name):
    """
    Analyze feature importance for the best model.

    Only the tree-based models ("XGBoost", "LightGBM", "Gradient Boosting")
    are supported. For them the saved model is reloaded, its
    ``feature_importances_`` are paired with the columns of ``X_train``,
    the 20 largest are plotted and printed, and the full ranking is returned.
    For any other model name a notice is printed and ``None`` is returned.
    """
    print("\nAnalyzing feature importance...")

    tree_models = ("XGBoost", "LightGBM", "Gradient Boosting")
    model_files = (
        'models/xgboost_model.pkl',
        'models/lightgbm_model.pkl',
        'models/gradient_boosting_model.pkl',
    )

    # Guard clause: nothing to analyse for the non-tree models
    if best_model_name not in tree_models:
        print(f"Feature importance analysis not implemented for {best_model_name}")
        return None

    # Load the saved model that belongs to the selected name
    model = joblib.load(model_files[tree_models.index(best_model_name)])

    # Rank every training column by the importance the model assigned to it
    ranking = pd.DataFrame({
        'feature': X_train.columns,
        'importance': model.feature_importances_
    })
    ranking = ranking.sort_values('importance', ascending=False)
    top_features = ranking.head(20)

    # Horizontal bar chart of the 20 most important features
    plt.figure(figsize=(14, 10))
    sns.barplot(x='importance', y='feature', data=top_features)
    plt.title(f'Feature Importance ({best_model_name})')
    plt.tight_layout()
    plt.savefig('results/best_model_feature_importance.png')

    print("\nTop 20 Important Features:")
    print(top_features)

    return ranking

# Analyze feature importance
# The result is None when the selected model is not one of the tree-based models.
feature_importance = analyze_feature_importance(best_model_name)


## Step 10: Model Deployment

In [None]:
# =====================================================================

def create_prediction_pipeline(best_model_name):
    """
    Create a prediction pipeline for the best model.

    The pipeline code is written to 'models/prediction_pipeline.py' (the
    module that the deployment scripts import) and that same file is then
    loaded to build the returned object, so the notebook and the deployed
    service run a single implementation.

    Parameters
    ----------
    best_model_name : str
        "XGBoost", "LightGBM", "Gradient Boosting" or "LSTM"; any other value
        selects the Prophet model file.

    Returns
    -------
    EnergyPredictionPipeline
        Instance created by the generated module's ``load_pipeline()``.
    """
    # Only needed here, so keep the import local to the function.
    import importlib.util

    print("\nCreating prediction pipeline...")

    module_path = 'models/prediction_pipeline.py'

    # Source of the generated module. __BEST_MODEL_NAME__ is substituted below;
    # a plain token (rather than str.format) means braces in the code need no escaping.
    module_template = """
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
import pickle
import json

class EnergyPredictionPipeline:
    \"\"\"
    Energy prediction pipeline for real-time predictions.
    \"\"\"
    def __init__(self, model_path, scaler_path=None, model_type=None, lstm_params_path=None):
        self.model_type = model_type

        # Load model
        if model_type in ["XGBoost", "LightGBM", "Gradient Boosting"]:
            self.model = joblib.load(model_path)
        elif model_type == "LSTM":
            self.model = tf.keras.models.load_model(model_path)
            # Load LSTM parameters
            with open(lstm_params_path, 'r') as f:
                lstm_params = json.load(f)
            self.time_steps = lstm_params['time_steps']
            self.features = lstm_params['features']
        elif model_type == "Prophet":
            with open(model_path, 'rb') as f:
                self.model = pickle.load(f)

        # Load scaler if needed
        if scaler_path and model_type in ["XGBoost", "LightGBM", "Gradient Boosting", "LSTM"]:
            self.scaler = joblib.load(scaler_path)

    def preprocess_data(self, df):
        \"\"\"
        Preprocess input data.
        \"\"\"
        # This should implement the same preprocessing steps as in the training pipeline
        # For demonstration, we'll assume df is already preprocessed
        return df

    def predict(self, df):
        \"\"\"
        Make predictions using the loaded model.

        Returns a 1-D array with one value per input row. For the LSTM the
        first time_steps - 1 entries are NaN because no full window exists yet.
        \"\"\"
        # Preprocess data
        processed_df = self.preprocess_data(df)

        if self.model_type in ["XGBoost", "LightGBM", "Gradient Boosting"]:
            # Scale numerical features only when a scaler was explicitly supplied
            if hasattr(self, 'scaler'):
                # Work on a copy so the caller's frame is not overwritten
                processed_df = processed_df.copy()
                numerical_features = processed_df.select_dtypes(include=['int64', 'float64']).columns
                processed_df[numerical_features] = self.scaler.transform(processed_df[numerical_features])

            # Make predictions
            predictions = self.model.predict(processed_df)

        elif self.model_type == "LSTM":
            # Scale data
            numerical_features = processed_df.select_dtypes(include=['int64', 'float64']).columns
            scaled_data = self.scaler.transform(processed_df[numerical_features])

            # Too few rows for even one window: nothing can be predicted
            window_count = len(scaled_data) - self.time_steps + 1
            if window_count <= 0:
                return np.full(len(scaled_data), np.nan)

            # Create sequences
            sequences = np.array([
                scaled_data[i:i + self.time_steps] for i in range(window_count)
            ])

            # Make predictions
            predictions = self.model.predict(sequences).flatten()

            # Adjust predictions length to match input length
            # Pad with NaN at the beginning
            padding = np.full(self.time_steps - 1, np.nan)
            predictions = np.concatenate([padding, predictions])

        elif self.model_type == "Prophet":
            # Prophet needs 'ds' plus every extra regressor it was fitted with
            frame = processed_df.reset_index()
            regressors = list(getattr(self.model, 'extra_regressors', {}))
            prophet_df = frame[['timestamp'] + regressors].rename(columns={'timestamp': 'ds'})

            # Make predictions
            forecast = self.model.predict(prophet_df)
            predictions = forecast['yhat'].values

        else:
            raise ValueError(f"Unsupported model type: {self.model_type!r}")

        return predictions

def load_pipeline():
    \"\"\"
    Load the prediction pipeline.
    \"\"\"
    best_model_name = __BEST_MODEL_NAME__

    if best_model_name in ["XGBoost", "LightGBM", "Gradient Boosting"]:
        model_path = f'models/{best_model_name.lower().replace(" ", "_")}_model.pkl'
    elif best_model_name == "LSTM":
        model_path = 'models/lstm_model.h5'
    else:  # Prophet
        model_path = 'models/prophet_model.pkl'

    if best_model_name == "LSTM":
        # The LSTM was trained on scaled features, so it needs the scaler
        pipeline = EnergyPredictionPipeline(
            model_path=model_path,
            scaler_path='models/scaler.pkl',
            model_type=best_model_name,
            lstm_params_path='models/lstm_params.json'
        )
    else:
        # The tree-based models were fitted on the unscaled feature frame,
        # so no scaler is attached for them.
        pipeline = EnergyPredictionPipeline(
            model_path=model_path,
            model_type=best_model_name
        )

    return pipeline
"""

    # repr() yields a correctly quoted Python string literal for the name
    module_source = module_template.replace('__BEST_MODEL_NAME__', repr(best_model_name))

    # Save the pipeline module
    with open(module_path, 'w') as f:
        f.write(module_source)

    # Load the file that was just written and build the pipeline from it
    spec = importlib.util.spec_from_file_location('prediction_pipeline', module_path)
    pipeline_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(pipeline_module)
    pipeline = pipeline_module.load_pipeline()

    print(f"Prediction pipeline created for {best_model_name} model.")

    return pipeline

# Create prediction pipeline
# Also writes models/prediction_pipeline.py, which the generated deployment
# scripts import.
pipeline = create_prediction_pipeline(best_model_name)


## Step 11: Create Flask API for Deployment

In [None]:
# =====================================================================

def create_flask_api():
    """
    Create a Flask API for the energy prediction model.

    Writes three files into the current working directory:

    - ``app.py``: Flask service exposing ``/health``, ``/predict`` and
      ``/batch_predict``; it imports ``load_pipeline`` from
      ``models.prediction_pipeline``.
    - ``requirements.txt``: pinned package versions.
    - ``Dockerfile``: image that serves ``app:app`` with gunicorn on port 5000.

    Returns nothing.
    """
    print("\nCreating Flask API for deployment...")

    with open('app.py', 'w') as f:
        f.write("""
import os
from datetime import datetime

import numpy as np
import pandas as pd
from flask import Flask, request, jsonify

from models.prediction_pipeline import load_pipeline

app = Flask(__name__)

# Load the prediction pipeline
pipeline = load_pipeline()


def _json_safe(predictions):
    # NaN (e.g. the LSTM warm-up padding) is not valid JSON, so emit null instead
    values = np.asarray(predictions, dtype=float).ravel()
    return [None if np.isnan(value) else float(value) for value in values]


@app.route('/health', methods=['GET'])
def health_check():
    return jsonify({'status': 'healthy'})

@app.route('/predict', methods=['POST'])
def predict():
    # silent=True returns None instead of raising on a missing or malformed body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    try:
        # Convert to DataFrame
        if 'features' in data:
            # Direct feature input
            df = pd.DataFrame(data['features'])
        elif 'timestamp' in data:
            # Time-based prediction
            timestamps = data['timestamp']
            if not isinstance(timestamps, list):
                timestamps = [timestamps]

            # Convert to datetime
            timestamps = [datetime.fromisoformat(ts.replace('Z', '+00:00')) for ts in timestamps]

            # Create DataFrame
            df = pd.DataFrame({'timestamp': timestamps})

            # Add weather data if provided
            if 'weather' in data:
                for key, values in data['weather'].items():
                    df[key] = values
        else:
            return jsonify({'error': 'Invalid input format'}), 400

        # Make predictions
        predictions = pipeline.predict(df)

        # Create response
        response = {
            'predictions': _json_safe(predictions),
            'timestamp': df['timestamp'].astype(str).tolist() if 'timestamp' in df else None
        }

        return jsonify(response)

    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/batch_predict', methods=['POST'])
def batch_predict():
    # A missing upload is a client error, not a server failure
    file = request.files.get('file')
    if file is None:
        return jsonify({'error': "Missing 'file' upload"}), 400

    try:
        # Read CSV file
        df = pd.read_csv(file)

        # Convert timestamp to datetime if present
        if 'timestamp' in df.columns:
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        # Make predictions
        predictions = pipeline.predict(df)

        # Create response
        response = {
            'predictions': _json_safe(predictions),
            'timestamp': df['timestamp'].astype(str).tolist() if 'timestamp' in df else None
        }

        return jsonify(response)

    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    # The interactive debugger allows remote code execution, so it is opt-in
    # (FLASK_DEBUG=1) rather than always on for a server bound to 0.0.0.0.
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1', host='0.0.0.0', port=5000)
""")

    # Create requirements.txt file
    with open('requirements.txt', 'w') as f:
        f.write("""
numpy==1.24.3
pandas==2.0.3
scikit-learn==1.3.0
tensorflow==2.14.0
xgboost==1.7.6
lightgbm==4.0.0
prophet==1.1.4
matplotlib==3.7.2
seaborn==0.12.2
flask==2.3.3
gunicorn==21.2.0
joblib==1.3.2
""")

    # Create Dockerfile
    with open('Dockerfile', 'w') as f:
        f.write("""
FROM python:3.10-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 5000

CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app:app"]
""")

    print("Flask API, requirements.txt, and Dockerfile created for deployment.")

# Create Flask API
# Writes app.py, requirements.txt and Dockerfile into the working directory.
create_flask_api()


## Step 12: Energy Savings Calculator

In [None]:
# =====================================================================

def create_energy_savings_calculator():
    """
    Create an energy savings calculator.

    Writes ``energy_savings_calculator.py`` into the current working
    directory. The generated module defines ``EnergySavingsCalculator``,
    which uses the prediction pipeline to estimate consumption and
    time-of-use cost for a baseline and for three what-if scenarios.

    Returns nothing.
    """
    print("\nCreating energy savings calculator...")

    # utf-8 is stated explicitly because the generated source contains '°'
    with open('energy_savings_calculator.py', 'w', encoding='utf-8') as f:
        f.write("""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from models.prediction_pipeline import load_pipeline

class EnergySavingsCalculator:
    \"\"\"
    A calculator for estimating energy savings based on different scenarios.
    \"\"\"
    def __init__(self, pipeline=None):
        if pipeline is None:
            self.pipeline = load_pipeline()
        else:
            self.pipeline = pipeline

        # Default energy price ($/kWh)
        self.peak_price = 0.22
        self.off_peak_price = 0.12

        # Peak hours (7-9 AM and 5-8 PM)
        self.peak_hours = list(range(7, 10)) + list(range(17, 21))

    def calculate_baseline_consumption(self, df):
        \"\"\"
        Calculate baseline energy consumption.
        \"\"\"
        # Make predictions using the trained model
        baseline_predictions = np.asarray(self.pipeline.predict(df), dtype=float)

        # Calculate total consumption (NaN entries are ignored)
        total_consumption = np.nansum(baseline_predictions)

        # Calculate cost
        cost = self._calculate_cost(df, baseline_predictions)

        return {
            'predictions': baseline_predictions,
            'total_consumption': total_consumption,
            'cost': cost
        }

    def calculate_scenario_consumption(self, df, scenario):
        \"\"\"
        Calculate energy consumption for a specific scenario.

        Scenarios:
        - 'temperature_reduction': Reduce temperature by 2°C during summer months
        - 'smart_scheduling': Shift energy usage from peak to off-peak hours
        - 'efficient_appliances': Reduce overall consumption by 15%
        \"\"\"
        # Create a copy of the input data
        scenario_df = df.copy()

        if scenario == 'temperature_reduction':
            # Only apply during summer months (6-9)
            if 'month' in scenario_df.columns and 'temperature' in scenario_df.columns:
                summer_mask = scenario_df['month'].isin([6, 7, 8, 9])
                # Reduce temperature by 2°C
                scenario_df.loc[summer_mask, 'temperature'] -= 2

        # 'smart_scheduling' and 'efficient_appliances' leave the model inputs
        # untouched (the model only accepts the columns it was trained on);
        # both are applied as adjustments to the predictions below.

        # Make predictions; a float copy allows the in-place adjustments below
        scenario_predictions = np.array(self.pipeline.predict(scenario_df), dtype=float)

        # Apply post-prediction adjustments
        if scenario == 'efficient_appliances':
            # 15% reduction in energy consumption
            scenario_predictions = scenario_predictions * 0.85
        elif scenario == 'smart_scheduling' and 'hour' in scenario_df.columns:
            # Shift 20% of peak consumption to off-peak
            peak_mask = scenario_df['hour'].isin(self.peak_hours).to_numpy()
            off_peak_mask = ~peak_mask

            if off_peak_mask.any():  # Make sure we have off-peak hours
                peak_reduction = scenario_predictions[peak_mask] * 0.2

                # Distribute this evenly over the off-peak hours; nansum keeps
                # a single NaN prediction from wiping out every off-peak value
                avg_increase = np.nansum(peak_reduction) / off_peak_mask.sum()
                scenario_predictions[off_peak_mask] += avg_increase
                scenario_predictions[peak_mask] -= peak_reduction

        # Calculate total consumption
        total_consumption = np.nansum(scenario_predictions)

        # Calculate cost
        cost = self._calculate_cost(scenario_df, scenario_predictions)

        return {
            'predictions': scenario_predictions,
            'total_consumption': total_consumption,
            'cost': cost
        }

    def _calculate_cost(self, df, predictions):
        \"\"\"
        Calculate energy cost based on time-of-use pricing.
        \"\"\"
        predictions = np.asarray(predictions, dtype=float)

        if 'hour' not in df.columns:
            # If hour is not available, use average price
            return np.nansum(predictions) * ((self.peak_price + self.off_peak_price) / 2)

        # Create peak/off-peak mask
        peak_mask = df['hour'].isin(self.peak_hours).to_numpy()

        # Calculate cost
        peak_cost = np.nansum(predictions[peak_mask]) * self.peak_price
        off_peak_cost = np.nansum(predictions[~peak_mask]) * self.off_peak_price

        return peak_cost + off_peak_cost

    def compare_scenarios(self, df, scenarios=None):
        \"\"\"
        Compare different energy-saving scenarios.
        \"\"\"
        if scenarios is None:
            scenarios = ['temperature_reduction', 'smart_scheduling', 'efficient_appliances']

        # Calculate baseline
        baseline = self.calculate_baseline_consumption(df)

        results = {'baseline': baseline}

        # Calculate scenarios
        for scenario in scenarios:
            results[scenario] = self.calculate_scenario_consumption(df, scenario)

        # Calculate savings
        baseline_consumption = baseline['total_consumption']
        baseline_cost = baseline['cost']
        for scenario in scenarios:
            consumption_savings = baseline_consumption - results[scenario]['total_consumption']
            cost_savings = baseline_cost - results[scenario]['cost']

            results[scenario]['consumption_savings'] = consumption_savings
            results[scenario]['cost_savings'] = cost_savings
            # A zero baseline has no meaningful percentage; report 0 instead of dividing by zero
            results[scenario]['consumption_savings_pct'] = (
                (consumption_savings / baseline_consumption) * 100 if baseline_consumption else 0.0
            )
            results[scenario]['cost_savings_pct'] = (
                (cost_savings / baseline_cost) * 100 if baseline_cost else 0.0
            )

        return results

    def plot_comparison(self, results):
        \"\"\"
        Plot comparison of different scenarios.
        \"\"\"
        scenarios = list(results.keys())

        # Plot consumption comparison
        plt.figure(figsize=(12, 6))
        consumption_values = [results[s]['total_consumption'] for s in scenarios]

        plt.subplot(1, 2, 1)
        plt.bar(scenarios, consumption_values)
        plt.title('Total Energy Consumption')
        plt.ylabel('Energy (kWh)')
        plt.grid(axis='y')

        # Plot cost comparison
        plt.subplot(1, 2, 2)
        cost_values = [results[s]['cost'] for s in scenarios]

        plt.bar(scenarios, cost_values)
        plt.title('Total Energy Cost')
        plt.ylabel('Cost ($)')
        plt.grid(axis='y')

        plt.tight_layout()
        plt.savefig('results/scenario_comparison.png')

        # Plot savings percentages
        plt.figure(figsize=(12, 6))
        savings_scenarios = [s for s in scenarios if s != 'baseline']

        plt.subplot(1, 2, 1)
        consumption_savings = [results[s]['consumption_savings_pct'] for s in savings_scenarios]

        plt.bar(savings_scenarios, consumption_savings)
        plt.title('Energy Savings (%)')
        plt.ylabel('Savings (%)')
        plt.grid(axis='y')

        plt.subplot(1, 2, 2)
        cost_savings = [results[s]['cost_savings_pct'] for s in savings_scenarios]

        plt.bar(savings_scenarios, cost_savings)
        plt.title('Cost Savings (%)')
        plt.ylabel('Savings (%)')
        plt.grid(axis='y')

        plt.tight_layout()
        plt.savefig('results/savings_comparison.png')

        return plt
""")

    print("Energy savings calculator created.")

# Create energy savings calculator
# Writes energy_savings_calculator.py into the working directory.
create_energy_savings_calculator()


## Step 13: Create Dashboard Integration Example

In [None]:
# =====================================================================

def create_dashboard_integration():
    """
    Write ``dashboard_integration.py``, a standalone dashboard example module.

    The generated module is written to the current working directory
    (overwriting any existing file) and defines:

    * ``EnergyDashboard`` - loads the prediction pipeline, simulates recent
      and future weather/time features, runs predictions, evaluates savings
      scenarios and renders a preview figure.
    * ``generate_dashboard()`` - convenience entry point that produces
      ``results/dashboard_data.json`` and ``results/dashboard_preview.png``.

    The generated code imports ``models.prediction_pipeline`` and
    ``energy_savings_calculator``; both must be importable when the generated
    module is used. Nothing from those modules is needed to *write* the file.

    Returns:
        None. Progress messages are printed to stdout.
    """
    print("\nCreating dashboard integration example...")

    # NOTE: the text below is a regular (non-raw) string, so docstring quotes
    # of the generated module are escaped and no other backslashes are used.
    with open('dashboard_integration.py', 'w', encoding='utf-8') as f:
        f.write("""
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
from datetime import datetime, timedelta
from models.prediction_pipeline import load_pipeline
from energy_savings_calculator import EnergySavingsCalculator

class EnergyDashboard:
    \"\"\"
    A simple dashboard integration example.
    \"\"\"
    def __init__(self):
        # Load pipeline
        self.pipeline = load_pipeline()

        # Create energy savings calculator
        self.calculator = EnergySavingsCalculator(self.pipeline)

        # Plots and JSON are written under results/; make sure it exists
        # when this module is run outside the notebook that created it.
        os.makedirs('results', exist_ok=True)

    @staticmethod
    def _build_feature_frame(timestamps, n_cycles):
        \"\"\"
        Build the simulated feature frame for the given hourly timestamps.

        Shared by get_recent_data() and get_predictions() so that both use
        exactly the same columns. n_cycles is the number of sine periods the
        simulated temperature completes over the frame.
        \"\"\"
        df = pd.DataFrame({'timestamp': timestamps})

        # Add datetime features
        df['hour'] = df['timestamp'].dt.hour
        df['day_of_week'] = df['timestamp'].dt.dayofweek
        df['month'] = df['timestamp'].dt.month
        df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)

        # Add simulated weather data
        df['temperature'] = 20 + 5 * np.sin(np.linspace(0, n_cycles * 2 * np.pi, len(df)))
        df['humidity'] = 60 + 20 * np.random.rand(len(df))
        df['wind_speed'] = 5 + 3 * np.random.rand(len(df))
        df['cloud_cover'] = 50 + 30 * np.random.rand(len(df))
        df['precipitation'] = np.random.exponential(scale=0.5, size=len(df)) * np.random.choice([0, 1], size=len(df), p=[0.8, 0.2])

        # Add derived weather features
        df['feels_like'] = df['temperature'] - 0.4 * (df['wind_speed'] / 10) * (33 - df['temperature'])
        df['heat_index'] = df['temperature'] + 0.05 * df['humidity']

        # Create weather condition categories
        df['is_rainy'] = (df['precipitation'] > 0.5).astype(int)
        df['is_windy'] = (df['wind_speed'] > 10).astype(int)
        df['is_humid'] = (df['humidity'] > 70).astype(int)
        df['is_cold'] = (df['temperature'] < 10).astype(int)
        df['is_hot'] = (df['temperature'] > 25).astype(int)

        return df

    def get_recent_data(self, days=7):
        \"\"\"
        Get recent energy and weather data.
        This is a simulation - in a real scenario, this would fetch actual data.
        \"\"\"
        # Hourly timestamps covering the last `days` days up to now
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)
        timestamps = pd.date_range(start=start_date, end=end_date, freq='1h')

        return self._build_feature_frame(timestamps, days)

    def get_predictions(self, df=None, days_ahead=1):
        \"\"\"
        Get energy consumption predictions.

        Returns a dict with 'current' (input frame and its predictions) and
        'future' (simulated frame for the next days_ahead days and its
        predictions).
        \"\"\"
        if df is None:
            df = self.get_recent_data()

        # Make predictions for current data
        current_predictions = self.pipeline.predict(df)

        # Create future data for forecasting. `periods` is used instead of an
        # inclusive end timestamp so the horizon is exactly 24 * days_ahead
        # hourly steps (an inclusive end would add one extra hour).
        last_timestamp = df['timestamp'].iloc[-1]
        future_start = last_timestamp + timedelta(hours=1)
        future_timestamps = pd.date_range(start=future_start, periods=int(days_ahead * 24), freq='1h')

        # Simplified forecast: same simulated features as the recent data
        future_df = self._build_feature_frame(future_timestamps, days_ahead)

        # Make future predictions
        future_predictions = self.pipeline.predict(future_df)

        return {
            'current': {
                'data': df,
                'predictions': current_predictions
            },
            'future': {
                'data': future_df,
                'predictions': future_predictions
            }
        }

    def get_savings_opportunities(self, df=None):
        \"\"\"
        Get energy savings opportunities.
        \"\"\"
        if df is None:
            df = self.get_recent_data()

        # Extend data for future predictions
        future_days = 30  # One month forecast
        predictions = self.get_predictions(df, days_ahead=future_days)
        future_df = predictions['future']['data']

        # Compare different scenarios
        scenarios = ['temperature_reduction', 'smart_scheduling', 'efficient_appliances']
        results = self.calculator.compare_scenarios(future_df, scenarios)

        # Create summary (plain floats so the result is JSON serializable)
        summary = {}
        for scenario in scenarios:
            summary[scenario] = {
                'consumption_savings_kwh': float(results[scenario]['consumption_savings']),
                'consumption_savings_pct': float(results[scenario]['consumption_savings_pct']),
                'cost_savings_usd': float(results[scenario]['cost_savings']),
                'cost_savings_pct': float(results[scenario]['cost_savings_pct'])
            }

        # Plot comparison
        self.calculator.plot_comparison(results)

        return summary

    def get_dashboard_data(self):
        \"\"\"
        Get all data needed for the dashboard.

        Also saves the result to results/dashboard_data.json.
        \"\"\"
        # Get recent data
        recent_data = self.get_recent_data(days=7)

        # Get predictions
        predictions = self.get_predictions(recent_data, days_ahead=7)

        # Get savings opportunities
        savings = self.get_savings_opportunities(recent_data)

        # Predictions are aligned row-for-row with recent_data; a plain array
        # plus plain boolean masks keeps the selection independent of the
        # container type returned by the pipeline.
        current_values = np.asarray(predictions['current']['predictions'])
        dates = recent_data['timestamp'].dt.date

        def consumption_where(mask):
            # nansum over an empty selection is 0, so days without rows need
            # no special casing.
            return np.nansum(current_values[mask.to_numpy()])

        today = datetime.now().date()
        yesterday = today - timedelta(days=1)
        month_start = datetime(today.year, today.month, 1).date()

        today_consumption = consumption_where(dates == today)
        yesterday_consumption = consumption_where(dates == yesterday)
        week_consumption = np.nansum(current_values)
        month_consumption = consumption_where(dates >= month_start)

        # Create dashboard data
        dashboard_data = {
            'current_stats': {
                'today_consumption': float(today_consumption),
                'yesterday_consumption': float(yesterday_consumption),
                'week_consumption': float(week_consumption),
                'month_consumption': float(month_consumption),
                'day_over_day_change': float((today_consumption - yesterday_consumption) / yesterday_consumption * 100) if yesterday_consumption > 0 else 0
            },
            'predictions': {
                'timestamps': predictions['future']['data']['timestamp'].astype(str).tolist(),
                'values': predictions['future']['predictions'].tolist()
            },
            'recent': {
                'timestamps': predictions['current']['data']['timestamp'].astype(str).tolist(),
                'values': predictions['current']['predictions'].tolist()
            },
            'savings_opportunities': savings
        }

        # Save dashboard data
        with open('results/dashboard_data.json', 'w') as f:
            json.dump(dashboard_data, f, indent=2)

        return dashboard_data

    def plot_dashboard_preview(self, data=None):
        \"\"\"
        Create a preview of the dashboard.

        data: optional result of get_dashboard_data(). Pass it to plot
        exactly the data that was already computed and saved; when omitted
        the data is computed (and saved) here, as before.
        \"\"\"
        # Get dashboard data
        if data is None:
            data = self.get_dashboard_data()

        # Create figure
        plt.figure(figsize=(15, 12))

        # Plot recent consumption
        plt.subplot(3, 1, 1)
        recent_timestamps = pd.to_datetime(data['recent']['timestamps'])
        plt.plot(recent_timestamps, data['recent']['values'])
        plt.title('Recent Energy Consumption')
        plt.xlabel('Date')
        plt.ylabel('Energy (kWh)')
        plt.grid(True)

        # Plot predicted consumption
        plt.subplot(3, 1, 2)
        future_timestamps = pd.to_datetime(data['predictions']['timestamps'])
        plt.plot(future_timestamps, data['predictions']['values'], 'r--')
        plt.title('Forecasted Energy Consumption')
        plt.xlabel('Date')
        plt.ylabel('Energy (kWh)')
        plt.grid(True)

        # Plot savings opportunities
        plt.subplot(3, 1, 3)
        scenarios = list(data['savings_opportunities'].keys())
        savings_values = [data['savings_opportunities'][s]['cost_savings_usd'] for s in scenarios]

        plt.bar(scenarios, savings_values)
        plt.title('Monthly Savings Opportunities ($)')
        plt.ylabel('Savings ($)')
        plt.grid(axis='y')

        plt.tight_layout()
        plt.savefig('results/dashboard_preview.png')

        return plt

# Create a function to generate the dashboard
def generate_dashboard():
    dashboard = EnergyDashboard()
    data = dashboard.get_dashboard_data()
    # Reuse the data computed above: the inputs are randomly simulated, so
    # recomputing inside the plot would make the preview and the saved JSON
    # differ from the dict returned here.
    dashboard.plot_dashboard_preview(data)
    print("Dashboard data saved to 'results/dashboard_data.json'")
    print("Dashboard preview saved to 'results/dashboard_preview.png'")
    return data

if __name__ == "__main__":
    generate_dashboard()
""")

    print("Dashboard integration example created.")

# Write dashboard_integration.py to the current working directory
create_dashboard_integration()


## Step 14: Create a README file

In [None]:
# =====================================================================

def create_readme():
    """
    Write the project README to ``README.md`` in the current working directory.

    Any existing ``README.md`` is overwritten. The file is written as UTF-8
    explicitly because the text contains non-ASCII characters (the
    box-drawing characters of the project tree and the superscript in R²),
    which a platform default encoding such as cp1252 cannot encode.

    Returns:
        None. Progress messages are printed to stdout.
    """
    print("\nCreating README file...")

    with open('README.md', 'w', encoding='utf-8') as f:
        f.write("""
# Energy Consumption Prediction System

A comprehensive system for predicting and managing household energy consumption using machine learning and time series forecasting techniques.

## Overview

This project implements a complete end-to-end solution for energy consumption prediction, from data collection and preprocessing to model training, evaluation, and deployment. The system helps households manage and reduce their energy usage by providing accurate forecasts and actionable insights.

## Features

- **Data Collection**: Simulated smart meter and weather data (extensible to real API integrations)
- **Data Preprocessing**: Handling missing values, outliers, and feature engineering
- **Model Training**: Multiple models implemented (Gradient Boosting, XGBoost, LightGBM, LSTM, Prophet)
- **Model Evaluation**: Comprehensive performance metrics and visualizations
- **Deployment**: Flask API for real-time predictions
- **Energy Savings Calculator**: Tools to estimate potential savings from different strategies
- **Dashboard Integration**: Example code for integration with visualization dashboards

## Project Structure

```
├── app.py                     # Flask API for model deployment
├── energy_savings_calculator.py  # Tool for estimating energy savings
├── dashboard_integration.py   # Example code for dashboard integration
├── Dockerfile                 # Docker configuration for deployment
├── requirements.txt           # Python dependencies
├── README.md                  # Project documentation
├── data/                      # Data storage
│   ├── raw_energy_data.csv    # Raw energy consumption data
│   ├── raw_weather_data.csv   # Raw weather data
│   └── processed_data.csv     # Processed and merged dataset
├── models/                    # Trained models and pipelines
│   ├── gradient_boosting_model.pkl
│   ├── xgboost_model.pkl
│   ├── lightgbm_model.pkl
│   ├── lstm_model.h5
│   ├── prophet_model.pkl
│   ├── scaler.pkl             # Feature scaler
│   ├── lstm_params.json       # LSTM parameters
│   └── prediction_pipeline.py # Prediction pipeline class
└── results/                   # Visualizations and analysis results
    ├── energy_consumption_time_series.png
    ├── hourly_energy_pattern.png
    ├── weekly_energy_pattern.png
    ├── monthly_energy_pattern.png
    ├── energy_vs_temperature.png
    ├── correlation_matrix.png
    ├── weekend_vs_weekday.png
    ├── gb_predictions.png
    ├── xgb_predictions.png
    ├── lgb_predictions.png
    ├── lstm_predictions.png
    ├── prophet_predictions.png
    ├── model_comparison.png
    ├── best_model_test_predictions.png
    ├── best_model_feature_importance.png
    ├── scenario_comparison.png
    ├── savings_comparison.png
    ├── dashboard_preview.png
    └── dashboard_data.json
```

## Installation and Setup

1. Clone the repository:
```bash
git clone https://github.com/yourusername/energy-consumption-prediction.git
cd energy-consumption-prediction
```

2. Install dependencies:
```bash
pip install -r requirements.txt
```

3. Run the Jupyter notebook to train models and generate results:
```bash
jupyter notebook energy_consumption_prediction.ipynb
```

4. Start the Flask API:
```bash
python app.py
```

5. Or build and run with Docker:
```bash
docker build -t energy-prediction .
docker run -p 5000:5000 energy-prediction
```

## Usage

### Making Predictions with the API

```python
import requests
import json

# Single prediction
data = {
    'timestamp': '2023-04-10T12:00:00Z',
    'weather': {
        'temperature': 22.5,
        'humidity': 65.0,
        'wind_speed': 8.2,
        'cloud_cover': 40.0,
        'precipitation': 0.0
    }
}

response = requests.post('http://localhost:5000/predict', json=data)
predictions = response.json()
print(predictions)

# Batch prediction
files = {'file': open('path/to/your/input_data.csv', 'rb')}
response = requests.post('http://localhost:5000/batch_predict', files=files)
batch_predictions = response.json()
print(batch_predictions)
```

### Using the Energy Savings Calculator

```python
from energy_savings_calculator import EnergySavingsCalculator
import pandas as pd

# Load your data
data = pd.read_csv('your_energy_data.csv')

# Initialize calculator
calculator = EnergySavingsCalculator()

# Compare different scenarios
results = calculator.compare_scenarios(data)

# Plot comparison
calculator.plot_comparison(results)
```

### Dashboard Integration

```python
from dashboard_integration import generate_dashboard

# Generate dashboard data and preview
dashboard_data = generate_dashboard()
print(dashboard_data['current_stats'])
```

## Model Performance

The system trains and evaluates multiple models:

1. Gradient Boosting Regressor
2. XGBoost
3. LightGBM
4. LSTM Neural Network
5. Prophet

Performance metrics including RMSE, MAE, R², and MAPE are calculated for each model, and the best-performing model is selected for deployment.

## Energy Saving Strategies

The system evaluates the impact of various energy-saving strategies:

1. **Temperature Reduction**: Adjusting temperature settings during summer months
2. **Smart Scheduling**: Shifting energy usage from peak to off-peak hours
3. **Efficient Appliances**: Reducing overall consumption through more efficient devices

## License

This project is licensed under the MIT License - see the LICENSE file for details.

## Acknowledgments

- Data generation based on typical household consumption patterns
- Weather patterns based on typical seasonal variations
- Inspired by real-world energy management systems
""")

    print("README file created.")

# Generate README.md in the current working directory
create_readme()

# ---------------------------------------------------------------------
# Closing summary: nothing is computed here, the lines below only print
# a recap of what the notebook produced.
# ---------------------------------------------------------------------
print(
    "\nEnergy Consumption Prediction System Created Successfully!",
    "\nProject Structure:",
    sep="\n",
)

In [None]:
# One entry per generated artifact, followed by a two-line closing note.
# A single print with a newline separator emits the same lines as
# individual print calls would.
print(
    "- data/: Contains raw and processed data",
    "- models/: Contains trained models and prediction pipeline",
    "- results/: Contains visualizations and analysis results",
    "- app.py: Flask API for model deployment",
    "- energy_savings_calculator.py: Tool for estimating energy savings",
    "- dashboard_integration.py: Example code for dashboard integration",
    "- README.md: Project documentation",
    "- requirements.txt: Python dependencies",
    "- Dockerfile: Docker configuration for deployment",
    "\nThis notebook implements a comprehensive energy consumption prediction system",
    "that can be used for household energy management and savings.",
    sep="\n",
)


