In [None]:
# ============================
# 1️⃣ PREPROCESSING CONSUMPTION DATA (Per Household)
# ============================

# Get all household CSV files.
# glob returns matches in arbitrary (filesystem-dependent) order, so sort them
# to make the training/printing order identical on every run.
csv_files = sorted(glob.glob("Consumer*-Table 1.csv"))

# Convert 'Periods' to hours and minutes for better learning
def convert_periods_to_time(period):
    """Return the start of a 15-minute period as minutes, with period 1 mapping to 0."""
    zero_based_slot = period - 1
    return zero_based_slot * 15

# Dictionary to store trained models (household ID -> fitted RandomForestRegressor)
household_models = {}

for file in csv_files:
    # Extract household ID from filename (everything before the first '-')
    household_id = file.split("-")[0]

    # Load each household dataset
    df = pd.read_csv(file)

    # Drop 'Unnamed: 0' if it exists
    if 'Unnamed: 0' in df.columns:
        df = df.drop(columns=['Unnamed: 0'])

    # Convert 'Periods' to hour and minute features
    df['Minutes'] = df['Periods'].apply(convert_periods_to_time)
    df['Hour'] = df['Minutes'] // 60
    df['Minute'] = df['Minutes'] % 60

    # Drop 'Minutes' as we already extracted hours & minutes
    df = df.drop(columns=['Minutes'])

    # Convert necessary columns to numeric. This must happen BEFORE the missing
    # value fill: errors='coerce' turns unparseable cells into NaN, and filling
    # first would let those NaNs leak into the features/target.
    for col in df.columns:
        if col not in ['Sheet_Name', 'Periods']:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # Handle missing values (original gaps and anything coerced to NaN above)
    df = df.fillna(0)

    # Features & Target
    # NOTE(review): every column except the target is used as a feature. If the
    # remaining columns are component readings that add up to
    # 'Total Consumption', the model is effectively given the answer - TODO confirm.
    X = df.drop(columns=['Total Consumption'])
    y = df['Total Consumption']

    # Train-test split (fixed seed so the reported metrics are reproducible)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Store trained model
    household_models[household_id] = model

    # Predictions & evaluation on the held-out 20%
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    print(f"✅ Household {household_id} Model Trained | MAE: {mae:.4f} | RMSE: {rmse:.4f}")

print("\n✅ All household consumption models trained!")

✅ Household Consumer1 Model Trained | MAE: 0.5587 | RMSE: 0.8108
✅ Household Consumer2 Model Trained | MAE: 0.3003 | RMSE: 0.6177
✅ Household Consumer3 Model Trained | MAE: 0.1332 | RMSE: 0.2704
✅ Household Consumer4 Model Trained | MAE: 0.4479 | RMSE: 0.9022
✅ Household Consumer5 Model Trained | MAE: 0.6353 | RMSE: 0.9133
✅ Household Consumer6 Model Trained | MAE: 0.1758 | RMSE: 0.2485
✅ Household Consumer8 Model Trained | MAE: 0.1037 | RMSE: 0.1658
✅ Household Consumer7 Model Trained | MAE: 0.5965 | RMSE: 0.8775
✅ Household Consumer9 Model Trained | MAE: 0.7522 | RMSE: 1.1000
✅ Household Consumer10 Model Trained | MAE: 0.1678 | RMSE: 0.2407
✅ Household Consumer49 Model Trained | MAE: 0.2059 | RMSE: 0.2967
✅ Household Consumer50 Model Trained | MAE: 0.1714 | RMSE: 0.2391

✅ All household consumption models trained!


In [None]:
# Mount Google Drive
# NOTE: the training cell that appears earlier in this notebook uses names that
# are imported in this cell (e.g. glob, pd), so this cell has to be run first.
from google.colab import drive
import os

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

# Change directory to dataset location so relative file patterns resolve there
dataset_dir = '/content/drive/My Drive/Zenodo'
os.chdir(dataset_dir)

import pandas as pd
import glob
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

Mounted at /content/drive


In [None]:
# Show the registry of fitted models (household ID -> estimator)
print(f"{household_models}")

{'Consumer1': RandomForestRegressor(random_state=42), 'Consumer2': RandomForestRegressor(random_state=42), 'Consumer3': RandomForestRegressor(random_state=42), 'Consumer4': RandomForestRegressor(random_state=42), 'Consumer5': RandomForestRegressor(random_state=42), 'Consumer6': RandomForestRegressor(random_state=42), 'Consumer8': RandomForestRegressor(random_state=42), 'Consumer7': RandomForestRegressor(random_state=42), 'Consumer9': RandomForestRegressor(random_state=42), 'Consumer10': RandomForestRegressor(random_state=42), 'Consumer49': RandomForestRegressor(random_state=42), 'Consumer50': RandomForestRegressor(random_state=42)}


In [None]:
# ============================
# 2️⃣ PREPROCESSING SOLAR GENERATION DATA
# ============================

# Load solar production dataset
solar_df = pd.read_csv('/content/Solar_Energy_Generation.csv')

# Parse the timestamps once and derive the calendar features from them
solar_df['Timestamp'] = pd.to_datetime(solar_df['Timestamp'])
timestamp_parts = solar_df['Timestamp'].dt
solar_df['Hour'] = timestamp_parts.hour
solar_df['Day'] = timestamp_parts.day
solar_df['Month'] = timestamp_parts.month

# Keep only the model inputs and the target
feature_columns = ['Hour', 'Day', 'Month']
target_column = 'SolarGeneration'
solar_df = solar_df[feature_columns + [target_column]]

# Train-test split
X_solar = solar_df.drop(columns=[target_column])
y_solar = solar_df[target_column]
X_train_solar, X_test_solar, y_train_solar, y_test_solar = train_test_split(
    X_solar, y_solar, test_size=0.2, random_state=42
)

# Discard rows whose target is missing; features and targets are filtered with
# the same mask so they stay aligned.
train_has_target = y_train_solar.notna()
test_has_target = y_test_solar.notna()
X_train_solar, y_train_solar = X_train_solar[train_has_target], y_train_solar[train_has_target]
X_test_solar, y_test_solar = X_test_solar[test_has_target], y_test_solar[test_has_target]

total_rows = len(X_train_solar)  # Rows left in the training features
print(f"Total number of rows: {total_rows}")
print(y_train_solar.isna().sum())  # Remaining NaN targets in the training set

Total number of rows: 956173
0


In [None]:
# Train Random Forest model for solar generation.
# n_jobs=-1 builds the trees on all available cores; with a fixed random_state
# the fitted forest is the same as the single-threaded one, only faster to
# obtain on this large training set.
solar_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
solar_model.fit(X_train_solar, y_train_solar)

In [None]:
# Score the solar model on the held-out rows
y_pred_solar = solar_model.predict(X_test_solar)

mse_solar = mean_squared_error(y_test_solar, y_pred_solar)
rmse_solar = np.sqrt(mse_solar)
mae_solar = mean_absolute_error(y_test_solar, y_pred_solar)

print(f"\n✅ Solar Generation Model Trained | MAE: {mae_solar:.4f} | RMSE: {rmse_solar:.4f}")


✅ Solar Generation Model Trained | MAE: 5.4271 | RMSE: 10.4294


In [None]:
# ============================
# 3️⃣ ENERGY DISTRIBUTION MODEL
# ============================

def _as_model_input(model, values):
    """Build a single-row model input, labelled with the model's training columns when known."""
    row = np.array([values])
    feature_names = getattr(model, "feature_names_in_", None)
    # Models fitted on a DataFrame remember their column names; passing a
    # matching DataFrame avoids scikit-learn's feature-name mismatch warning.
    if feature_names is not None and len(feature_names) == row.shape[1]:
        return pd.DataFrame(row, columns=list(feature_names))
    return row


def predict_energy_distribution(household_id, period, weather_forecast, day=15, month=6):
    """
    Predict energy sent to the house, backup, and grid.

    :param household_id: Household identifier (e.g., Consumer1); must be a key of
        ``household_models``
    :param period: Period (1-96) for 15-minute interval
    :param weather_forecast: Expected weather condition (0 = Sunny, 1 = Cloudy);
        any value other than 1 is treated as sunny
    :param day: Day of month fed to the solar model (default 15, the value that
        was previously hard-coded)
    :param month: Month fed to the solar model (default 6, the value that was
        previously hard-coded)
    :return: (house_energy, backup_energy, grid_energy)
    :raises ValueError: if the household is unknown or ``period`` is outside 1-96
    """
    if household_id not in household_models:
        raise ValueError(f"Household ID {household_id} not found!")
    if not 1 <= period <= 96:
        # Out-of-range periods would silently map to hours outside 0-23.
        raise ValueError(f"Period {period} is out of range (expected 1-96)!")

    # Predict household consumption
    period_in_minutes = convert_periods_to_time(period)
    hour, minute = divmod(period_in_minutes, 60)
    consumption_model = household_models[household_id]
    # Positional feature row: period first, hour and minute last; the middle
    # entries are fixed placeholder values for the remaining training columns.
    # NOTE(review): this assumes every household model was trained on exactly
    # these 13 columns in this order - TODO confirm against the training data.
    consumption_features = _as_model_input(
        consumption_model,
        [period, 0, 0, 0.648033333333333, 0, 0, 0, 0, 0, 0, 0, hour, minute],
    )
    predicted_consumption = consumption_model.predict(consumption_features)[0]

    # Predict solar generation for the requested hour/day/month
    solar_features = _as_model_input(solar_model, [hour, day, month])
    predicted_solar = solar_model.predict(solar_features)[0]

    # Energy distribution logic: reserve a larger share for backup when cloudy
    backup_fraction = 0.6 if weather_forecast == 1 else 0.3
    backup_energy = predicted_solar * backup_fraction

    # The house gets what it needs, capped by what is left after the backup share;
    # whatever remains after house and backup goes to the grid.
    house_energy = min(predicted_consumption, predicted_solar - backup_energy)
    grid_energy = max(0, predicted_solar - (house_energy + backup_energy))

    return house_energy, backup_energy, grid_energy

# Example Usage: allocation for Consumer1 at period 40 under a sunny forecast
allocation = predict_energy_distribution("Consumer1", 40, weather_forecast=0)
house_energy, backup_energy, grid_energy = allocation

print("\n🔹 Energy Allocation Results")
print(f"🏠 House Energy: {house_energy:.2f} kWh")
print(f"🔋 Backup Energy: {backup_energy:.2f} kWh")
print(f"⚡ Grid Energy: {grid_energy:.2f} kWh")


🔹 Energy Allocation Results
🏠 House Energy: 1.15 kWh
🔋 Backup Energy: 0.70 kWh
⚡ Grid Energy: 0.47 kWh


