In [13]:
# faker.py
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def generate_household_data(weeks=110, seed=42, start_date=None, base_rate=0.14):
    """Generate synthetic weekly energy records for five fixed household profiles.

    For every household and every week a usage value and a renewable generation
    value are drawn from normal distributions, clipped at zero, and turned into
    bought/sold energy and a net expenditure.

    Parameters:
    weeks (int): Number of weekly records per household (default 110).
    seed (int or None): Seed for the private random generator. The default (42)
        reproduces the values obtained from ``np.random.seed(42)`` followed by
        the same draws on NumPy's global generator. ``None`` gives a
        non-reproducible run.
    start_date (datetime or None): Date of the first record; defaults to
        2023-01-01.
    base_rate (float): Purchase price in $ per kWh before the random +/-5%
        fluctuation (default 0.14).

    Returns:
    pandas.DataFrame: One row per household per week, sorted by Date and then
    HouseholdID, with columns HouseholdID, Date ('YYYY-MM-DD' string),
    EnergyUsed, EnergyGeneratedFromRenewableSources, EnergyBought, EnergySold
    and TotalExpenditure. With ``weeks <= 0`` the frame is empty but still has
    these columns.
    """
    # A private generator keeps the output reproducible without reseeding
    # NumPy's global random state for the rest of the notebook.
    rng = np.random.RandomState(seed)

    if start_date is None:
        start_date = datetime(2023, 1, 1)

    # Household characteristics (base values)
    households = {
        'H1': {'base_usage': 800, 'base_generation': 600, 'usage_variance': 100},  # Average household
        'H2': {'base_usage': 1200, 'base_generation': 800, 'usage_variance': 150}, # High consumer
        'H3': {'base_usage': 600, 'base_generation': 900, 'usage_variance': 80},   # Efficient with high solar
        'H4': {'base_usage': 1000, 'base_generation': 700, 'usage_variance': 120}, # Large family
        'H5': {'base_usage': 700, 'base_generation': 1000, 'usage_variance': 90}   # Solar enthusiast
    }

    def get_seasonal_multiplier(date):
        """Return the generation multiplier for the season that `date` falls in."""
        month = date.month
        if month in [6, 7, 8]:  # Summer
            return 1.4
        elif month in [3, 4, 5]:  # Spring
            return 1.0
        elif month in [9, 10, 11]:  # Fall
            return 0.8
        else:  # Winter
            return 0.6

    # Explicit column list so that an empty result (weeks <= 0) still has the
    # columns the final sort relies on.
    columns = [
        'HouseholdID',
        'Date',
        'EnergyUsed',
        'EnergyGeneratedFromRenewableSources',
        'EnergyBought',
        'EnergySold',
        'TotalExpenditure',
    ]
    all_data = []

    for household_id, characteristics in households.items():
        for week in range(weeks):
            current_date = start_date + timedelta(weeks=week)
            seasonal_mult = get_seasonal_multiplier(current_date)

            # NOTE: dates advance in whole weeks, so every record shares the
            # weekday of start_date. The default start (2023-01-01) is a
            # Sunday, which makes this multiplier 1.2 for every row.
            weekly_mult = 1.2 if current_date.weekday() >= 5 else 1.0

            # Usage follows the weekly multiplier only (no seasonal term).
            # The draw order (usage, generation, rate) must stay fixed to keep
            # the generated values reproducible.
            energy_used = rng.normal(
                characteristics['base_usage'] * weekly_mult,
                characteristics['usage_variance']
            )

            # Generation affected by seasonal patterns
            energy_generated = rng.normal(
                characteristics['base_generation'] * seasonal_mult,
                characteristics['base_generation'] * 0.1
            )

            # Ensure non-negative values
            energy_used = max(0, energy_used)
            energy_generated = max(0, energy_generated)

            # Only the shortfall is bought and only the surplus is sold, so at
            # most one of the two is non-zero.
            energy_bought = max(0, energy_used - energy_generated)
            energy_sold = max(0, energy_generated - energy_used)

            # Calculate expenditure with random rate fluctuation
            current_rate = base_rate * (1 + rng.uniform(-0.05, 0.05))
            expenditure = round(
                energy_bought * current_rate -
                energy_sold * (current_rate * 0.8),  # Selling at 80% of buying rate
                2
            )

            all_data.append({
                'HouseholdID': household_id,
                'Date': current_date.strftime('%Y-%m-%d'),
                'EnergyUsed': round(energy_used, 2),
                'EnergyGeneratedFromRenewableSources': round(energy_generated, 2),
                'EnergyBought': round(energy_bought, 2),
                'EnergySold': round(energy_sold, 2),
                'TotalExpenditure': expenditure
            })

    # Convert to DataFrame and sort by date
    df = pd.DataFrame(all_data, columns=columns)
    df = df.sort_values(['Date', 'HouseholdID']).reset_index(drop=True)
    return df

# Script entry point: build the synthetic dataset and write it to the data folder.
if __name__ == "__main__":
    output_csv = '../data/sample_household_data.csv'
    data = generate_household_data()
    # index=False keeps the positional row index out of the CSV.
    data.to_csv(output_csv, index=False)
    print("Data generated successfully!")


Data generated successfully!


In [1]:
import sys
import os
from pathlib import Path

# Add parent directory to Python path
notebook_dir = Path(os.getcwd())
project_dir = notebook_dir.parent
sys.path.append(str(project_dir))

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import sys
import os
from src.data_processor import DataProcessor

# Load and process data
processor = DataProcessor()
df = processor.load_household_data('../data/sample_household_data.csv')


def _mean_per_date(frame, columns):
    """Average `columns` over all rows sharing a Date; returns one row per Date."""
    return frame.groupby('Date', as_index=False)[columns].mean()


# df carries rows for several households on each date (it is split by
# HouseholdID further down). Plotting it directly draws a single line that
# jumps between households at every date, so the overall figures below plot
# the per-date average across households instead.

# 1. Energy Usage vs Generation Plot
weekly_energy = _mean_per_date(
    df, ['EnergyUsed', 'EnergyGeneratedFromRenewableSources']
)
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=weekly_energy['Date'],
                         y=weekly_energy['EnergyUsed'],
                         name='Energy Used',
                         line=dict(color='red')))
fig1.add_trace(go.Scatter(x=weekly_energy['Date'],
                         y=weekly_energy['EnergyGeneratedFromRenewableSources'],
                         name='Energy Generated',
                         line=dict(color='green')))
fig1.update_layout(
    title='Weekly Energy Usage vs Generation (Household Average)',
    xaxis_title='Date',
    yaxis_title='Energy (kWh)',
    hovermode='x unified'
)
fig1.show()

# 2. Energy Delta Analysis
# The row-level Delta column stays on df because the statistics section reads it.
df['Delta'] = processor.calculate_delta(
    df['EnergyGeneratedFromRenewableSources'],
    df['EnergyUsed']
)
weekly_delta = _mean_per_date(df, ['Delta'])
fig2 = go.Figure()
fig2.add_trace(go.Scatter(x=weekly_delta['Date'],
                         y=weekly_delta['Delta'],
                         line=dict(color='blue')))
fig2.update_layout(
    title='Energy Surplus/Deficit Over Time (Household Average)',
    xaxis_title='Date',
    yaxis_title='Energy Delta (kWh)',
    hovermode='x'
)
fig2.add_hline(y=0, line_dash="dash", line_color="red")
fig2.show()

# 3. Financial Analysis
weekly_expenditure = _mean_per_date(df, ['TotalExpenditure'])
fig3 = go.Figure()
fig3.add_trace(go.Scatter(x=weekly_expenditure['Date'],
                         y=weekly_expenditure['TotalExpenditure'],
                         line=dict(color='purple')))
fig3.update_layout(
    title='Weekly Energy Expenditure (Household Average)',
    xaxis_title='Date',
    yaxis_title='Expenditure ($)',
    hovermode='x'
)
fig3.add_hline(y=0, line_dash="dash", line_color="gray")
fig3.show()

# 4. Energy Trading Analysis
weekly_trading = _mean_per_date(df, ['EnergyBought', 'EnergySold'])
fig4 = go.Figure()
fig4.add_trace(go.Scatter(x=weekly_trading['Date'],
                         y=weekly_trading['EnergyBought'],
                         name='Energy Bought',
                         line=dict(color='red')))
fig4.add_trace(go.Scatter(x=weekly_trading['Date'],
                         y=weekly_trading['EnergySold'],
                         name='Energy Sold',
                         line=dict(color='green')))
fig4.update_layout(
    title='Weekly Energy Trading Activity (Household Average)',
    xaxis_title='Date',
    yaxis_title='Energy (kWh)',
    hovermode='x unified'
)
fig4.show()

# 5. Calculate and display key statistics
# Every row of df is one household in one week, so the means are per
# household-week and the seller/buyer counts are household-weeks (they can
# exceed the number of calendar weeks). Rows with Delta == 0 count as neither.
stats = {
    'Average Weekly Usage (kWh)': df['EnergyUsed'].mean(),
    'Average Weekly Generation (kWh)': df['EnergyGeneratedFromRenewableSources'].mean(),
    'Average Weekly Expenditure ($)': df['TotalExpenditure'].mean(),
    'Total Net Expenditure ($)': df['TotalExpenditure'].sum(),
    'Household-Weeks as Seller': int((df['Delta'] > 0).sum()),
    'Household-Weeks as Buyer': int((df['Delta'] < 0).sum())
}

print("\nKey Statistics:")
for key, value in stats.items():
    # Counts are whole numbers; only measured quantities get two decimals.
    formatted = f"{value:d}" if isinstance(value, int) else f"{value:.2f}"
    print(f"{key}: {formatted}")

# 6-8. Household Comparison Analysis
def _plot_households(frame, values_of, label, title, yaxis_title, zero_line=False):
    """Show one line per household on a shared figure and return the figure.

    Parameters:
    frame (pandas.DataFrame): Data with 'HouseholdID' and 'Date' columns
    values_of (callable): Maps one household's rows to the y-values to plot
    label (str): Suffix appended to the household ID in each trace name
    title (str): Figure title
    yaxis_title (str): Y-axis label
    zero_line (bool): Draw a dashed gray horizontal line at y=0 when True
    """
    fig = go.Figure()
    for household in frame['HouseholdID'].unique():
        household_data = frame[frame['HouseholdID'] == household]
        fig.add_trace(go.Scatter(
            x=household_data['Date'],
            y=values_of(household_data),
            name=f'{household} {label}',
            mode='lines'
        ))
    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title=yaxis_title,
        hovermode='x unified'
    )
    if zero_line:
        fig.add_hline(y=0, line_dash="dash", line_color="gray")
    fig.show()
    return fig


# 6. Energy Usage Comparison
fig5 = _plot_households(
    df,
    lambda rows: rows['EnergyUsed'],
    'Usage',
    'Energy Usage Comparison Across Households',
    'Energy Usage (kWh)'
)

# 7. Generation Capacity Comparison
fig6 = _plot_households(
    df,
    lambda rows: rows['EnergyGeneratedFromRenewableSources'],
    'Generation',
    'Renewable Energy Generation Comparison',
    'Energy Generated (kWh)'
)

# 8. Net Trading Position (positive = net seller, negative = net buyer)
fig7 = _plot_households(
    df,
    lambda rows: rows['EnergySold'] - rows['EnergyBought'],
    'Net Position',
    'Net Trading Position by Household',
    'Net Energy Position (kWh)',
    zero_line=True
)

# 9. Household Statistics Table
# Per-household aggregates: mean usage and generation, mean and total
# expenditure, and total energy sold and bought, rounded to two decimals.
aggregations = {
    'EnergyUsed': 'mean',
    'EnergyGeneratedFromRenewableSources': 'mean',
    'TotalExpenditure': ['mean', 'sum'],
    'EnergySold': 'sum',
    'EnergyBought': 'sum'
}
grouped_by_household = df.groupby('HouseholdID')
household_stats = grouped_by_household.agg(aggregations).round(2)

print("\nHousehold Statistics:")
print(household_stats)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




Key Statistics:
Average Weekly Usage (kWh): 1028.83
Average Weekly Generation (kWh): 752.13
Average Weekly Expenditure ($): 40.76
Total Net Expenditure ($): 22420.42
Weeks as Seller: 137.00
Weeks as Buyer: 413.00



Household Statistics:
            EnergyUsed EnergyGeneratedFromRenewableSources TotalExpenditure  \
                  mean                                mean             mean   
HouseholdID                                                                   
H1              945.15                              571.46            52.27   
H2             1435.22                              754.89            95.10   
H3              716.46                              844.09           -12.89   
H4             1206.54                              653.16            77.78   
H5              840.77                              937.06            -8.44   

                      EnergySold EnergyBought  
                  sum        sum          sum  
HouseholdID                                    
H1            5749.22     338.92     41444.42  
H2           10461.39     134.74     74970.65  
H3           -1417.79   19712.36      5673.47  
H4            8555.55       0.00     60870.83  
H5      

In [None]:
def generate_household_plots(df, household_id):
    """
    Generate comprehensive plots for a specific household's energy data using Plotly

    Shows four figures (usage vs generation, surplus/deficit, trading activity,
    expenditure) and prints summary statistics for the selected household.
    The input frame is not modified; the Delta column is added to a copy.

    Parameters:
    df (pandas.DataFrame): Complete household data
    household_id (str): Household ID (e.g., 'H1', 'H2', etc.)

    Raises:
    ValueError: If df contains no rows for household_id.
    """
    # Filter data for specific household
    household_df = df[df['HouseholdID'] == household_id].copy()
    if household_df.empty:
        # Fail loudly instead of showing empty figures and NaN statistics.
        raise ValueError(f"No rows found for household {household_id!r}")
    household_df['Delta'] = household_df['EnergyGeneratedFromRenewableSources'] - household_df['EnergyUsed']

    def show_line_chart(traces, title, yaxis_title, hovermode, zero_line_color=None):
        """Plot (column, trace name, color) triples against Date and show the figure."""
        fig = go.Figure()
        for column, trace_name, color in traces:
            fig.add_trace(go.Scatter(
                x=household_df['Date'],
                y=household_df[column],
                name=trace_name,  # None leaves the trace unnamed
                line=dict(color=color)
            ))
        fig.update_layout(
            title=title,
            xaxis_title='Date',
            yaxis_title=yaxis_title,
            hovermode=hovermode
        )
        if zero_line_color is not None:
            fig.add_hline(y=0, line_dash="dash", line_color=zero_line_color)
        fig.show()

    # 1. Energy Usage vs Generation Plot
    show_line_chart(
        [('EnergyUsed', 'Energy Used', 'red'),
         ('EnergyGeneratedFromRenewableSources', 'Energy Generated', 'green')],
        title=f'Energy Usage vs Generation - Household {household_id}',
        yaxis_title='Energy (kWh)',
        hovermode='x unified'
    )

    # 2. Energy Surplus/Deficit Analysis
    show_line_chart(
        [('Delta', None, 'blue')],
        title=f'Energy Surplus/Deficit Over Time - Household {household_id}',
        yaxis_title='Energy Delta (kWh)',
        hovermode='x',
        zero_line_color='red'
    )

    # 3. Trading Activity
    show_line_chart(
        [('EnergyBought', 'Energy Bought', 'red'),
         ('EnergySold', 'Energy Sold', 'green')],
        title=f'Trading Activity - Household {household_id}',
        yaxis_title='Energy (kWh)',
        hovermode='x unified'
    )

    # 4. Financial Analysis
    show_line_chart(
        [('TotalExpenditure', None, 'purple')],
        title=f'Weekly Energy Expenditure - Household {household_id}',
        yaxis_title='Expenditure ($)',
        hovermode='x',
        zero_line_color='gray'
    )

    # Print summary statistics
    # Weeks with Delta == 0 count as neither seller nor buyer.
    stats = {
        'Average Weekly Usage (kWh)': household_df['EnergyUsed'].mean(),
        'Average Weekly Generation (kWh)': household_df['EnergyGeneratedFromRenewableSources'].mean(),
        'Average Weekly Expenditure ($)': household_df['TotalExpenditure'].mean(),
        'Total Net Expenditure ($)': household_df['TotalExpenditure'].sum(),
        'Weeks as Seller': int((household_df['Delta'] > 0).sum()),
        'Weeks as Buyer': int((household_df['Delta'] < 0).sum())
    }

    print(f"\nSummary Statistics for Household {household_id}")
    print("-" * 50)
    for key, value in stats.items():
        # Week counts are whole numbers; only measured quantities get two decimals.
        formatted = f"{value:d}" if isinstance(value, int) else f"{value:.2f}"
        print(f"{key}: {formatted}")

# Read the generated CSV through the project's DataProcessor
processor = DataProcessor()
df = processor.load_household_data('../data/sample_household_data.csv')

# Show the figures and summary statistics for household H1
generate_household_plots(df=df, household_id='H1')



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




Summary Statistics for Household H1
--------------------------------------------------
Average Weekly Usage (kWh): 945.15
Average Weekly Generation (kWh): 571.46
Average Weekly Expenditure ($): 52.27
Total Net Expenditure ($): 5749.22
Weeks as Seller: 4.00
Weeks as Buyer: 106.00


In [13]:
# Export only household H5's rows to their own CSV (row index omitted).
h5_rows = df.loc[df['HouseholdID'] == 'H5']
h5_rows.to_csv('../data/sample_household_data_H5.csv', index=False)