In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta



In [2]:
# Configuration
# House identifiers: two blocks (A, B) with four units each -> A1..A4, B1..B4.
houses = [f"{block}{unit}" for block in "AB" for unit in range(1, 5)]

# Baseline monthly consumption per house (m³/month), grouped by usage tier.
monthly_avg_consumption = {
    **dict.fromkeys(('A1', 'B1'), 8.0),               # low tier: 8 m³/month
    **dict.fromkeys(('A2', 'A3', 'B2', 'B3'), 12.0),  # mid tier: 12 m³/month
    **dict.fromkeys(('A4', 'B4'), 15.0),              # high tier: 15 m³/month
}


In [3]:
# Price charged per unit of consumption, and the (inclusive) simulation window.
unit_price = 200
start_date = datetime(2025, 1, 1)
end_date = datetime(2025, 12, 31)

def generate_house_data(house, avg_consumption=None, rng=None):
    """Simulate one house's monthly water-meter invoices.

    A reading is produced for every 15th of the month that falls between the
    module-level ``start_date`` and ``end_date`` (both inclusive). Each
    month's consumption is the baseline scaled by a seasonal factor
    (1.2 for May-September, 0.9 otherwise) and a uniform +/-10% jitter; with
    probability 0.1 the month is treated as a leak and the consumption is
    additionally multiplied by a uniform factor in [2.0, 3.0).

    Parameters
    ----------
    house : str
        House identifier; used in the invoice number and the ``house_no``
        column.
    avg_consumption : float, optional
        Baseline monthly consumption. When omitted it is looked up in the
        module-level ``monthly_avg_consumption`` mapping.
    rng : optional
        Random source exposing ``uniform(low, high)`` and ``random()``, e.g.
        ``np.random.default_rng(seed)``. When omitted, NumPy's global
        ``np.random`` state is used (seed it with ``np.random.seed``).

    Returns
    -------
    pandas.DataFrame
        One row per reading with the columns ``invoice no``, ``current date``,
        ``previous reading``, ``current reading``, ``consumption``,
        ``duration``, ``unit price``, ``amount due`` and ``house_no``.

    Raises
    ------
    KeyError
        If ``avg_consumption`` is omitted and ``house`` is not a key of
        ``monthly_avg_consumption``.
    """
    if avg_consumption is None:
        avg_consumption = monthly_avg_consumption[house]
    if rng is None:
        rng = np.random  # legacy global RNG, same source the notebook always used

    # Pick the 15th of each month inside the window without a manual day loop.
    all_days = pd.date_range(start_date, end_date, freq='D')
    reading_dates = all_days[all_days.day == 15]

    rows = []
    prev_reading = 0.0
    for reading_date in reading_dates:
        # Base consumption with seasonal variation (higher May-September).
        season_factor = 1.2 if 5 <= reading_date.month <= 9 else 0.9
        consumption = avg_consumption * season_factor * rng.uniform(0.9, 1.1)

        # 10% chance of a leak (2-3x normal usage).
        if rng.random() < 0.1:
            consumption *= rng.uniform(2.0, 3.0)
            leak_flag = "(LEAK)"
        else:
            leak_flag = ""

        # Round once, then derive every other figure from the rounded value so
        # the invoice is self-consistent: current - previous == consumption
        # and amount due == consumption * unit price.
        consumption = round(float(consumption), 3)
        current_reading = round(prev_reading + consumption, 3)

        # Actual number of days since the previous month's reading on the 15th
        # (28-31), instead of a fixed 30.
        previous_date = reading_date - pd.DateOffset(months=1)
        duration = (reading_date - previous_date).days

        rows.append([
            f"{house}_{reading_date.strftime('%m')}{leak_flag}",
            reading_date.strftime('%d-%m-%Y'),
            prev_reading,
            current_reading,
            consumption,
            duration,
            unit_price,
            round(consumption * unit_price, 2),
            house
        ])
        prev_reading = current_reading

    return pd.DataFrame(rows, columns=[
        'invoice no', 'current date', 'previous reading',
        'current reading', 'consumption', 'duration',
        'unit price', 'amount due', 'house_no'
    ]
    )

In [4]:

# Generate all tables
# generate_house_data draws from NumPy's global RNG, so seed it first: a fresh
# "Restart Kernel -> Run All" then regenerates exactly the same workbook.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
all_tables = {house: generate_house_data(house) for house in houses}

# Save to Excel: one sheet per house, named after the house identifier.
output_path = 'water_meter_data_monthly_avg.xlsx'
with pd.ExcelWriter(output_path) as writer:
    for house, df in all_tables.items():
        df.to_excel(writer, sheet_name=house, index=False)

print("Monthly average data generated successfully!")

Monthly average data generated successfully!
