# In [7]:  (Jupyter notebook cell marker)
import pandas as pd
import random

# Read the owner roster from a CSV file in the current working directory.
# The export loop further down reads the 'Owner ID' and 'Strata' columns.
owner_data = pd.read_csv(filepath_or_buffer='Owner_sheet_0.1.csv')

# Appliance catalogue used by the generator.
# Each entry is a (name, minimum wattage, maximum wattage) tuple; the wattage
# bounds are passed to random.randint, so both ends are inclusive.
appliance_data = [
    ("Light",              5,  100),
    ("Fan",               30,  100),
    ("Mobile",             5,   20),
    ("TV",                50,  300),
    ("AC",              1000, 2000),
    ("Washing Machine",  500, 1500),
    ("Induction",       1000, 2000),
    ("Fridge",           150,  300),
    ("Water Purifier",    50,  150),
    ("Water Pump",       400, 1000),
    ("Geyser",          1500, 2500),
    ("Chimney",          100,  300),
    ("Inverter",         200, 1500),
    ("Desktop",          100,  300),
    ("Laptop",            50,  150),
]

# Appliances that are never generated for a user whose strata is 'poor'.
restricted_appliances_poor = [
    "AC",
    "Washing Machine",
    "Induction",
    "Water Purifier",
    "Water Pump",
    "Geyser",
    "Chimney",
    "Inverter",
    "Desktop",
    "Laptop",
]

# Inclusive (low, high) bounds for how many units of each optional appliance a
# user owns, keyed by normalised strata. Every strata not listed here (middle
# class, lower class, poor, or anything unrecognised) uses the default range.
_OPTIONAL_COUNT_RANGE = {
    'affluent': (1, 3),
    'upper class': (1, 2),
}
_DEFAULT_OPTIONAL_COUNT_RANGE = (0, 1)


def _make_appliance_record(name, count, min_watt, max_watt, rate_per_unit):
    """Build one appliance row with randomly drawn electrical readings."""
    # The draw order (wattage, voltage, hours) is fixed so that a seeded
    # `random` produces the same rows from run to run.
    wattage = random.randint(min_watt, max_watt)
    voltage = random.randint(210, 240)
    time_on = random.randint(1, 24)

    # NOTE(review): usage and cost are derived from a single wattage reading
    # and are not multiplied by `count` -- confirm whether 'Wattage' is meant
    # to be per appliance or the combined draw.
    units_spent = (wattage * time_on) / 1000  # Wh -> kWh

    return {
        'Appliance Name': name,
        'No of Appliances': count,
        'Wattage': wattage,
        'Voltage': voltage,
        'Ampere': round(wattage / voltage, 2),
        'Time On (hours)': time_on,
        'Electricity Units Spent (kWh)': units_spent,
        'Amount Spent (Rs.)': units_spent * rate_per_unit,
        # A reading outside the 220-230 V band is flagged as anomalous.
        'Anomalous': 'No' if 220 <= voltage <= 230 else 'Yes',
    }


# Generate Appliance Data for each user
def generate_appliance_data(user_strata, *, rate_per_unit=5):
    """Generate a random appliance-usage table for one user.

    Light, Fan, Mobile and TV are always present (1-3 of each). Every other
    entry of ``appliance_data`` is included when its randomly drawn count is
    positive; the count range depends on the strata. For the 'poor' strata,
    anything listed in ``restricted_appliances_poor`` is skipped outright.

    Args:
        user_strata: Strata label of the user. Matching ignores case and
            surrounding whitespace; unrecognised values are treated like the
            lowest tiers (0-1 of each optional appliance, no restrictions).
        rate_per_unit: Price in Rs. charged per kWh. Defaults to 5.

    Returns:
        A ``pandas.DataFrame`` with one row per generated appliance and the
        columns 'Appliance Name', 'No of Appliances', 'Wattage', 'Voltage',
        'Ampere', 'Time On (hours)', 'Electricity Units Spent (kWh)',
        'Amount Spent (Rs.)' and 'Anomalous'.

    Raises:
        KeyError: If a mandatory appliance is missing from ``appliance_data``.
    """
    # Normalise once so that e.g. 'Poor' or ' poor ' still triggers the
    # restriction instead of silently falling through to the default branch.
    strata = str(user_strata).strip().lower()

    mandatory_appliances = ("Light", "Fan", "Mobile", "TV")

    # Name -> (min, max) wattage lookup; the first entry wins on duplicates.
    watt_range = {}
    for name, min_watt, max_watt in appliance_data:
        watt_range.setdefault(name, (min_watt, max_watt))

    appliance_info = []

    # Mandatory appliances: every user owns between one and three of each.
    for name in mandatory_appliances:
        min_watt, max_watt = watt_range[name]
        count = random.randint(1, 3)
        appliance_info.append(
            _make_appliance_record(name, count, min_watt, max_watt, rate_per_unit)
        )

    # Optional appliances: ownership depends on the user's strata.
    low, high = _OPTIONAL_COUNT_RANGE.get(strata, _DEFAULT_OPTIONAL_COUNT_RANGE)
    skip_restricted = strata == 'poor'
    for name, min_watt, max_watt in appliance_data:
        if name in mandatory_appliances:
            continue
        if skip_restricted and name in restricted_appliances_poor:
            continue

        count = random.randint(low, high)
        if count > 0:
            appliance_info.append(
                _make_appliance_record(name, count, min_watt, max_watt, rate_per_unit)
            )

    return pd.DataFrame(appliance_info)

# Produce three CSV files per owner: the full table, the rows flagged as
# non-anomalous, and the rows flagged as anomalous.
for index, row in owner_data.iterrows():
    owner_id, user_strata = row['Owner ID'], row['Strata']

    # Fresh random appliance table for this owner.
    appliance_df = generate_appliance_data(user_strata)
    anomaly_flag = appliance_df['Anomalous']

    # Split into the three views before anything is written to disk.
    all_data_df = appliance_df.copy()
    non_anomalous_df = appliance_df[anomaly_flag == 'No'].copy()
    anomalous_df = appliance_df[anomaly_flag == 'Yes'].copy()

    # Files are written to the current working directory, without the index.
    all_data_df.to_csv(f'user_{owner_id}_all_data.csv', index=False)
    non_anomalous_df.to_csv(f'user_{owner_id}_non_anomalous_data.csv', index=False)
    anomalous_df.to_csv(f'user_{owner_id}_anomalous_data.csv', index=False)

print("Datasets generated successfully!")


# Output: Datasets generated successfully!
