# In [None]:
import numpy as np
import pandas as pd

# Household count columns drawn from the per-income-level ranges, listed in
# the order they appear in the output frame.
_COUNT_COLUMNS = (
    'num_occupants',
    'num_rooms',
    'num_fans',
    'num_ACs',
    'num_geysers',
    'num_refrigerators',
    'num_lights',
    'num_microwaves',
    'num_toasters',
    'num_inductions',
    'num_washing_machines',
    'other_appliances',
)

# One row per appliance: (count column, power column, low, high).
# The power value is drawn with np.random.randint(low, high), so `high` is
# exclusive here.
_APPLIANCE_POWER = (
    ('num_fans', 'power_usage_fans', 20, 100),
    ('num_ACs', 'power_usage_ACs', 500, 1500),
    ('num_geysers', 'power_usage_geysers', 1000, 3000),
    ('num_refrigerators', 'power_usage_refrigerators', 100, 400),
    ('num_lights', 'power_usage_lights', 5, 100),
    ('num_microwaves', 'power_usage_microwaves', 800, 1500),
    ('num_toasters', 'power_usage_toasters', 600, 1200),
    ('num_inductions', 'power_usage_inductions', 1000, 2000),
    ('num_washing_machines', 'power_usage_washing_machines', 300, 800),
    ('other_appliances', 'power_usage_other_appliances', 50, 500),
)

# Constant supply voltage assumed when converting total power to current.
_SUPPLY_VOLTAGE = 230


# Function to simulate data based on income level ranges
def simulate_data(num_samples, income_ranges, feature_ranges):
    """Generate a synthetic household appliance/power dataset.

    For every sample an income level is picked uniformly from the keys of
    ``income_ranges``; the income and all count features are then drawn from
    the ranges registered for that level.

    Args:
        num_samples: Number of rows to generate.
        income_ranges: Mapping ``level -> (low, high)``. The income is drawn
            with ``np.random.randint(low, high)``, i.e. ``high`` is exclusive.
        feature_ranges: Mapping ``level -> {feature: (low, high)}`` holding an
            entry for every count column (``num_occupants`` ...
            ``other_appliances``). Both bounds are treated as inclusive so
            that a range such as ``(0, 1)`` can produce 0 or 1.

    Returns:
        A ``pandas.DataFrame`` with one row per sample. Besides the drawn
        columns it contains ``total_power_consumption`` (sum of
        count * power over all appliances) and ``total_current_drawn``
        (total power divided by 230).

    Raises:
        KeyError: If ``feature_ranges`` lacks the chosen level or one of the
            count columns.
        ValueError: If a ``(low, high)`` pair is empty or ``income_ranges``
            is empty while ``num_samples`` is positive (raised by NumPy).

    Notes:
        * The ``income_level`` column stores the numeric income, not the
          level label; the column name is kept for compatibility.
        * ``demography`` and ``strata`` are drawn uniformly and independently
          of the chosen income level.
        * ``user_id`` is drawn from 24001..24099, so ids may repeat.
    """
    levels = list(income_ranges.keys())
    data = []
    for _ in range(num_samples):
        # Randomly select an income range
        income_level = np.random.choice(levels)
        income_low, income_high = income_ranges[income_level]
        level_ranges = feature_ranges[income_level]

        record = {
            'income_level': np.random.randint(income_low, income_high),
            'user_id': np.random.randint(24001, 24100),
            'demography': np.random.choice(['Urban', 'Suburban', 'Rural']),
            'strata': np.random.choice(['High', 'Middle', 'Low']),
        }

        # The (low, high) pair must be unpacked: handing the tuple itself to
        # randint is read as an array-valued upper bound, which yields an
        # array per feature and raises for any pair containing 0.
        for column in _COUNT_COLUMNS:
            low, high = level_ranges[column]
            record[column] = np.random.randint(low, high + 1)

        for _count, power_column, low, high in _APPLIANCE_POWER:
            record[power_column] = np.random.randint(low, high)

        # Calculate total power consumption and current drawn
        record['total_power_consumption'] = sum(
            record[count_column] * record[power_column]
            for count_column, power_column, _low, _high in _APPLIANCE_POWER
        )
        record['total_current_drawn'] = (
            record['total_power_consumption'] / _SUPPLY_VOLTAGE
        )

        data.append(record)

    return pd.DataFrame(data)

# Income bounds per income level, stored as (low, high) pairs.
# simulate_data unpacks each pair and draws the income with
# np.random.randint(low, high).
income_ranges = dict(
    Low=(20_000, 50_000),
    Middle=(50_000, 100_000),
    High=(100_000, 200_000),
)

# (low, high) bounds for each household count feature, grouped by income level.
# The level names match the keys of income_ranges; the feature names match the
# count columns that simulate_data looks up for the chosen level.
feature_ranges = dict(
    Low=dict(
        num_occupants=(1, 5),
        num_rooms=(1, 3),
        num_fans=(1, 3),
        num_ACs=(0, 1),
        num_geysers=(0, 1),
        num_refrigerators=(0, 1),
        num_lights=(5, 10),
        num_microwaves=(0, 1),
        num_toasters=(0, 1),
        num_inductions=(0, 1),
        num_washing_machines=(0, 1),
        other_appliances=(0, 2),
    ),
    Middle=dict(
        num_occupants=(2, 7),
        num_rooms=(2, 5),
        num_fans=(2, 5),
        num_ACs=(1, 2),
        num_geysers=(1, 2),
        num_refrigerators=(1, 2),
        num_lights=(10, 15),
        num_microwaves=(1, 2),
        num_toasters=(1, 2),
        num_inductions=(1, 2),
        num_washing_machines=(1, 2),
        other_appliances=(1, 3),
    ),
    High=dict(
        num_occupants=(3, 10),
        num_rooms=(3, 6),
        num_fans=(3, 10),
        num_ACs=(2, 4),
        num_geysers=(1, 3),
        num_refrigerators=(1, 2),
        num_lights=(15, 20),
        num_microwaves=(1, 2),
        num_toasters=(1, 2),
        num_inductions=(1, 2),
        num_washing_machines=(1, 2),
        other_appliances=(2, 5),
    ),
)

# How many household records to simulate
num_samples = 1000

# Build the synthetic dataset from the range tables defined above
synthetic_data = simulate_data(
    num_samples=num_samples,
    income_ranges=income_ranges,
    feature_ranges=feature_ranges,
)

# Preview the leading rows of the generated frame
print(synthetic_data.head(n=5))

# Write the dataset to 'synthetic_data.csv' (path relative to the working
# directory), leaving out the DataFrame index column
synthetic_data.to_csv('synthetic_data.csv', index=False)
