# In [2]:
import csv
import random
from datetime import datetime, timedelta

# Configurable parameters (module-level settings read by simulate_sensor_data)
output_file = 'sensor_data.csv'  # path of the CSV file; opened in 'w' mode, so it is overwritten
num_rows = 500  # number of data rows written after the header
# Timestamp of the first row. datetime.now() with no tz argument returns a
# naive value expressed in the machine's local time.
start_time = datetime.now()
interval_seconds = 5  # spacing, in seconds, between consecutive row timestamps

# Enable simulated data quality issues
inject_anomalies = True  # occasionally raise temperature and lower humidity in a row
inject_missing = True  # occasionally write an empty string for one measurement

# Sensor simulation
def simulate_sensor_data():
    """Write ``num_rows`` synthetic sensor readings to ``output_file`` as CSV.

    Reads the module-level settings ``output_file``, ``num_rows``,
    ``start_time``, ``interval_seconds``, ``inject_anomalies`` and
    ``inject_missing``. The file is overwritten and gets a header row
    followed by one row per reading with the columns ``timestamp``,
    ``sensor_id``, ``temperature``, ``humidity`` and ``pressure``.

    Timestamps are UTC, ISO 8601, with a trailing ``Z``; consecutive rows
    are ``interval_seconds`` apart. All measurements are rounded to two
    decimals. When enabled, roughly 5% of rows get a temperature spike with
    a humidity drop, and (independently) roughly 5% of rows get exactly one
    measurement replaced by an empty string.

    Returns:
        None. A confirmation line is printed once the file is written.
    """
    # Local import: the file's top-level import only brings in datetime/timedelta.
    from datetime import timezone

    # The 'Z' suffix means UTC, so normalise once up front. ``astimezone``
    # treats a naive ``start_time`` as local time and converts an aware one;
    # dropping tzinfo afterwards keeps ``isoformat`` from emitting "+00:00".
    start_utc = start_time.astimezone(timezone.utc).replace(tzinfo=None)
    step = timedelta(seconds=interval_seconds)

    fieldnames = ['timestamp', 'sensor_id', 'temperature', 'humidity', 'pressure']
    measurement_fields = ('temperature', 'humidity', 'pressure')

    with open(output_file, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for i in range(num_rows):
            row = {
                'timestamp': (start_utc + i * step).isoformat() + 'Z',
                'sensor_id': f"sensor_{random.randint(1, 5)}",
                # Base values
                'temperature': round(random.uniform(20.0, 30.0), 2),
                'humidity': round(random.uniform(40.0, 60.0), 2),
                'pressure': round(random.uniform(990.0, 1025.0), 2),
            }

            # Inject anomaly (sudden temperature spike with a humidity drop).
            # Re-round so anomalous rows keep the same two-decimal precision.
            if inject_anomalies and random.random() < 0.05:
                row['temperature'] = round(
                    row['temperature'] + random.uniform(10, 20), 2)
                row['humidity'] = round(
                    row['humidity'] - random.uniform(10, 15), 2)

            # Inject missing value: pick the field ONCE so each of the three
            # measurements is equally likely to be blanked.
            if inject_missing and random.random() < 0.05:
                row[random.choice(measurement_fields)] = ''

            writer.writerow(row)

    print(f"✅ Generated {num_rows} rows of sensor data in {output_file}")

# Generate the CSV only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    simulate_sensor_data()


# Output: ✅ Generated 500 rows of sensor data in sensor_data.csv
