In [None]:
# Creating a synthetic dataset in order to mimic real-world conditions
#

In [15]:
import time 
import uuid
import random 
import math 
from datetime import datetime,timezone 
import threading 
import json
import os
import csv
from pathlib import Path 

In [26]:

"""these are all the default parameters that were set up by observing three data sets 
for demonstration of realism 
   1. GEOLIFE - MICROSOFT 
   2. Electricity consumption dataset  -- https://archive.ics.uci.edu/ml/datasets/individual+household+electric+power+consumption
   3. Industrial Emission - Government of India - scraped with Beautiful Soup
"""
# Seconds each generator loop sleeps between successive rounds of events.
transport_inteval = 1
electricity_interval = 60
industry_interval = 10

# How many entities each sector simulates. Every count can be overridden by an
# environment variable of the same name; otherwise the default shown is used.
num_user = int(os.environ.get("num_user", 5))
num_houses = int(os.environ.get("num_houses", 3))
num_factories = int(os.environ.get("num_factories", 2))

"""Getting current date and time"""
def current_datetime():
    """Return the current UTC time as an ISO 8601 string (offset ``+00:00``)."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()

"""Json return type -- how the date will be saved
Entity identifies what generates the carbon emission; value and unit
store the corresponding measurement and the unit it is expressed in."""
def event(sector,entity_id,value,unit,details=None):
    """Build one activity record as a plain, JSON-serialisable dict.

    Args:
        sector: Sector label stored under ``"sector"``.
        entity_id: Identifier of the emitting entity.
        value: Numeric activity amount; stored rounded to 3 decimal places.
        unit: Unit label stored under ``"activity_unit"``.
        details: Optional extra fields placed under ``metadata["details"]``.
            ``None`` or any other falsy value is replaced by a new empty dict.

    Returns:
        A dict with a random UUID4 ``"id"``, the arguments above, a
        ``"timestamp"`` from ``current_datetime()`` and a ``"metadata"`` dict
        whose ``"source"`` is always ``"synthetic"``.
    """
    # Keys are inserted in the order they should appear in the JSON output.
    record = {}
    record["id"] = str(uuid.uuid4())
    record["sector"] = sector
    record["entity_id"] = entity_id
    record["activity_value"] = round(value, 3)
    record["activity_unit"] = unit
    record["timestamp"] = current_datetime()
    record["metadata"] = {
        "source": "synthetic",
        "details": details if details else {},
    }
    return record
def print_event(event):
    """Print the given event to stdout as a single line of JSON."""
    serialized = json.dumps(event)
    print(serialized)

"""Saving the data in the csv"""
# Make sure the output directory exists before any file is opened.
Path("data/stream").mkdir(parents=True, exist_ok=True)

# One append-mode handle per sector, left open so the generators can keep
# writing. newline="" hands line-ending control to the csv module.
files = {
    sector: open(csv_path, "a", newline="")
    for sector, csv_path in (
        ("transport", "data/stream/transport_events.csv"),
        ("electricity", "data/stream/electricity_events.csv"),
        ("industry", "data/stream/industry_events.csv"),
    )
}

# A csv.writer bound to each handle, keyed by the same sector names as `files`.
write = {sector: csv.writer(handle) for sector, handle in files.items()}
def save_csv(event):
    """Append the event as one row to its sector's CSV file and flush it.

    The row holds id, sector, entity_id, activity_value, activity_unit and
    timestamp, followed by the metadata dict serialised as a JSON string.
    Raises KeyError when the event's sector has no registered writer.
    """
    sector = event["sector"]
    writer = write[sector]

    scalar_columns = (
        "id",
        "sector",
        "entity_id",
        "activity_value",
        "activity_unit",
        "timestamp",
    )
    row = [event[column] for column in scalar_columns]
    row.append(json.dumps(event["metadata"]))
    writer.writerow(row)

    # Flush after every row so the file on disk tracks the stream.
    files[sector].flush()



In [27]:
def transport_generator():
    """
    Emit one synthetic transport event per user on every tick, forever.

    On each tick every user gets a speed drawn from a log-normal
    distribution (mu=3.0, sigma=0.4), floored at 5 km/h. The reported
    activity value is the distance in km covered at that speed during one
    tick of ``transport_inteval`` seconds, so the distance stays consistent
    with the sleep interval if that interval is ever changed from 1 second.

    Each event is printed as JSON and appended to the transport CSV.
    This function never returns; run it in a background thread.
    """
    users = [f"user_{i}" for i in range(1, num_user + 1)]

    while True:
        for user in users:
            speed_kmh = max(5, random.lognormvariate(3.0, 0.4))
            # km/h * seconds / 3600 = km travelled in one tick. With the
            # default 1-second tick this equals the former speed_kmh / 3600.
            distance_km = speed_kmh * transport_inteval / 3600

            newevent = event(
                sector="transport",
                entity_id=user,
                value=distance_km,
                unit="km",
                details={
                    "speed_kmh": round(speed_kmh, 2)
                }
            )
            print_event(newevent)
            save_csv(newevent)

        time.sleep(transport_inteval)

def electricity_generator():
    """Emit one synthetic electricity event per house on every tick, forever.

    Each house draws a base load uniformly from 0.001-0.003 kWh, which is
    doubled while the local clock hour (naive ``datetime.now()``) is between
    18 and 22 inclusive. Every event is printed as JSON and appended to the
    electricity CSV, then the loop sleeps ``electricity_interval`` seconds.
    This function never returns.
    """
    house_ids = [f"house_{n}" for n in range(1, num_houses + 1)]
    while True:
        hour = datetime.now().hour
        # The hour is sampled once per tick, so the peak flag is the same for
        # every house in this round.
        is_peak = 18 <= hour <= 22
        peak_multiplier = 2 if is_peak else 1

        for house in house_ids:
            kwh = random.uniform(0.001, 0.003) * peak_multiplier
            reading = event(
                sector="electricity",
                entity_id=house,
                value=kwh,
                unit="kWh",
                details={"hour": hour, "peak": is_peak},
            )
            print_event(reading)
            save_csv(reading)

        time.sleep(electricity_interval)
def industry_generator():
    """Emit one synthetic industry event per factory on every tick, forever.

    While the local clock hour (naive ``datetime.now()``) is between 8 and 20
    inclusive, a factory is on an active shift and draws 5-15 kWh; otherwise
    it draws 1-3 kWh. Every event is printed as JSON and appended to the
    industry CSV, then the loop sleeps ``industry_interval`` seconds.
    This function never returns.

    The event construction now sits inside the per-factory loop. Previously
    only the hour lookup was in the loop body, so each tick produced a single
    event for the last factory, and a zero-factory configuration raised
    UnboundLocalError.
    """
    factories = [f"factory_{i}" for i in range(1, num_factories + 1)]
    while True:
        # Sample the hour once per tick so all factories share the same shift.
        hour = datetime.now().hour
        active = 8 <= hour <= 20

        for factory in factories:
            # Each factory gets its own independent energy draw.
            if active:
                energy_kwh = random.uniform(5, 15)
            else:
                energy_kwh = random.uniform(1, 3)

            newevent = event(
                sector="industry",
                entity_id=factory,
                value=energy_kwh,
                unit="kWh",
                details={
                    "active_shift": active
                }
            )
            print_event(newevent)
            save_csv(newevent)

        time.sleep(industry_interval)


In [None]:
if __name__ == "__main__":
    # Launch each sector generator in its own named daemon thread.
    #
    # In a notebook the kernel process outlives an interrupted cell, so daemon
    # threads started by an earlier run keep emitting events. Starting a second
    # set would have two threads appending to the same CSV file, so any
    # generator whose named thread is still alive is skipped.
    generators = {
        "transport-generator": transport_generator,
        "electricity-generator": electricity_generator,
        "industry-generator": industry_generator,
    }
    already_running = {t.name for t in threading.enumerate() if t.is_alive()}

    threads = [
        threading.Thread(target=target, name=name, daemon=True)
        for name, target in generators.items()
        if name not in already_running
    ]
    for t in threads:
        t.start()

    # Keep the main thread (or the notebook cell) alive; interrupt to stop
    # waiting. Daemon threads end when the process or kernel exits.
    while True:
        time.sleep(1)

{"id": "93e0f6e0-d44f-470c-afbd-c1291d95de91", "sector": "transport", "entity_id": "user_1", "activity_value": 0.006, "activity_unit": "km", "timestamp": "2026-01-12T09:34:40.189783+00:00", "metadata": {"source": "synthetic", "details": {"speed_kmh": 23.16}}}
{"id": "44467791-f19e-4508-98d2-59e9361237c4", "sector": "transport", "entity_id": "user_2", "activity_value": 0.005, "activity_unit": "km", "timestamp": "2026-01-12T09:34:40.190397+00:00", "metadata": {"source": "synthetic", "details": {"speed_kmh": 18.05}}}
{"id": "5c8df16b-867e-43e3-a63a-09be0f31cc2b", "sector": "transport", "entity_id": "user_3", "activity_value": 0.006, "activity_unit": "km", "timestamp": "2026-01-12T09:34:40.190504+00:00", "metadata": {"source": "synthetic", "details": {"speed_kmh": 20.06}}}
{"id": "ec408c11-e4a6-4ad1-a099-73b649367006", "sector": "transport", "entity_id": "user_4", "activity_value": 0.003, "activity_unit": "km", "timestamp": "2026-01-12T09:34:40.190611+00:00", "metadata": {"source": "synthe

KeyboardInterrupt: 

{"id": "d5c6ed42-bd39-4771-b4f3-df53e6985dea", "sector": "transport", "entity_id": "user_1", "activity_value": 0.004, "activity_unit": "km", "timestamp": "2026-01-12T09:35:55.285420+00:00", "metadata": {"source": "synthetic", "details": {"speed_kmh": 13.02}}}
{"id": "d0481e15-c010-4bc6-aed0-2fcbd4f920d7", "sector": "transport", "entity_id": "user_2", "activity_value": 0.007, "activity_unit": "km", "timestamp": "2026-01-12T09:35:55.285837+00:00", "metadata": {"source": "synthetic", "details": {"speed_kmh": 25.4}}}
{"id": "01e1ef11-4c8a-463f-8739-6e3c057ff14d", "sector": "transport", "entity_id": "user_3", "activity_value": 0.007, "activity_unit": "km", "timestamp": "2026-01-12T09:35:55.285967+00:00", "metadata": {"source": "synthetic", "details": {"speed_kmh": 24.22}}}
{"id": "b78d25ad-0625-483a-9357-7ab29fd6be32", "sector": "transport", "entity_id": "user_4", "activity_value": 0.006, "activity_unit": "km", "timestamp": "2026-01-12T09:35:55.286067+00:00", "metadata": {"source": "synthet