## Simulated Data Generating Process (DGP)

Sensors 1, 2, and 3

For $t < T_1$:

* $y_1 \sim \mathcal{N}(\mu_1, \sigma_1)$
* $y_2 \sim \mathcal{N}(\mu_2, \sigma_2)$
* $y_3 \sim \mathcal{N}(\mu_3, \sigma_3)$

For $T_1 \leq t < T_2$:

* $y_1 \sim \mathcal{N}(\mu_{11}, \sigma_1)$ -> Mean Drift!
* $y_2 \sim \mathcal{N}(\mu_2, \sigma_2)$
* $y_3 \sim \mathcal{N}(\mu_3, \sigma_3)$

For $t \geq T_2$:

* $y_1 \sim \mathcal{N}(\mu_{11}, \sigma_1)$ -> Mean Drift (started at $T_1$)
* $y_2 \sim \mathcal{N}(\mu_2, \sigma_2)$
* $y_3 \sim \mathcal{N}(\mu_{33}, \sigma_3)$ -> Mean Drift!


In [None]:
from kafka import KafkaProducer
import time
import json
import numpy as np
import random

In [None]:
from dataclasses import dataclass
from typing import Optional


@dataclass
class SensorParams:
    """Configuration of one simulated sensor.

    The fields are consumed by ``send_sensor_data``: ``mu`` and ``sigma`` are
    passed to ``np.random.normal`` as location and scale, ``eps`` scales an
    extra standard-normal noise term, and the ``delay_*`` fields control how
    long the sender sleeps after publishing a reading.
    """

    # Value of the 'sensor' key in every published message.
    sensor_name: str
    # Location and scale of the normal distribution the reading is drawn from.
    mu: float
    sigma: float
    # Multiplier applied to an additional N(0, 1) noise draw.
    eps: float
    delay_mode: str  # "fix" or "random"
    # "fix": exact sleep in seconds; "random": lower bound of the uniform draw.
    delay_value: float
    # Upper bound of the uniform draw; required only for "random" delay mode,
    # hence Optional rather than a plain float.
    max_delay: Optional[float] = None

In [None]:
# Configurations
SEED = 42  # passed to np.random.seed before the simulation loop
i = 0  # rebound by the `for i in range(10)` simulation loop

# Sensor 1: readings around mu=2 with sigma=5, followed by a random pause
# drawn between delay_value and max_delay seconds.
params_sens1 = SensorParams(
    sensor_name="sensor-1",
    mu=2,
    sigma=5,
    eps=0.2,
    delay_mode="random",
    delay_value=1,
    max_delay=5,
)

In [None]:
def _resolve_delay(params):
    """Return the pause in seconds implied by ``params``, validating its delay config."""
    if params.delay_mode == "fix":
        return params.delay_value
    if params.delay_mode == "random":
        if params.max_delay is None:
            raise ValueError("max_delay should be specified in delay_mode 'random'")
        return random.uniform(params.delay_value, params.max_delay)
    raise ValueError("delay_mode should be 'fix' or 'random'")


def send_sensor_data(producer, topic, params: "SensorParams"):
    """Publish one simulated sensor reading, then pause for the configured delay.

    The reading is a draw from N(params.mu, params.sigma) plus ``params.eps``
    times a standard-normal draw. It is sent as
    ``{'sensor': <sensor_name>, 'value': <reading>}`` via ``producer.send``.

    Args:
        producer: Object exposing ``send(topic, value=...)``.
        topic: Topic name handed to ``producer.send``.
        params: Sensor configuration (an instance, not the class). It is
            required: the former default was the ``SensorParams`` class
            itself, which could never work because the class carries no
            ``eps``/``mu``/``sigma`` attributes.

    Returns:
        Tuple ``(data_value, elapsed)`` where ``elapsed`` is the number of
        seconds spent in this call, including the sleep.

    Raises:
        ValueError: If ``delay_mode`` is neither 'fix' nor 'random', or if
            it is 'random' and ``max_delay`` is None. The check runs before
            anything is sent, so a bad configuration publishes nothing.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    # Validate and pick the delay first: fail before publishing, not after.
    delay = _resolve_delay(params)

    noise = params.eps * np.random.normal(0, 1)
    data_value = np.random.normal(params.mu, params.sigma, 1)[0] + noise
    data_sent = {'sensor': params.sensor_name, 'value': data_value}
    producer.send(topic, value=data_sent)

    time.sleep(delay)

    elapsed = time.perf_counter() - start_time
    return data_value, elapsed

In [None]:
def _to_json_bytes(payload):
    """Serialize a message payload to UTF-8 encoded JSON bytes."""
    return json.dumps(payload).encode('utf-8')


# Producer connected to the local broker; every value passed to send() goes
# through the JSON serializer above.
producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    value_serializer=_to_json_bytes,
)
producer

In [None]:
# Seed BOTH generators: sensor values are drawn with np.random, while the
# random-mode delays are drawn with the stdlib `random` module. Seeding only
# NumPy would leave the delays different on every run.
np.random.seed(SEED)
random.seed(SEED)

# Per-step observations and measured send durations for sensor 1.
list_sensobs_1 = []
list_delay_1 = []

for i in range(10):

    print("--"*20)
    print(f"Time step {i}")
    print(" ")

    # Sensor 1
    data1, time_end_sens_1 = send_sensor_data(producer=producer,
                                              topic="sensors",
                                              params=params_sens1)

    list_sensobs_1.append(data1)
    list_delay_1.append(time_end_sens_1)

    print(f"[sensor-1] Sent: {data1}")
    print(" ")
    print(f"[sensor-1] Delay: {time_end_sens_1}")
    print(f"Total Delay: {time_end_sens_1}")

# send() only enqueues records; flush so nothing is left buffered once the
# simulation is over.
producer.flush()


In [None]:
import matplotlib.pyplot as plt

# Distribution of the measured per-step send durations for sensor 1,
# drawn on the current axes.
ax = plt.gca()
ax.hist(
    list_delay_1,
    bins=50,
    color="red",
    alpha=0.5,
    label="sensor-1",
)
ax.set_title("Histogram of Delays")
ax.legend()
plt.show()