## Simulated Data Generating Process (DGP)

Sensors 1, 2, and 3:

For $t<T_1$:

* y_1 ~ N(mu_1, sigma_1)
* y_2 ~ N(mu_2, sigma_2)
* y_3 ~ N(mu_3, sigma_3)

For $T_1 \le t < T_2$:

* y_1 ~ N(mu_11, sigma_1) -> Mean Drift!
* y_2 ~ N(mu_2, sigma_2)
* y_3 ~ N(mu_3, sigma_3)

For $t>=T_2$:

* y_1 ~ N(mu_11, sigma_1) -> Mean Drift (started at $T_1$)
* y_2 ~ N(mu_2, sigma_2)
* y_3 ~ N(mu_33, sigma_3) -> Mean Drift!


In [1]:
from kafka import KafkaProducer
import time
import datetime
import json
import numpy as np
import random
from utils.kafka_sender import set_sensor_delay
from utils.kafka_sender import plot_sens_obs, plot_drift_stream
from schemas.schema_sensor import SensorParams, DriftParams

In [2]:
# Drift schedules. Each entry wraps a DriftParams under the "drift-event" key.
# send_sensor_data draws around `mu_drift` while
# t0_drift <= time_step <= t0_drift + delta_drift.
drift_events_sens_1 = [
    {"drift-event": DriftParams(mu_drift=5, t0_drift=10, delta_drift=5)},
    {"drift-event": DriftParams(mu_drift=30, t0_drift=20, delta_drift=10)},
]

drift_events_sens_3 = [
    {"drift-event": DriftParams(mu_drift=15, t0_drift=0, delta_drift=5)},
    {"drift-event": DriftParams(mu_drift=25, t0_drift=25, delta_drift=10)},
]

In [3]:
# Configurations
SEED = 42                # seed handed to NumPy's global RNG before streaming
i = 0                    # time-step counter advanced by the streaming loop
streaming_duration = 35  # intended number of time steps for a bounded run

# Settings that are identical for all three sensors.
_shared_sensor_settings = dict(
    sigma=.3,
    eps=0.1,
    delay_mode="fix",
    delay_value=1,
    max_delay=5,
)

# sensor-1: baseline mean 2, with two scheduled drift events.
params_sens1 = SensorParams(
    sensor_name="sensor-1",
    mu=2,
    drifts=drift_events_sens_1,
    **_shared_sensor_settings,
)

# sensor-2: baseline mean 3.5, no drift events configured.
params_sens2 = SensorParams(
    sensor_name="sensor-2",
    mu=3.5,
    **_shared_sensor_settings,
)

# sensor-3: baseline mean 1, with two scheduled drift events.
params_sens3 = SensorParams(
    sensor_name="sensor-3",
    mu=1,
    drifts=drift_events_sens_3,
    **_shared_sensor_settings,
)

In [None]:
def send_sensor_data(producer: KafkaProducer, topic: str, params: SensorParams, time_step: int) -> tuple:
    """Generate one observation for a sensor and publish it to a Kafka topic.

    The observation is drawn from a normal distribution with mean
    ``params.mu`` and standard deviation ``params.sigma``, plus a standard
    normal noise term scaled by ``params.eps``. If ``time_step`` lies in the
    window ``[t0_drift, t0_drift + delta_drift]`` (both ends inclusive) of one
    of the sensor's drift events, the observation is drawn around that
    event's ``mu_drift`` instead. The first matching event in
    ``params.drifts`` wins.

    Args:
        producer (KafkaProducer): Kafka producer instance.
        topic (str): Kafka topic to send data to.
        params (SensorParams): Parameters for the sensor data generation.
        time_step (int): Current time step, compared against the drift windows.
    Returns:
        tuple: ``(data_value, drift, elapsed)`` where ``data_value`` is the
        generated observation, ``drift`` is True when a drift event was
        active for this step, and ``elapsed`` is the wall-clock time in
        seconds spent in this call (simulated delay and broker
        acknowledgement included).
    """
    start_time = time.time()
    noise = params.eps * np.random.normal(0, 1)
    drift = False
    # The baseline value is drawn unconditionally; when a drift event applies
    # it is discarded and replaced, costing one extra RNG draw on drift steps.
    data_value = np.random.normal(params.mu, params.sigma) + noise

    # `or ()` covers sensors configured without any drift events.
    for drift_event in params.drifts or ():
        event = drift_event['drift-event']
        if event.t0_drift <= time_step <= event.t0_drift + event.delta_drift:
            data_value = np.random.normal(event.mu_drift, params.sigma) + noise
            drift = True
            break

    # The timestamp is taken before the simulated delay is applied.
    data_sent = {'sensor': params.sensor_name,
                 'info': {'timestamp': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                          'obs': data_value,
                          'drift': drift}}
    # simulate delay
    set_sensor_delay(params)

    try:
        future = producer.send(topic, value=data_sent)
        # Block until the send completes (or 10 s pass) so failures surface here.
        record = future.get(timeout=10)
        print(f"Message delivered: topic-{record.topic}:partition-{record.partition}")
    except Exception as e:
        # Best effort: a failed delivery is reported but does not stop the
        # stream; the generated values are still returned to the caller.
        print(f"Error sending data to Kafka: {e}")

    elapsed = time.time() - start_time

    return data_value, drift, elapsed

In [5]:
# Producer for the local broker; each message value is serialised to JSON
# and encoded as UTF-8 bytes before sending.
producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    value_serializer=lambda payload: json.dumps(payload).encode('utf-8'),
)

In [None]:
np.random.seed(SEED)

# Per-sensor histories: observations, drift flags and per-call elapsed times.
list_sensobs_1, list_drifts_1, list_delay_1 = [], [], []
list_sensobs_2, list_drifts_2, list_delay_2 = [], [], []
list_sensobs_3, list_drifts_3, list_delay_3 = [], [], []

# The loop runs until the cell is interrupted, continuing from the current
# value of `i`. For a bounded run, use this header instead:
#     for i in range(streaming_duration):
while True:

    print("--" * 20)
    print(f"Time step {i}")
    print(" ")

    # One message per sensor, always in the order sensor-1, sensor-2, sensor-3.
    data_1, drift_1, delay_1 = send_sensor_data(
        producer=producer, topic="sensors", params=params_sens1, time_step=i
    )
    data_2, drift_2, delay_2 = send_sensor_data(
        producer=producer, topic="sensors", params=params_sens2, time_step=i
    )
    data_3, drift_3, delay_3 = send_sensor_data(
        producer=producer, topic="sensors", params=params_sens3, time_step=i
    )

    # Record this step's results in the matching history lists.
    for history, latest in (
        (list_sensobs_1, data_1), (list_drifts_1, drift_1), (list_delay_1, delay_1),
        (list_sensobs_2, data_2), (list_drifts_2, drift_2), (list_delay_2, delay_2),
        (list_sensobs_3, data_3), (list_drifts_3, drift_3), (list_delay_3, delay_3),
    ):
        history.append(latest)

    print(" ")
    print(f"[sensor-1] Obs: {round(data_1,3)}, Drift: {drift_1}, Delay: {round(delay_1,3)}")
    print(f"[sensor-2] Obs: {round(data_2,3)}, Drift: {drift_2}, Delay: {round(delay_2,3)}")
    print(f"[sensor-3] Obs: {round(data_3,3)}, Drift: {drift_3}, Delay: {round(delay_3,3)}")
    print(" ")
    print(f"Total Delay: {delay_1 + delay_2 + delay_3}")

    i += 1  # advance to the next time step

----------------------------------------
Time step 0
 
Message delivered to sensors:0
Message delivered to sensors:0
Message delivered to sensors:0
 
[sensor-1] Obs: 2.008, Drift: False, Delay: 1.007
[sensor-2] Obs: 4.022, Drift: False, Delay: 1.002
[sensor-3] Obs: 15.45, Drift: True, Delay: 1.004
 
Total Delay: 3.013005018234253
----------------------------------------
Time step 1
 
Message delivered to sensors:0
Message delivered to sensors:0
Message delivered to sensors:0
 
[sensor-1] Obs: 1.936, Drift: False, Delay: 1.003
[sensor-2] Obs: 3.415, Drift: False, Delay: 1.005
[sensor-3] Obs: 14.379, Drift: True, Delay: 1.003
 
Total Delay: 3.010974407196045
----------------------------------------
Time step 2
 
Message delivered to sensors:0
Message delivered to sensors:0
Message delivered to sensors:0
 
[sensor-1] Obs: 1.659, Drift: False, Delay: 1.004
[sensor-2] Obs: 3.493, Drift: False, Delay: 1.005
[sensor-3] Obs: 15.349, Drift: True, Delay: 1.006
 
Total Delay: 3.015104055404663
--

In [None]:
# Fixed y-axis range for the observation plot.
y_lower_limit, y_upper_limit = -35, 35

# Stack the three observation histories into one array (one row per sensor).
obs_data_stream = np.array([list_sensobs_1, list_sensobs_2, list_sensobs_3])

# plot sensor observations
plot_sens_obs(
    obs_data_stream,
    ylim_low=y_lower_limit,
    ylim_up=y_upper_limit,
)

In [None]:
# Stack the three drift-flag histories into one array (one row per sensor).
drift_data_stream = np.array(
    [list_drifts_1, list_drifts_2, list_drifts_3]
)

# plot drifts in data streams
plot_drift_stream(drift_data_stream)

In [None]:
import matplotlib.pyplot as plt

# Overlay one semi-transparent histogram of per-call elapsed times per sensor.
for delays, sensor_label, colour in (
    (list_delay_1, "sensor-1", "red"),
    (list_delay_2, "sensor-2", "blue"),
    (list_delay_3, "sensor-3", "green"),
):
    plt.hist(delays, alpha=0.5, label=sensor_label, color=colour, bins=50)

plt.title("Histogram of Delays")
plt.legend()
plt.show()