In [1]:
import pandas as pd

# Widen pandas' display limits so the frames printed later in this notebook are
# not truncated by row/column count or wrapped by line width.
for option_name, option_value in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(option_name, option_value)

# Case Study - Generate Sensor Data

In this case study, we fabricate some sensor data of the kind we might encounter
in an operations study.

## Pattern 1 - Tall Format

![sensor_model1](../images/sensor_model1.drawio.svg)

Note that the `_underscored` columns are not included, as they are temporary columns
used only to generate the final output. One can omit these columns programmatically at
the end.

In [2]:
from data_fabricator.v0.core.fabricator import MockDataGenerator
import yaml

# YAML instructions for the tall-format example; parsed below with
# yaml.safe_load and handed to MockDataGenerator. Two tables are declared:
#   * sensor_table    - num_rows 4: a `generate_unique_id` column, a `sensor_name`
#                       built from that id via row_apply, and an integer
#                       `_random_shift` (start_range 1, end_range 100).
#   * sensor_readings - three `explode` columns that share the same
#                       list_of_values / explode_func / num_rows and differ only
#                       in `position` (0, 1, 2), followed by row_apply columns:
#                       a sine term, a random error term, and their sum.
# NOTE(review): the row_func lambdas reference `math` and `random`, which this
# cell does not import; presumably the fabricator provides them when it
# evaluates the lambda strings - confirm against the library.
config_str = """

sensor_table:
  num_rows: 4
  columns:
    sensor_id:
      type: generate_unique_id
      seed: 1 # defaults to None
    sensor_name:
      type: row_apply
      list_of_values: sensor_table.sensor_id
      row_func: "lambda x: 'sensor_' + str(x)"
    _random_shift:
      type: generate_random_numbers
      integer: True
      start_range: 1
      end_range: 100

sensor_readings:
  columns:
    sensor_name:
      type: explode
      list_of_values:
        - sensor_table.sensor_name
        - sensor_table._random_shift
      explode_func: generate_unique_id
      explode_func_kwargs:
        num_rows: 100
      position: 0
    _random_shift:
      type: explode
      list_of_values:
        - sensor_table.sensor_name
        - sensor_table._random_shift
      explode_func: generate_unique_id
      explode_func_kwargs:
        num_rows: 100
      position: 1
    time_index:
      type: explode
      list_of_values:
        - sensor_table.sensor_name
        - sensor_table._random_shift
      explode_func: generate_unique_id
      explode_func_kwargs:
        num_rows: 100
      position: 2
    _sensor_reading_smooth:
      type: row_apply
      list_of_values: [sensor_readings.time_index, sensor_readings._random_shift]
      row_func: "lambda x,y: y * math.sin((int(x)+int(y))/(2*3.412))"
    _random_error:
      type: row_apply
      list_of_values: sensor_readings._random_shift
      row_func: "lambda x: random.random()*x*0.2"
    sensor_reading:
      type: row_apply
      list_of_values: [sensor_readings._sensor_reading_smooth, sensor_readings._random_error]
      row_func: "lambda x, y: x + y"
"""
# Parse the YAML text into a plain nested dict and echo it so the structure
# passed to the generator is visible in the cell output.
example_config = yaml.safe_load(config_str)
print(example_config)

# Setting a seed is not recommended for general use; consider carefully when to use one.
mock_generator = MockDataGenerator(instructions=example_config, seed=1)
mock_generator.generate_all()

# all_dataframes is indexed by the table names declared in the config; preview
# the first 10 rows of each generated table.
print(mock_generator.all_dataframes["sensor_table"].head(10))
print(mock_generator.all_dataframes["sensor_readings"].head(10))

{'sensor_table': {'num_rows': 4, 'columns': {'sensor_id': {'type': 'generate_unique_id', 'seed': 1}, 'sensor_name': {'type': 'row_apply', 'list_of_values': 'sensor_table.sensor_id', 'row_func': "lambda x: 'sensor_' + str(x)"}, '_random_shift': {'type': 'generate_random_numbers', 'integer': True, 'start_range': 1, 'end_range': 100}}}, 'sensor_readings': {'columns': {'sensor_name': {'type': 'explode', 'list_of_values': ['sensor_table.sensor_name', 'sensor_table._random_shift'], 'explode_func': 'generate_unique_id', 'explode_func_kwargs': {'num_rows': 100}, 'position': 0}, '_random_shift': {'type': 'explode', 'list_of_values': ['sensor_table.sensor_name', 'sensor_table._random_shift'], 'explode_func': 'generate_unique_id', 'explode_func_kwargs': {'num_rows': 100}, 'position': 1}, 'time_index': {'type': 'explode', 'list_of_values': ['sensor_table.sensor_name', 'sensor_table._random_shift'], 'explode_func': 'generate_unique_id', 'explode_func_kwargs': {'num_rows': 100}, 'position': 2}, '_se

  from data_fabricator.v0.core.fabricator import MockDataGenerator


If we plot this we get:

In [3]:
import plotly.express as px

# Tall-format readings: draw one line per sensor_name over time_index.
df = mock_generator.all_dataframes["sensor_readings"]
fig = px.line(
    df,
    x="time_index",
    y="sensor_reading",
    color="sensor_name",
)
# Save a static PNG so the markdown cell below can embed the chart.
fig.write_image("data_fabricator/docs/images/sensor_sample.png")

![sensor_sample](../images/sensor_sample.png)

## Pattern 2 - Wide Format

Wide format is easier if you need to control for the relationships between sensor
readings during fabrication, particularly using the `row_apply` syntax.

![sensor_model2](../images/sensor_model2.drawio.svg)

In [4]:
from data_fabricator.v0.core.fabricator import MockDataGenerator
import yaml

# YAML instructions for the wide-format example: a single `sensor_readings`
# table with num_rows 100. `time_index` comes from generate_unique_id;
# sensor_1 and sensor_2 are row_apply columns taking the sine and cosine of
# int(time_index) / (2*3.412); sensor_3 is the row-wise product of the two.
# NOTE(review): the row_func lambdas reference `math`, which this cell does not
# import; presumably the fabricator provides it when it evaluates the lambda
# strings - confirm against the library.
config_str = """
sensor_readings:
  num_rows: 100
  columns:
    time_index:
      type: generate_unique_id
    sensor_1:
      type: row_apply
      list_of_values: sensor_readings.time_index
      row_func: "lambda x: math.sin(int(x)/(2*3.412))"
    sensor_2:
      type: row_apply
      list_of_values: sensor_readings.time_index
      row_func: "lambda x: math.cos(int(x)/(2*3.412))"
    sensor_3:
      type: row_apply
      list_of_values: ["sensor_readings.sensor_1", "sensor_readings.sensor_2"]
      row_func: "lambda x,y: x*y"

"""
# Parse the YAML text into a plain nested dict and echo it so the structure
# passed to the generator is visible in the cell output.
example_config = yaml.safe_load(config_str)
print(example_config)

# Setting a seed is not recommended for general use; consider carefully when to use one.
mock_generator = MockDataGenerator(instructions=example_config, seed=1)
mock_generator.generate_all()

# Preview the first 10 rows of the generated wide table.
print(mock_generator.all_dataframes["sensor_readings"].head(10))

{'sensor_readings': {'num_rows': 100, 'columns': {'time_index': {'type': 'generate_unique_id'}, 'sensor_1': {'type': 'row_apply', 'list_of_values': 'sensor_readings.time_index', 'row_func': 'lambda x: math.sin(int(x)/(2*3.412))'}, 'sensor_2': {'type': 'row_apply', 'list_of_values': 'sensor_readings.time_index', 'row_func': 'lambda x: math.cos(int(x)/(2*3.412))'}, 'sensor_3': {'type': 'row_apply', 'list_of_values': ['sensor_readings.sensor_1', 'sensor_readings.sensor_2'], 'row_func': 'lambda x,y: x*y'}}}}
  time_index  sensor_1  sensor_2  sensor_3
0          1  0.146018  0.989282  0.144453
1          2  0.288905  0.957358  0.276586
2          3    0.4256  0.904911   0.38513
3          4  0.553172  0.833067  0.460829
4          5  0.668885  0.743366  0.497226
5          6  0.770261  0.637729  0.491218
6          7  0.855125  0.518422  0.443316
7          8  0.921658  0.388003  0.357606
8          9  0.968435  0.249266  0.241398
9         10  0.994453  0.105185  0.104602


If we were to plot this:

In [5]:
import plotly.graph_objects as go

df = mock_generator.all_dataframes["sensor_readings"]

# Build the figure with one line trace per wide-format sensor column; every
# trace shares time_index on the x axis and is labelled with its column name.
fig = go.Figure()
for sensor_column in ("sensor_1", "sensor_2", "sensor_3"):
    fig.add_trace(
        go.Scatter(
            x=df["time_index"],
            y=df[sensor_column],
            mode="lines",
            name=sensor_column,
        )
    )
# Save a static PNG so the markdown cell below can embed the chart.
fig.write_image("data_fabricator/docs/images/sensor_sample2.png")

![sensor_sample2](../images/sensor_sample2.png)