## Create dataset

* Number of batteries: 10

* One month of data (March 2025) at one-hour granularity

* Two sensors per battery: power meter and temperature sensor

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import json
import requests

# Seed NumPy's global RNG so every run draws the same simulated values.
# The draw order below (usage, temperature, load, manufacturers) determines
# the values, so keep it unchanged.
np.random.seed(123)

# Constants
n_batteries = 10
days_in_march = 31  # the simulated month is March 2025 (see start_time)
# Backward-compatible alias: the constant was originally (mis)named after
# January although the data starts on March 1.
days_in_january = days_in_march
hours_per_day = 24
total_hours = days_in_march * hours_per_day  # 744 hours
start_time = datetime(2025, 3, 1, 0, 0, 0)  # March 1, 2025, 00:00:00

# Simulate daily usage (1–3 hours/day) and covariates
daily_usage = np.random.uniform(1, 3, size=(n_batteries, days_in_march))
# Despite its name, this holds the running total of daily usage per battery
# (shape: n_batteries x days_in_march), not an hourly series.
hourly_usage = daily_usage.cumsum(axis=1)
temperature = np.random.uniform(20, 55, n_batteries)  # 20–55°C, one value per battery
load = np.random.uniform(1, 32, n_batteries)  # 1–32 kW, one value per battery
manufacturers = np.random.choice(['A', 'B', 'C'], n_batteries)

In [None]:
# Remote REST API configuration.
# The base URL ends with a slash because resource names are appended
# directly to it (e.g. f"{BASE_URL_REMOTE}spaces").
BASE_URL_REMOTE = "http://13.60.16.153/api/v1/"

# Sent with every POST so the server parses the body as JSON.
HEADERS = {
    "Content-Type": "application/json",
}

In [None]:
# List the buildings known to the remote API.
# The explicit timeout stops an unreachable host from hanging the kernel.
requests.get(f"{BASE_URL_REMOTE}buildings", timeout=30).json()

{'buildings': [{'building_id': 'b001',
   'name': 'HQ',
   'description': 'Headquarters',
   'location': '1234 Main St.'}]}

In [None]:
# List the spaces known to the remote API.
# The explicit timeout stops an unreachable host from hanging the kernel.
requests.get(f"{BASE_URL_REMOTE}spaces", timeout=30).json()

{'spaces': [{'building_id': 'b001',
   'name': 'Room 0',
   'type': 'Operation',
   'floor': 1,
   'space_id': 'sp1'},
  {'building_id': 'b001',
   'name': 'Room 1',
   'type': 'Operation',
   'floor': 1,
   'space_id': 'sp2'},
  {'building_id': 'b001',
   'name': 'Room 2',
   'type': 'Operation',
   'floor': 1,
   'space_id': 'sp3'},
  {'building_id': 'b001',
   'name': 'Room 3',
   'type': 'Operation',
   'floor': 1,
   'space_id': 'sp4'},
  {'building_id': 'b001',
   'name': 'Engine lab',
   'floor': 2,
   'type': 'Lab',
   'space_id': 'lab002'}]}

In [None]:
# 2. Space Data
# Payload describing the lab space that all simulated batteries are attached to.
space = dict(
    space_id="lab001",
    building_id="b001",
    name="Battery Test Lab",
    type="laboratory",
    floor=2,
)
space

{'space_id': 'lab001',
 'building_id': 'b001',
 'name': 'Battery Test Lab',
 'type': 'laboratory',
 'floor': 2}

In [None]:
# Create the lab space on the remote API and show the server's reply.
# The explicit timeout stops an unreachable host from hanging the kernel.
response = requests.post(
    f"{BASE_URL_REMOTE}spaces",
    headers=HEADERS,
    data=json.dumps(space),
    timeout=30,
)
print(f"Space Post Status: {response.status_code} - {response.text}")

Space Post Status: 201 - {"space":{"building_id":"b001","name":"Battery Test Lab","floor":2,"type":"laboratory","space_id":"lab001"}}


In [None]:
# Re-read the spaces collection to confirm the new space is present.
# The explicit timeout stops an unreachable host from hanging the kernel.
response = requests.get(f"{BASE_URL_REMOTE}spaces", timeout=30)
print(f"Space Get Status: {response.status_code} - {response.text}")

Space Get Status: 200 - {"spaces":[{"building_id":"b001","name":"Room 0","type":"Operation","floor":1,"space_id":"sp1"},{"building_id":"b001","name":"Room 1","type":"Operation","floor":1,"space_id":"sp2"},{"building_id":"b001","name":"Room 2","type":"Operation","floor":1,"space_id":"sp3"},{"building_id":"b001","name":"Room 3","type":"Operation","floor":1,"space_id":"sp4"},{"building_id":"b001","name":"Engine lab","type":"Lab","floor":2,"space_id":"lab002"},{"building_id":"b001","name":"Battery Test Lab","type":"laboratory","floor":2,"space_id":"lab001"}]}


In [None]:
# 3. Equipment Data (one per battery)
# Batteries are numbered from 1; each one lives in space "lab001" and takes
# its manufacturer from the simulated `manufacturers` array.
equipment_list = []
for battery_idx in range(n_batteries):
    battery_no = battery_idx + 1
    equipment_record = {
        "equipment_id": f"batt_{battery_no}",
        "space_id": "lab001",
        "name": f"Battery {battery_no}",
        "type": "Battery",
        "manufacturer": manufacturers[battery_idx],
        "model": "SimBatt-2025",
    }
    equipment_list.append(equipment_record)

In [None]:
# Register every battery with the remote API, one POST per equipment record.
# The explicit timeout stops an unreachable host from hanging the kernel.
for equip in equipment_list:
    response = requests.post(
        f"{BASE_URL_REMOTE}equipment",
        headers=HEADERS,
        data=json.dumps(equip),
        timeout=30,
    )
    print(f"Equipment {equip['equipment_id']} Post Status: {response.status_code} - {response.text}")

Equipment batt_1 Post Status: 201 - {"equipment":{"name":"Battery 1","model":"SimBatt-2025","type":"Battery","space_id":"lab001","equipment_id":"batt_1","manufacturer":"B"}}
Equipment batt_2 Post Status: 201 - {"equipment":{"name":"Battery 2","model":"SimBatt-2025","type":"Battery","space_id":"lab001","equipment_id":"batt_2","manufacturer":"B"}}
Equipment batt_3 Post Status: 201 - {"equipment":{"name":"Battery 3","model":"SimBatt-2025","type":"Battery","space_id":"lab001","equipment_id":"batt_3","manufacturer":"A"}}
Equipment batt_4 Post Status: 201 - {"equipment":{"name":"Battery 4","model":"SimBatt-2025","type":"Battery","space_id":"lab001","equipment_id":"batt_4","manufacturer":"A"}}
Equipment batt_5 Post Status: 201 - {"equipment":{"name":"Battery 5","model":"SimBatt-2025","type":"Battery","space_id":"lab001","equipment_id":"batt_5","manufacturer":"B"}}
Equipment batt_6 Post Status: 201 - {"equipment":{"name":"Battery 6","model":"SimBatt-2025","type":"Battery","space_id":"lab001","

In [None]:
# 4. Sensor Data (two sensors per battery: power and temperature)
# Records are appended battery by battery, power sensor first, then temperature.
sensors_list = []
for battery_no in range(1, n_batteries + 1):
    equipment_ref = f"batt_{battery_no}"
    sensors_list.extend([
        # Power sensor for Load
        {
            "sensor_id": f"s_power_{battery_no}",
            "equipment_id": equipment_ref,
            "type": "power",
            "unit": "kW",
            "measurement_range": "0-50",
        },
        # Temperature sensor
        {
            "sensor_id": f"s_temp_{battery_no}",
            "equipment_id": equipment_ref,
            "type": "temperature",
            "unit": "°C",
            "measurement_range": "0-100",
        },
    ])

In [None]:
# Register every sensor with the remote API, one POST per sensor record.
# The explicit timeout stops an unreachable host from hanging the kernel.
for sensor in sensors_list:
    response = requests.post(
        f"{BASE_URL_REMOTE}sensors",
        headers=HEADERS,
        data=json.dumps(sensor),
        timeout=30,
    )
    print(f"Sensor {sensor['sensor_id']} Post Status: {response.status_code} - {response.text}")

In [None]:
import hashlib

In [None]:
# Updated Data Points with 24-character hash
# Builds one power reading and one temperature reading per battery per hour.


def _data_point_id(sensor_id, timestamp, hour):
    """Return a deterministic 24-hex-character id for one sensor reading.

    The id is the first 24 characters of the SHA-256 hex digest of
    "<sensor_id>_<timestamp>_<hour>".
    """
    hash_input = f"{sensor_id}_{timestamp}_{hour}"
    return hashlib.sha256(hash_input.encode()).hexdigest()[:24]


data_points = []
for i in range(n_batteries):
    batt_usage = hourly_usage[i]
    batt_temp = temperature[i]
    batt_load = load[i]
    # Differencing the cumulative series (with a leading 0) recovers the
    # per-day usage draws for this battery.
    hourly_increments = np.diff(np.concatenate([[0], batt_usage]))
    # Bound the day index by the data itself. The original compared against
    # `days_in_march`, a name that was never defined, so a fresh kernel
    # raised NameError as soon as the first day's usage was consumed.
    last_daily_index = len(hourly_increments) - 1
    daily_index = 0
    remaining_hours = hourly_increments[0]

    for hour in range(total_hours):
        timestamp = (start_time + timedelta(hours=hour)).isoformat() + "Z"

        # Move on to the next day's usage budget once the current one is spent.
        if remaining_hours <= 0 and daily_index < last_daily_index:
            daily_index += 1
            remaining_hours = hourly_increments[daily_index]

        in_use = remaining_hours > 0
        power_sensor_id = f"s_power_{i+1}"
        temp_sensor_id = f"s_temp_{i+1}"

        # Power reading: the battery's load while in use, otherwise 0.
        power_data = {
            "data_point_id": _data_point_id(power_sensor_id, timestamp, hour),
            "sensor_id": power_sensor_id,
            "timestamp": timestamp,
            "value": batt_load if in_use else 0
        }
        data_points.append(power_data)

        # Temperature reading: full temperature while in use, 80% of it when idle.
        temp_data = {
            "data_point_id": _data_point_id(temp_sensor_id, timestamp, hour),
            "sensor_id": temp_sensor_id,
            "timestamp": timestamp,
            "value": batt_temp if in_use else batt_temp * 0.8
        }
        data_points.append(temp_data)

        # NOTE(review): the budget is drawn in hours (1–3 per day upstream) but
        # is consumed at 1/hours_per_day per simulated hour, so one day's draw
        # lasts 24–72 simulated hours and the idle branch is effectively never
        # reached within total_hours. Behaviour kept as-is; confirm the intent.
        if in_use:
            remaining_hours -= 1 / hours_per_day

# Nothing has been sent to the API at this point; this is the number of
# records generated in memory.
print(f"Total Data Points Posted: {len(data_points)}")

Total Data Points Posted: 14880


In [None]:
# Spot-check a single generated record by its position in the list.
data_points[14000]

{'data_point_id': '46c0bd5d6df87c928ae1e8b1',
 'sensor_id': 's_power_10',
 'timestamp': '2025-03-13T16:00:00Z',
 'value': 29.524626122529153}

In [None]:
# Post data points in batches
# The data-ingest endpoint is called once per data point. `batch_size` only
# groups the progress output: one summary line per batch, plus one line for
# every rejected point, instead of one line for each of the ~15k requests.
batch_size = 1000
failed_points = 0
# A single Session reuses the HTTP connection across all requests.
with requests.Session() as session:
    for i in range(0, len(data_points), batch_size):
        batch = data_points[i:i + batch_size]
        batch_failures = 0
        for point in batch:
            # For data-ingest, use the expected format with "device_id" and "data"
            ingest_data = {
                "device_id": point["sensor_id"],
                "timestamp": point["timestamp"],
                "data": {"load" if "power" in point["sensor_id"] else "temperature": point["value"]}
            }
            # The explicit timeout stops an unreachable host from hanging the kernel.
            response = session.post(
                f"{BASE_URL_REMOTE}data-ingest",
                headers=HEADERS,
                data=json.dumps(ingest_data),
                timeout=30,
            )
            if not response.ok:
                batch_failures += 1
                print(f"Data Point {point['data_point_id']} for {point['sensor_id']} at {point['timestamp']} Status: {response.status_code} - {response.text}")
        failed_points += batch_failures
        print(f"Batch {i // batch_size + 1}: {len(batch) - batch_failures}/{len(batch)} data points accepted")

print(f"Data points sent: {len(data_points)}, rejected: {failed_points}")

In [None]:
# Inspect the first generated record.
# NOTE(review): the output saved with this cell shows the id 'dp_power_1_1',
# which is not the 24-character hash format the generation cell produces, so
# that output looks stale. Re-run the notebook top to bottom to refresh it.
data_points[0]

{'data_point_id': 'dp_power_1_1',
 'sensor_id': 's_power_1',
 'timestamp': '2025-03-01T00:00:00Z',
 'value': 1.1094985399536985}

In [None]:
# Persist the generated records as pretty-printed JSON files in the working
# directory, one file per collection.
for out_name, payload in (
    ('equipment.json', equipment_list),
    ('sensors.json', sensors_list),
    ('data_points.json', data_points),
):
    with open(out_name, 'w') as out_file:
        json.dump(payload, out_file, indent=4)