# proxmox_node_status

Loads raw Proxmox node status readings from the bronze layer, merges them into the silver layer, and aggregates them into hourly min/avg/max CPU and memory utilization per node in the gold layer.

# Ensure Tables and Parameters are Set Up

In [0]:
-- SILVER LAYER SETUP
-- Both statements are guarded with IF NOT EXISTS, so re-running this cell
-- against an already-provisioned workspace is a no-op.
CREATE SCHEMA IF NOT EXISTS main.silver;

-- Cleaned node status readings, stored as a Delta table at an explicit
-- storage location and partitioned by partition_date.
CREATE TABLE IF NOT EXISTS main.silver.proxmox_node_status (
  node_name           STRING,
  status              STRING,
  cpu                 DOUBLE,
  maxcpu              INT,
  mem                 BIGINT,
  maxmem              BIGINT,
  uptime              BIGINT,
  ingestion_timestamp TIMESTAMP,
  partition_date      DATE
)
USING DELTA
PARTITIONED BY (partition_date)
LOCATION 'abfss://homelab-telemetry@sa01homelabdata.dfs.core.windows.net/silver/proxmox/node_status/';


In [0]:

-- GOLD LAYER SETUP
-- Both statements are guarded with IF NOT EXISTS, so re-running this cell
-- against an already-provisioned workspace is a no-op.
CREATE SCHEMA IF NOT EXISTS main.gold;

-- Hourly per-node CPU / memory summary (avg, peak, min as percentages)
-- plus an uptime figure, stored as a Delta table at an explicit location.
CREATE TABLE IF NOT EXISTS main.gold.hourly_node_performance_metrics (
  node_name      STRING,
  report_date    DATE,
  report_hour    INT,
  avg_cpu_pct    DOUBLE,
  peak_cpu_pct   DOUBLE,
  min_cpu_pct    DOUBLE,
  avg_mem_pct    DOUBLE,
  peak_mem_pct   DOUBLE,
  min_mem_pct    DOUBLE,
  uptime_seconds BIGINT
)
USING DELTA
LOCATION 'abfss://homelab-telemetry@sa01homelabdata.dfs.core.windows.net/gold/proxmox/hourly_node_metrics/';

# Load Bronze Data

In [0]:
-- STAGE BRONZE
-- Reads the raw JSONL files and keeps only the rolling window of the last N
-- days, where N comes from the bronze_load_window_days parameter (documented
-- default: 3). The result is staged in a temporary table that is dropped and
-- rebuilt on every run.
DROP TEMPORARY TABLE IF EXISTS node_status_bronze_view;

CREATE TEMPORARY TABLE node_status_bronze_view AS
WITH recent_bronze AS (
  -- Apply the date window first so the projection below only sees recent rows.
  SELECT
    meta,
    payload,
    dt
  FROM
    read_files(
      'abfss://homelab-telemetry@sa01homelabdata.dfs.core.windows.net/bronze/proxmox/node_status/*/*.jsonl',
      format => 'json'
    )
  WHERE
    dt >= date_sub(current_date(), CAST(:bronze_load_window_days AS INT))
)
SELECT
  -- Node name is the first host label of the meta.host URL,
  -- e.g. "hl4" from "https://hl4.d.mckaylab.me:8006".
  regexp_extract(meta.host, 'https://([^.]+)', 1) AS node_name,
  -- Status is derived, not read: a positive uptime means 'online'; anything
  -- else (including a missing uptime) falls through to 'offline'.
  CASE WHEN payload.data.uptime > 0 THEN 'online' ELSE 'offline' END AS status,
  payload.data.cpu AS cpu,
  payload.data.cpuinfo.cpus AS maxcpu,
  payload.data.memory.used AS mem,
  payload.data.memory.total AS maxmem,
  payload.data.uptime AS uptime,
  -- The collector's own timestamp (meta.collected_at_utc) is the reading time.
  CAST(meta.collected_at_utc AS TIMESTAMP) AS ingestion_timestamp,
  dt AS partition_date
FROM
  recent_bronze

In [0]:
-- Sanity check on the staged bronze rows: total row count, number of
-- distinct nodes, and the date range covered by the load window.
SELECT
  COUNT(*) AS total_rows,
  COUNT(DISTINCT staged.node_name) AS distinct_nodes,
  MIN(staged.partition_date) AS earliest_date,
  MAX(staged.partition_date) AS latest_date
FROM
  node_status_bronze_view AS staged;

# Write into Silver

In [0]:
-- MERGE BRONZE INTO SILVER
-- A reading is identified by (node_name, ingestion_timestamp); only readings
-- not already present in silver are inserted, so re-running is safe.
--
-- Two guards make the dedup actually hold:
--   1. The source is collapsed to one row per key before merging. MERGE only
--      compares source rows against the target, so duplicate readings inside
--      the same bronze batch would otherwise all be inserted.
--   2. Keys are compared with the null-safe operator (<=>). With plain "=",
--      a row whose node_name or ingestion_timestamp is NULL never matches its
--      own earlier copy and would be re-inserted on every run.
MERGE INTO
  main.silver.proxmox_node_status AS target
USING (
  SELECT
    node_name,
    status,
    cpu,
    maxcpu,
    mem,
    maxmem,
    uptime,
    ingestion_timestamp,
    partition_date
  FROM
    node_status_bronze_view
  -- Keep a single row per key; the ORDER BY only makes the pick stable when
  -- duplicate readings differ in some other column.
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY node_name, ingestion_timestamp
      ORDER BY partition_date DESC, uptime DESC
    ) = 1
) AS source
ON
  target.node_name <=> source.node_name
  AND target.ingestion_timestamp <=> source.ingestion_timestamp
WHEN NOT MATCHED THEN
  -- Explicit column mapping instead of INSERT * so a schema change on either
  -- side fails loudly rather than silently misaligning columns.
  INSERT (
    node_name,
    status,
    cpu,
    maxcpu,
    mem,
    maxmem,
    uptime,
    ingestion_timestamp,
    partition_date
  )
  VALUES (
    source.node_name,
    source.status,
    source.cpu,
    source.maxcpu,
    source.mem,
    source.maxmem,
    source.uptime,
    source.ingestion_timestamp,
    source.partition_date
  );

# Write Gold

In [0]:
-- REBUILD GOLD (processing ALL historical data)
-- Aggregates silver readings to one row per node / date / hour with
-- min / avg / max CPU and memory utilization as percentages.
--
-- The query always re-aggregates the entire silver table, so the result must
-- REPLACE the gold table's contents. Appending with INSERT INTO would add a
-- fresh copy of every historical hour on each run. INSERT OVERWRITE keeps the
-- cell idempotent and also clears duplicates left by earlier appends.
-- NOTE(review): if silver is ever pruned by retention, switch this to a MERGE
-- keyed on (node_name, report_date, report_hour) so older gold rows survive.
INSERT OVERWRITE TABLE main.gold.hourly_node_performance_metrics (
  node_name,
  report_date,
  report_hour,
  avg_cpu_pct,
  peak_cpu_pct,
  min_cpu_pct,
  avg_mem_pct,
  peak_mem_pct,
  min_mem_pct,
  uptime_seconds
)
SELECT
  node_name,
  partition_date AS report_date,
  hour(ingestion_timestamp) AS report_hour,
  -- cpu is scaled by 100 to express it as a percentage.
  ROUND(AVG(cpu) * 100, 2) AS avg_cpu_pct,
  ROUND(MAX(cpu) * 100, 2) AS peak_cpu_pct,
  ROUND(MIN(cpu) * 100, 2) AS min_cpu_pct,
  -- NULLIF guards the ratio: a reading with maxmem = 0 yields NULL (which the
  -- aggregates skip) instead of a divide-by-zero error under ANSI mode.
  ROUND(AVG(mem / NULLIF(maxmem, 0)) * 100, 2) AS avg_mem_pct,
  ROUND(MAX(mem / NULLIF(maxmem, 0)) * 100, 2) AS peak_mem_pct,
  ROUND(MIN(mem / NULLIF(maxmem, 0)) * 100, 2) AS min_mem_pct,
  -- Highest uptime value observed within the hour.
  MAX(uptime) AS uptime_seconds
FROM
  main.silver.proxmox_node_status
-- Group by the underlying expressions rather than the SELECT aliases so the
-- grouping does not depend on alias resolution.
GROUP BY
  node_name,
  partition_date,
  hour(ingestion_timestamp);