# proxmox_node_status

Processes basic Proxmox status samples for each node (bronze → silver), then aggregates them into hourly min/avg/max CPU and memory usage per node in the gold layer.

## Load Bronze Data

In [0]:
-- LOAD BRONZE
-- Builds a session-scoped view over the raw Proxmox node-status JSONL files,
-- flattened to one row per collected status sample.
-- Only partitions from the last 3 days are read (dt >= current_date - 3); the window is hard-coded below.
-- NOTE(review): an earlier version of this comment referred to a bronze_load_window_days parameter,
-- but nothing in this cell reads it — confirm whether it should be wired into the WHERE clause.
--
-- CREATE OR REPLACE swaps the view in a single statement. The previous DROP VIEW IF EXISTS + CREATE
-- pair was not atomic, and an unqualified DROP VIEW can resolve to a permanent view in the current
-- schema when no temporary view of that name exists yet.
CREATE OR REPLACE TEMPORARY VIEW node_status_bronze_view AS
SELECT
  -- Extract node name from meta.host URL (e.g., "hl4" from "https://hl4.d.mckaylab.me:8006"):
  -- everything between "https://" and the first dot
  regexp_extract(meta.host, 'https://([^.]+)', 1) AS node_name,
  -- Derive status from uptime: uptime > 0 means online; zero, negative or NULL uptime falls through to offline
  CASE
    WHEN payload.data.uptime > 0 THEN 'online'
    ELSE 'offline'
  END AS status,
  payload.data.cpu AS cpu,
  payload.data.cpuinfo.cpus AS maxcpu,
  payload.data.memory.used AS mem,
  payload.data.memory.total AS maxmem,
  payload.data.uptime AS uptime,
  -- Collection time reported in meta; used downstream as part of the silver dedup key
  CAST(meta.collected_at_utc AS TIMESTAMP) AS ingestion_timestamp,
  -- dt is not a field selected from the JSON payload; presumably it is the partition column
  -- inferred from the directory layout — TODO confirm
  dt AS partition_date
FROM
  read_files(
    'abfss://homelab-telemetry@sa01homelabdata.dfs.core.windows.net/bronze/proxmox/node_status/*/*.jsonl',
    format => 'json'
  )
WHERE
  dt >= date_sub(current_date(), 3);

In [0]:
-- Sanity check on the bronze view: total rows, number of distinct nodes,
-- and the first/last partition date that made it into the load window
SELECT
  COUNT(*)                    AS total_rows,
  COUNT(DISTINCT b.node_name) AS distinct_nodes,
  MIN(b.partition_date)       AS earliest_date,
  MAX(b.partition_date)       AS latest_date
FROM
  node_status_bronze_view AS b;

## Write into Silver

In [0]:
-- MERGE BRONZE INTO SILVER (insert-only)
-- A sample is identified by (node_name, ingestion_timestamp). Samples already present in silver
-- are left untouched; only new ones are inserted.
--
-- The source is de-duplicated on that key before merging: an insert-only MERGE compares source rows
-- against the target only, so two copies of the same sample inside one bronze batch would otherwise
-- both be inserted. When duplicates exist, the copy with the highest uptime is kept
-- (the choice among exact ties is arbitrary).
--
-- Keys are compared with the null-safe operator <=> so that a row whose key is NULL matches itself
-- on later runs instead of being re-inserted every time.
MERGE INTO
  homelab.silver.proxmox_node_status AS target
USING (
  SELECT
    *
  FROM
    node_status_bronze_view
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY node_name, ingestion_timestamp
      ORDER BY uptime DESC
    ) = 1
) AS source
ON
  target.node_name <=> source.node_name
  AND target.ingestion_timestamp <=> source.ingestion_timestamp
WHEN NOT MATCHED THEN INSERT *;

## Write Gold

In [0]:
-- UPSERT GOLD (recomputed from ALL silver history on every run)
-- Aggregates silver samples to one row per node per hour: min/avg/peak CPU and memory, plus max uptime.
--
-- Written as a MERGE keyed on (node_key, date_hour_key) so that a rerun refreshes the hours that
-- already exist (including a partially collected current hour) instead of appending a second copy
-- of every hour, which is what a plain INSERT INTO over the full history does.
--
-- NOTE(review): assumes the fact table's column names match the aliases used below (the earlier
-- INSERT was positional, so names were never checked) and that date_hour_key is unique per
-- (report_date, report_hour) in dim_date_hour — confirm both.
-- NOTE(review): rows duplicated by earlier runs are not removed by this statement; a one-time
-- cleanup of the fact table may be needed.
MERGE INTO
  homelab.gold.fact_proxmox_node_metrics_hourly AS target
USING (
  WITH samples AS (
    -- One row per silver sample, resolved to its node and date-hour dimension keys.
    -- Samples without a matching dim_node or dim_date_hour row are dropped by the inner joins.
    SELECT
      dn.node_key,
      ddh.date_hour_key,
      s.partition_date AS report_date,
      hour(s.ingestion_timestamp) AS report_hour,
      s.cpu,
      s.uptime,
      -- Memory used as a fraction of total. Samples with a zero, negative or NULL maxmem count as 0
      -- (they are not excluded), so they pull the hourly min/avg down.
      CASE WHEN s.maxmem > 0 THEN s.mem / s.maxmem ELSE 0 END AS mem_ratio
    FROM
      homelab.silver.proxmox_node_status AS s
    INNER JOIN homelab.gold.dim_node AS dn
      ON s.node_name = dn.node_name
    INNER JOIN homelab.gold.dim_date_hour AS ddh
      ON s.partition_date = ddh.report_date
      AND hour(s.ingestion_timestamp) = ddh.report_hour
  )
  SELECT
    node_key,
    date_hour_key,
    report_date,
    report_hour,
    -- cpu is not multiplied by 100: per the original author's note it is already stored as a % in the jsonl
    ROUND(AVG(cpu), 4) AS avg_cpu_pct,
    ROUND(MAX(cpu), 4) AS peak_cpu_pct,
    ROUND(MIN(cpu), 4) AS min_cpu_pct,
    ROUND(AVG(mem_ratio) * 100, 2) AS avg_mem_pct,
    ROUND(MAX(mem_ratio) * 100, 2) AS peak_mem_pct,
    ROUND(MIN(mem_ratio) * 100, 2) AS min_mem_pct,
    MAX(uptime) AS uptime_seconds
  FROM
    samples
  GROUP BY
    node_key,
    date_hour_key,
    report_date,
    report_hour
) AS source
ON
  target.node_key = source.node_key
  AND target.date_hour_key = source.date_hour_key
WHEN MATCHED THEN UPDATE SET
  target.report_date = source.report_date,
  target.report_hour = source.report_hour,
  target.avg_cpu_pct = source.avg_cpu_pct,
  target.peak_cpu_pct = source.peak_cpu_pct,
  target.min_cpu_pct = source.min_cpu_pct,
  target.avg_mem_pct = source.avg_mem_pct,
  target.peak_mem_pct = source.peak_mem_pct,
  target.min_mem_pct = source.min_mem_pct,
  target.uptime_seconds = source.uptime_seconds
WHEN NOT MATCHED THEN INSERT (
  node_key,
  date_hour_key,
  report_date,
  report_hour,
  avg_cpu_pct,
  peak_cpu_pct,
  min_cpu_pct,
  avg_mem_pct,
  peak_mem_pct,
  min_mem_pct,
  uptime_seconds
) VALUES (
  source.node_key,
  source.date_hour_key,
  source.report_date,
  source.report_hour,
  source.avg_cpu_pct,
  source.peak_cpu_pct,
  source.min_cpu_pct,
  source.avg_mem_pct,
  source.peak_mem_pct,
  source.min_mem_pct,
  source.uptime_seconds
);