In [1]:
import requests
import pandas as pd
from math import ceil
from time import sleep
import time
import json
from IPython.display import display

# 1. Data Retrieval and Integration

In [2]:
BASE = "https://api.openelectricity.org.au/v4"
TOKEN = "oe_3ZPA7phVarPYZc7Qks7uijfE" 
NETWORK = "NEM"     

HEADERS = {
    "Authorization": f"Bearer {TOKEN}",
    "Accept": "application/json",
}


DATE_START = "2025-10-01T00:00:00"  
DATE_END   = "2025-10-08T00:00:00" 


In [3]:
#get all facilities and related units in NEM
def get_nem_facilities():
    url = f"{BASE}/facilities/"
    # 按文档：network_id 可传数组；也可用 status_id / fueltech_id 做更细过滤
    params = {
        "network_id": ["NEM"],          # 只要 NEM
        "status_id": ["operating"],     # 只要运营中
       
    }
    r = requests.get(url, headers=HEADERS, params=params, timeout=60)
    r.raise_for_status()
    js = r.json()
 
    fac_df = pd.json_normalize(js["data"])  # 设施层
    # 展开 units 列为行（record_path=units），带上 facility 的 code 作为 meta
    units_df = pd.json_normalize(
    js["data"],
    record_path="units",
    meta=["code"],
    meta_prefix="facility_",    # ← 防止重复名
    errors="ignore"
    ).rename(columns={"code": "unit_code", "facility_code": "facility_code"})

    # 某些版本列名就是 'code'（unit）与 'facility_code'（meta），做个兜底：
    if "unit_code" not in units_df.columns:
        # 常见字段名：'code'（unit 的 code）
        if "code" in units_df.columns:
            units_df = units_df.rename(columns={"code": "unit_code"})

    return fac_df, units_df

fac_df, units_df = get_nem_facilities()

units_df.head()

Unnamed: 0,unit_code,fueltech_id,status_id,capacity_registered,capacity_maximum,data_first_seen,data_last_seen,dispatch_type,created_at,updated_at,capacity_storage,emissions_factor_co2,facility_code
0,ADPPV1,solar_utility,operating,24.75,19.0,2021-05-18T13:10:00+10:00,2025-11-07T15:00:00+10:00,GENERATOR,2023-10-18T04:34:30Z,2024-12-16T23:52:12Z,,,ADP
1,ADPPV2,solar_utility,operating,0.2,0.2,,,GENERATOR,2023-10-18T04:34:30Z,2024-12-16T23:50:10Z,,,ADP
2,ADPPV3,solar_utility,operating,0.02,0.02,,,GENERATOR,2023-10-18T04:34:30Z,2024-12-16T23:51:11Z,,,ADP
3,ADPBA1G,battery_discharging,operating,7.76,6.15,2021-05-18T10:55:00+10:00,2025-11-07T15:00:00+10:00,GENERATOR,2023-10-18T04:34:30Z,2025-06-23T05:34:25Z,12.6,,ADP
4,ADPBA1L,battery_charging,operating,7.76,6.15,2021-05-18T09:55:00+10:00,2025-11-07T14:55:00+10:00,LOAD,2023-10-18T04:34:30Z,2025-06-23T05:34:16Z,12.6,,ADP


In [4]:
fac_df.head()

Unnamed: 0,code,name,network_id,network_region,description,units,updated_at,created_at,location.lat,location.lng
0,ADP,Adelaide Desalination,NEM,SA1,"<p>The Adelaide Desalination plant (ADP), form...","[{'code': 'ADPPV1', 'fueltech_id': 'solar_util...",2025-08-05T06:08:12Z,2023-10-18T04:34:30Z,-35.096948,138.484061
1,ALDGASF,Aldoga,NEM,QLD1,<p>The Aldoga Solar Farm will be approximately...,"[{'code': 'ALDGASF1', 'fueltech_id': 'solar_ut...",2025-03-25T00:52:44Z,2025-01-31T04:19:33Z,-23.839544,151.0849
2,ANGASTON,Angaston,NEM,SA1,<p>Angaston Power Station is a diesel-powered ...,"[{'code': 'ANGAST1', 'fueltech_id': 'distillat...",2025-09-07T01:53:13Z,2023-10-18T04:34:32Z,-34.503948,139.024296
3,APPIN,Appin,NEM,NSW1,"<p>In a world first, EDL developed the largest...","[{'code': 'APPIN', 'fueltech_id': 'gas_wcmg', ...",2025-09-07T01:53:15Z,2023-10-18T04:34:32Z,-34.210868,150.792711
4,ARWF,Ararat,NEM,VIC1,<p>Ararat Wind Farm is wind farm in western Vi...,"[{'code': 'ARWF1', 'fueltech_id': 'wind', 'sta...",2025-07-08T03:42:06Z,2023-10-18T04:34:32Z,-37.263393,143.082116


In [5]:
#get specific facilities time-series data
def fetch_facility_timeseries(facility_codes, date_start=DATE_START, date_end=DATE_END):
    """对一批 facility_codes 请求两种指标（power, emissions），5m 粒度"""
    url = f"{BASE}/data/facilities/{NETWORK}"
    params = {
        "metrics": ["power", "emissions"],
        "interval": "5m",
        "facility_code": facility_codes,  # 批量
        "date_start": date_start,
        "date_end": date_end,
    }
    r = requests.get(url, headers=HEADERS, params=params, timeout=120)
    r.raise_for_status()
    return r.json()

def timeseries_to_df(js):
    """把返回扁平化为：
       timestamp | unit_code | metric | value | unit
    """
    rows = []
    data_block = js.get("data", [])
    if not isinstance(data_block, list):
        return pd.DataFrame(rows)  # 可能是 '-'，直接空表

    for blk in data_block:
        if not isinstance(blk, dict):
            continue
        metric = blk.get("metric")  # 'power' / 'emissions'
        unit   = blk.get("unit")    # 'MW' / 'tCO2e'
        results = blk.get("results", [])
        for res in results:
            if not isinstance(res, dict):
                continue
            # unit_code 优先从 columns 取；没有就从 name 拆
            unit_code = None
            cols = res.get("columns")
            if isinstance(cols, dict):
                unit_code = cols.get("unit_code")
            name = res.get("name")
            if not unit_code and isinstance(name, str) and "_" in name:
                unit_code = name.split("_", 1)[1]  # e.g. power_ADPBA1 → ADPBA1

            for item in res.get("data", []):  # item = [timestamp, value]
                if isinstance(item, (list, tuple)) and len(item) >= 2:
                    ts, val = item[0], item[1]
                    rows.append({
                        "timestamp": ts,
                        "unit_code": unit_code,
                        "metric": metric,
                        "value": val,
                        "unit": unit
                    })
    return pd.DataFrame(rows)

# 先拿到 facility_code 列表（可按需过滤 fueltech/network_region 来减少范围）
facility_codes = fac_df["code"].dropna().unique().tolist()

# 日配额考虑：把 batch 调大一点（但单次响应也会更大）
BATCH   = 30
SLEEP_S = 0.25

all_parts = []
req_count = 0

for i in range(0, len(facility_codes), BATCH):
    batch = facility_codes[i:i+BATCH]
    js = fetch_facility_timeseries(batch)
    req_count += 1
    df_part = timeseries_to_df(js)
    all_parts.append(df_part)
    time.sleep(SLEEP_S)

df_unit = pd.concat(all_parts, ignore_index=True) if all_parts else pd.DataFrame()
print("requests used:", req_count, "rows:", len(df_unit))
display(df_unit.head())

requests used: 14 rows: 1958730


Unnamed: 0,timestamp,unit_code,metric,value,unit
0,2025-10-01T00:00:00+10:00,ADPBA1,power,-0.004,MW
1,2025-10-01T00:05:00+10:00,ADPBA1,power,-0.046,MW
2,2025-10-01T00:10:00+10:00,ADPBA1,power,0.0,MW
3,2025-10-01T00:15:00+10:00,ADPBA1,power,0.003,MW
4,2025-10-01T00:20:00+10:00,ADPBA1,power,-0.018,MW


In [6]:
# 建立 unit_code → facility_code 的映射
unit_to_fac = units_df[["unit_code", "facility_code"]].dropna().drop_duplicates()
df_unit = df_unit.merge(unit_to_fac, on="unit_code", how="left")

# per-facility 宽表：同一时间点、同一 facility 的 power/emissions 各一列（把多单元求和）
df_fac_wide = (
    df_unit
    .pivot_table(index=["timestamp", "facility_code"], columns="metric", values="value", aggfunc="sum")
    .rename_axis(columns=None)
    .reset_index()
    .sort_values(["timestamp", "facility_code"])
)

# 将 fac_df 中需要的列挑出来
fac_info = fac_df[["code", "name", "location.lat", "location.lng","network_region"]].rename(
    columns={"code": "facility_code", "name": "facility_name"}
)

# 左连接合并（保留 df_fac_wide 中的所有行）
df_fac_wide = df_fac_wide.merge(fac_info, on="facility_code", how="left")

# 查看结果
display(df_fac_wide.head())


Unnamed: 0,timestamp,facility_code,emissions,power,facility_name,location.lat,location.lng,network_region
0,2025-10-01T00:00:00+10:00,0MREH,0.0,0.0,Melbourne A1,-37.661274,144.726302,VIC1
1,2025-10-01T00:00:00+10:00,0MREHA2,0.0,0.0,Melbourne A2,-37.663934,144.726927,VIC1
2,2025-10-01T00:00:00+10:00,0TARONGBESS,0.0,0.0,Tarong,-26.780051,151.912068,QLD1
3,2025-10-01T00:00:00+10:00,0WAMBOWF,0.0,65.23,Wambo,-26.603045,151.246876,QLD1
4,2025-10-01T00:00:00+10:00,ADP,0.0,0.0,Adelaide Desalination,-35.096948,138.484061,SA1


In [7]:
def fetch_market_network(network_region=None,date_start=DATE_START, date_end=DATE_END):
    url = f"{BASE}/market/network/{NETWORK}"
    params = {
        "metrics": ["price", "demand"],
        "interval": "5m",
        "date_start": date_start,
        "date_end": date_end,
        "primary_grouping": "network_region",  # "network" 或 "network_region"
    }
    if network_region:
        params["network_region"] = network_region

    r = requests.get(url, headers=HEADERS, params=params, timeout=60)
    r.raise_for_status()
    return r.json()

# -------- 2) 扁平化为 DataFrame（长表：timestamp / metric / value / unit / network_region） --------
def market_to_df(js):
    """
    将 /v4/market/network 返回的数据扁平化为长表：
    列：timestamp | network_region | metric | value | unit
    兼容 columns.region / columns.network_region；必要时从 name 里解析区域。
    """
    rows = []
    data_block = js.get("data", [])
    if not isinstance(data_block, list):
        return pd.DataFrame(rows)

    for blk in data_block:
        metric = blk.get("metric")         # e.g. "price" / "demand"
        unit   = blk.get("unit")           # e.g. "$/MWh" / "MW"

        for res in blk.get("results", []):
            cols = res.get("columns") or {}
            name = res.get("name") or ""

            # 优先按你截图：columns 里是 {"region": "NSW1"}
            region = cols.get("region") or cols.get("network_region")

            # 兜底：从 name 里解析（如 "price_NSW1"）
            if region is None and isinstance(name, str):
                m = re.search(r'_(NSW1|VIC1|QLD1|SA1|TAS1)\b', name)
                region = m.group(1) if m else None

            # 展开该区域的时序点
            for item in res.get("data", []):
                if isinstance(item, (list, tuple)) and len(item) >= 2:
                    ts, val = item[0], item[1]
                    rows.append({
                        "timestamp": ts,
                        "network_region": region,
                        "metric": metric,
                        "value": val,
                        "unit": unit
                    })

    df = pd.DataFrame(rows)
    return df


js = fetch_market_network()

df_market = market_to_df(js)

# # 如需宽表（每行一个时间+区域，price/demand 两列）：
df_market_wide = (
    df_market
    .pivot_table(
        index=["timestamp", "network_region"],  # 每个区域、每个时间点一行
        columns="metric",                      # 每种指标（price, demand）变成一列
        values="value",                        # 数值列
        aggfunc="mean"                         # 遇到重复取最后一个（或可换成 'mean'）
    )
    .rename_axis(columns=None)                 # 去掉多余 axis 名称
    .reset_index()                             # 把索引恢复成普通列
    .sort_values(["timestamp", "network_region"])
)
display(df_market_wide.head(10))

Unnamed: 0,timestamp,network_region,demand,price
0,2025-10-01T00:00:00+10:00,NSW1,7105.57,56.98
1,2025-10-01T00:00:00+10:00,QLD1,5989.24,54.82
2,2025-10-01T00:00:00+10:00,SA1,1564.92,8.11
3,2025-10-01T00:00:00+10:00,TAS1,898.71,0.12
4,2025-10-01T00:00:00+10:00,VIC1,4893.49,8.95
5,2025-10-01T00:05:00+10:00,NSW1,7170.68,80.01
6,2025-10-01T00:05:00+10:00,QLD1,5920.4,67.3
7,2025-10-01T00:05:00+10:00,SA1,1565.38,0.01
8,2025-10-01T00:05:00+10:00,TAS1,897.18,0.2
9,2025-10-01T00:05:00+10:00,VIC1,4889.73,0.01


In [8]:
df_fac_wide = (
    df_fac_wide
    .merge(
        df_market_wide[["timestamp", "network_region", "demand", "price"]],
        on=["timestamp", "network_region"],
        how="left"
    )
)
df_fac_wide = df_fac_wide.rename(columns={"network_region": "market(network_region)"})
df_fac_wide.head()

Unnamed: 0,timestamp,facility_code,emissions,power,facility_name,location.lat,location.lng,market(network_region),demand,price
0,2025-10-01T00:00:00+10:00,0MREH,0.0,0.0,Melbourne A1,-37.661274,144.726302,VIC1,4893.49,8.95
1,2025-10-01T00:00:00+10:00,0MREHA2,0.0,0.0,Melbourne A2,-37.663934,144.726927,VIC1,4893.49,8.95
2,2025-10-01T00:00:00+10:00,0TARONGBESS,0.0,0.0,Tarong,-26.780051,151.912068,QLD1,5989.24,54.82
3,2025-10-01T00:00:00+10:00,0WAMBOWF,0.0,65.23,Wambo,-26.603045,151.246876,QLD1,5989.24,54.82
4,2025-10-01T00:00:00+10:00,ADP,0.0,0.0,Adelaide Desalination,-35.096948,138.484061,SA1,1564.92,8.11


# 2. Data Preprocessing

In [9]:
df_fac_wide.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 668353 entries, 0 to 668352
Data columns (total 10 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   timestamp               668353 non-null  object 
 1   facility_code           668353 non-null  object 
 2   emissions               668353 non-null  float64
 3   power                   668353 non-null  float64
 4   facility_name           668353 non-null  object 
 5   location.lat            668353 non-null  float64
 6   location.lng            668353 non-null  float64
 7   market(network_region)  668353 non-null  object 
 8   demand                  667033 non-null  float64
 9   price                   654823 non-null  float64
dtypes: float64(6), object(4)
memory usage: 51.0+ MB


In [10]:
#把时间转换成澳东时区
df_fac_wide["timestamp"] = pd.to_datetime(df_fac_wide["timestamp"], utc=True)
df_fac_wide["timestamp"] = df_fac_wide["timestamp"].dt.tz_convert("Australia/Sydney")

df_fac_wide["facility_code"]  = df_fac_wide["facility_code"].astype("string")
df_fac_wide["facility_name"]  = df_fac_wide["facility_name"].astype("string")
df_fac_wide["market(network_region)"]  = df_fac_wide["market(network_region)"].astype("string")

In [11]:
neg_demand = df_fac_wide[df_fac_wide["demand"] < 0]
print(f"Negative demand rows: {len(neg_demand)}")
display(neg_demand.head())

Negative demand rows: 290


Unnamed: 0,timestamp,facility_code,emissions,power,facility_name,location.lat,location.lng,market(network_region),demand,price
333607,2025-10-04 12:15:00+10:00,ADP,0.0,0.088,Adelaide Desalination,-35.096948,138.484061,SA1,-12.17,-31.19
333608,2025-10-04 12:15:00+10:00,AGLHAL,0.0,0.0,Hallett,-33.34931,138.752633,SA1,-12.17,-31.19
333611,2025-10-04 12:15:00+10:00,ANGASTON,0.0,0.0,Angaston,-34.503948,139.024296,SA1,-12.17,-31.19
333620,2025-10-04 12:15:00+10:00,BARKIPS,0.0,0.0,Barker Inlet,-34.804,138.524,SA1,-12.17,-31.19
333632,2025-10-04 12:15:00+10:00,BLUFF,0.0,0.0,The Bluff,-33.367741,138.79379,SA1,-12.17,-31.19


In [12]:
neg_demand = df_fac_wide[df_fac_wide["demand"] < 0]
print(f"Negative demand rows: {len(neg_demand)}")

ratio = (len(neg_demand) / len(df_fac_wide) * 100)
print(f"Negative demand ratio (%): {round(ratio, 2)}")

display(neg_demand.head())

Negative demand rows: 290
Negative demand ratio (%): 0.04


Unnamed: 0,timestamp,facility_code,emissions,power,facility_name,location.lat,location.lng,market(network_region),demand,price
333607,2025-10-04 12:15:00+10:00,ADP,0.0,0.088,Adelaide Desalination,-35.096948,138.484061,SA1,-12.17,-31.19
333608,2025-10-04 12:15:00+10:00,AGLHAL,0.0,0.0,Hallett,-33.34931,138.752633,SA1,-12.17,-31.19
333611,2025-10-04 12:15:00+10:00,ANGASTON,0.0,0.0,Angaston,-34.503948,139.024296,SA1,-12.17,-31.19
333620,2025-10-04 12:15:00+10:00,BARKIPS,0.0,0.0,Barker Inlet,-34.804,138.524,SA1,-12.17,-31.19
333632,2025-10-04 12:15:00+10:00,BLUFF,0.0,0.0,The Bluff,-33.367741,138.79379,SA1,-12.17,-31.19


In [13]:
neg_price = df_fac_wide[df_fac_wide["price"] < 0]
print(f"Negative price rows: {len(neg_price)}")
display(neg_price.head())

Negative price rows: 279826


Unnamed: 0,timestamp,facility_code,emissions,power,facility_name,location.lat,location.lng,market(network_region),demand,price
7260,2025-10-01 01:50:00+10:00,0MREH,0.0,1.4704,Melbourne A1,-37.661274,144.726302,VIC1,4300.21,-0.06
7261,2025-10-01 01:50:00+10:00,0MREHA2,0.0,0.0,Melbourne A2,-37.663934,144.726927,VIC1,4300.21,-0.06
7264,2025-10-01 01:50:00+10:00,ADP,0.0,0.0,Adelaide Desalination,-35.096948,138.484061,SA1,1513.45,-0.05
7265,2025-10-01 01:50:00+10:00,AGLHAL,0.0,0.0,Hallett,-33.34931,138.752633,SA1,1513.45,-0.05
7266,2025-10-01 01:50:00+10:00,AGLSOM,0.0,0.0,Somerton,-37.630949,144.953098,VIC1,4300.21,-0.06


In [14]:
df_fac_wide.to_csv("nem_per_facility_power_emissions_5m_2025-10-01_to_08.csv", index=False)

# 4.Data Subscription and Visualisation

In [15]:
from IPython.display import clear_output
import json
import threading
import time
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, text
import paho.mqtt.client as mqtt
import folium
from ipywidgets import interact, SelectMultiple
from IPython.display import display

In [28]:
# =========================
# COMP5339 A2 - Task 4
# Realtime MQTT Subscriber + Region & Fuel Filters + Folium Map
# =========================
BROKER_HOST = "test.mosquitto.org"
BROKER_PORT = 1883
MQTT_TOPIC  = "nem/yjia0057/power_emissions"
MAP_HTML_PATH = "dashboard_map.html"
MAP_REFRESH_INTERVAL = 15.0
LOCAL_TZ = "Australia/Sydney"

# ===================== 后台预过滤条件 =====================
ACTIVE_REGION_FILTERS = []  # 例如 ["NSW1", "QLD1"]
ACTIVE_FUEL_FILTERS   = []  # 例如 ["wind", "solar_utility"]
# =========================================================

# ----------------------------------------------------------------------
# 1. 从 units_df 构建 facility_code -> fueltech_id 映射
# ----------------------------------------------------------------------

facility_fuel_map = {}
try:
    fac_col = "facility_code"
    fuel_col = "fueltech_id"

    tmp = (
        units_df.dropna(subset=[fac_col, fuel_col])
                .groupby(fac_col)[fuel_col]
                .agg(lambda s: s.mode().iat[0] if not s.mode().empty else s.iloc[0])
    )

    facility_fuel_map = tmp.to_dict()

    print(f"[INIT] Built facility_fuel_map with {len(facility_fuel_map)} facilities.")
    print("[INIT] Example mapping (first 5):",
          list(facility_fuel_map.items())[:5])

except Exception as e:
    print("[INIT] Failed to build facility_fuel_map:", e)
    facility_fuel_map = {}

# ----------------------------------------------------------------------
# 2. 全局最新状态：latest_by_facility
# ----------------------------------------------------------------------

latest_lock = threading.Lock()
latest_by_facility: dict[str, dict] = {}


# ----------------------------------------------------------------------
# 3. MQTT 回调：合并 fuel 信息 + 存入 latest_by_facility
# ----------------------------------------------------------------------

def _to_local_sydney(ts_val):
    ts = pd.to_datetime(ts_val, errors="coerce")
    if pd.isna(ts):
        return None
    if getattr(ts, "tz", None) is None:
        try:
            return ts.tz_localize(LOCAL_TZ, ambiguous="infer", nonexistent="shift_forward")
        except Exception:
            return ts.tz_localize(LOCAL_TZ)
    else:
        return ts.tz_convert(LOCAL_TZ)


def merge_with_facility_meta(msg: dict) -> dict:
    """
    将 MQTT 消息融合 facility 的 fuel 信息，生成统一 record.
    """
    fac_code = msg.get("facility_code")
    facility_name = str(msg.get("facility_name", "")).strip()
    region = msg.get("region")

    lat = msg.get("lat")
    lon = msg.get("lon")

    fueltech = facility_fuel_map.get(fac_code)

    ts_raw = msg.get("timestamp")
    try:
        ts_loc = _to_local_sydney(ts_raw)
        ts_iso_loc = ts_loc.isoformat() if ts_loc is not None else ts_raw
    except Exception:
        ts_iso_loc = ts_raw

    record = {
        "timestamp": ts_iso_loc,
        "facility_code": fac_code,
        "facility_name": facility_name or fac_code,
        "nem_region": region,
        "lat": lat,
        "lon": lon,
        "power_mw": msg.get("power_mw"),
        "co2_t": msg.get("co2_t"),
        "price": msg.get("price"),
        "demand": msg.get("demand"),
        "seq": msg.get("seq"),
        "fueltech": fueltech,
    }
    return record


def on_connect(client, userdata, flags, reason_code, properties=None):
    print("[MQTT] Connected:", reason_code)
    client.subscribe(MQTT_TOPIC, qos=1)
    print(f"[MQTT] Subscribed to topic: {MQTT_TOPIC}")


def on_message(client, userdata, msg):
    global latest_by_facility
    try:
        payload = msg.payload.decode("utf-8")
        data = json.loads(payload)
    except Exception as e:
        print("[MQTT] Failed to decode message:", e)
        return

    record = merge_with_facility_meta(data)
    fac_code = record.get("facility_code")

    with latest_lock:
        latest_by_facility[fac_code] = record

    # 调试输出前几条
    if record.get("seq") in (1, 2, 3, 4, 5):
        print("[MQTT] sample record:", record)


def start_mqtt_subscriber():
    client = mqtt.Client()
    client.on_connect = on_connect
    client.on_message = on_message

    client.connect(BROKER_HOST, BROKER_PORT, keepalive=60)

    t = threading.Thread(target=client.loop_forever, daemon=True)
    t.start()
    print("[MQTT] Subscriber loop started (background).")
    return client


# ----------------------------------------------------------------------
# 4. 地图生成：Region + Fuel 双图层
# ----------------------------------------------------------------------

def make_map_from_latest(latest_records: list[dict]) -> folium.Map:
    """
    使用 latest_by_facility 里的记录生成 folium 地图。
    图层设计：
      - Region 图层：Region: NSW1 / Region: QLD1 / ...
      - Fuel 图层：Fuel: wind / Fuel: solar_utility / ...
      - 每个点会被加到对应的 region 图层 + fuel 图层
      - 所有图层在 HTML 中默认 show=False（进入页面时全部不勾选）
    """
    # 0. 没数据的情况
    if not latest_records:
        center_lat, center_lon = -25.0, 135.0
        fmap = folium.Map(location=[center_lat, center_lon],
                          zoom_start=5, tiles="cartodbpositron")
        folium.Marker(
            location=[center_lat, center_lon],
            popup="No MQTT data received yet...",
            icon=folium.Icon(color="gray", icon="info-sign")
        ).add_to(fmap)
        return fmap

    df = pd.DataFrame(latest_records)

    # 1. 后台预过滤
    cond = pd.Series(True, index=df.index)
    if ACTIVE_REGION_FILTERS:
        cond &= df["nem_region"].isin(ACTIVE_REGION_FILTERS)
    if ACTIVE_FUEL_FILTERS:
        cond &= df["fueltech"].isin(ACTIVE_FUEL_FILTERS)

    df = df[cond].copy()

    if df.empty:
        center_lat, center_lon = -25.0, 135.0
        fmap = folium.Map(location=[center_lat, center_lon],
                          zoom_start=5, tiles="cartodbpositron")
        folium.Marker(
            location=[center_lat, center_lon],
            popup="No facilities match the selected backend filters.",
            icon=folium.Icon(color="red", icon="info-sign")
        ).add_to(fmap)
        return fmap

    # 2. 地图中心
    lat_valid = df["lat"].dropna()
    lon_valid = df["lon"].dropna()
    if len(lat_valid) and len(lon_valid):
        center_lat = lat_valid.mean()
        center_lon = lon_valid.mean()
    else:
        center_lat, center_lon = -25.0, 135.0

    fmap = folium.Map(location=[center_lat, center_lon],
                      zoom_start=5, tiles="cartodbpositron")

    # 3. Header 信息（时间 + 价格 + 需求 + 当前后台过滤）
    ts_series = pd.to_datetime(df["timestamp"], errors="coerce")
    latest_ts = ts_series.max()
    if pd.notna(latest_ts):
        df_latest = df[ts_series == latest_ts]
    else:
        df_latest = df

    avg_price = df_latest["price"].dropna().mean()
    sum_demand = df_latest["demand"].dropna().sum()

    region_text = ", ".join(ACTIVE_REGION_FILTERS) if ACTIVE_REGION_FILTERS else "ALL"
    fuel_text = ", ".join(ACTIVE_FUEL_FILTERS) if ACTIVE_FUEL_FILTERS else "ALL"

    header_html = f"""
    <div style="position: fixed; 
                top: 10px; left: 50px; z-index: 9999; 
                background-color: white;
                padding: 8px 12px; 
                border: 1px solid #999;
                border-radius: 4px;
                box-shadow: 1px 1px 4px rgba(0,0,0,0.3);">
        <b>NEM Dashboard</b><br/>
        Latest timestamp: {latest_ts}<br/>
        Avg Price: {avg_price:.2f} $/MWh (approx.)<br/>
        Total Demand: {sum_demand:.2f} MW (approx.)<br/>
        Backend Region filter: {region_text}<br/>
        Backend Fuel filter: {fuel_text}
    </div>
    """
    fmap.get_root().html.add_child(folium.Element(header_html))

    # 4. fueltech → 颜色映射（按 fuel 上色）
    fuel_colors = {
        "wind": "green",
        "solar_utility": "orange",
        "solar_rooftop": "lightred",
        "battery": "purple",
        "battery_charging": "gray",
        "hydro": "blue",
        "coal_black": "black",
        "coal_brown": "darkred",
        "gas_ccgt": "red",
        "gas_ocgt": "darkblue",
        "gas_recip": "pink",
        "gas_steam": "lightblue",
        "gas_wcmg": "cadetblue",
        "bioenergy_biogas": "darkgreen",
        "bioenergy_biomass": "lightgreen",
        "pumps": "beige",
    }

    # 5. Region 图层（默认不勾选）
    region_groups = {}
    for reg in sorted(df["nem_region"].dropna().unique()):
        fg = folium.FeatureGroup(name=f"Region: {reg}", show=False)
        fg.add_to(fmap)
        region_groups[reg] = fg

    # 6. Fuel 图层
    fuel_groups = {}
    if "fueltech" in df.columns:
        fuels = sorted(df["fueltech"].dropna().unique())
        for fuel in fuels:
            fg = folium.FeatureGroup(name=f"Fuel: {fuel}", show=False)
            fg.add_to(fmap)
            fuel_groups[fuel] = fg

    # 7. 往两个图层里加点
    for _, row in df.iterrows():
        lat = row.get("lat")
        lon = row.get("lon")
        if pd.isna(lat) or pd.isna(lon):
            continue

        ps_name = row.get("facility_name")
        fac_code = row.get("facility_code")
        reg = row.get("nem_region")
        fuel = row.get("fueltech")
        power_mw = row.get("power_mw")
        co2_t = row.get("co2_t")
        price = row.get("price")
        demand = row.get("demand")
        ts = row.get("timestamp")

        popup_html = f"""
        <b>{ps_name}</b><br/>
        Facility code: {fac_code}<br/>
        NEM Region: {reg}<br/>
        Fueltech: {fuel}<br/>
        <br/>
        Timestamp: {ts}<br/>
        Power: {power_mw} MW<br/>
        Emissions: {co2_t} tCO₂<br/>
        Price: {price} $/MWh<br/>
        Demand: {demand} MW
        """

        color = fuel_colors.get(str(fuel), "gray")

        # 这个点对应的 region / fuel 图层
        targets = []

        reg_group = region_groups.get(reg)
        if reg_group is not None:
            targets.append(reg_group)

        fuel_group = fuel_groups.get(fuel)
        if fuel_group is not None:
            targets.append(fuel_group)

        if not targets:
            targets = [fmap]

        for g in targets:
            folium.Marker(
                location=[lat, lon],
                popup=folium.Popup(popup_html, max_width=300, min_width=200),
                icon=folium.Icon(color=color, icon="bolt", prefix="fa")
            ).add_to(g)

    # 8. LayerControl
    folium.LayerControl(collapsed=False).add_to(fmap)
    return fmap

def map_refresher_loop():
    print(f"[MAP] Refresher started, interval = {MAP_REFRESH_INTERVAL}s")
    while True:
        time.sleep(MAP_REFRESH_INTERVAL)
        with latest_lock:
            records = list(latest_by_facility.values())
        try:
            fmap = make_map_from_latest(records)
            fmap.save(MAP_HTML_PATH)
            #clear_output(wait=True)
            print(f"[MAP] Updated {len(records)} facilities -> {MAP_HTML_PATH}")
        except Exception as e:
            print("[MAP] Failed to update map:", e)

# ----------------------------------------------------------------------
# 5. main
# ----------------------------------------------------------------------

def main(run_duration=300):
    print("=== COMP5339 Assignment 2 - Task 4 Dashboard ===")
    print(f"[INFO] Region filters (backend): {ACTIVE_REGION_FILTERS or 'ALL'}")
    print(f"[INFO] Fuel filters   (backend): {ACTIVE_FUEL_FILTERS or 'ALL'}")

    start_mqtt_subscriber()

    t_map = threading.Thread(target=map_refresher_loop, daemon=True)
    t_map.start()

    print(f"[INFO] Running for {run_duration} seconds. "
          f"Open '{MAP_HTML_PATH}' in your browser and refresh periodically.")
    try:
        time.sleep(run_duration)
    except KeyboardInterrupt:
        print("\n[MAIN] Stopped by user.")
    print("[MAIN] Dashboard finished.")

[INIT] Built facility_fuel_map with 420 facilities.
[INIT] Example mapping (first 5): [('0MREH', 'battery'), ('0MREHA2', 'battery'), ('0TARONGBESS', 'battery'), ('0WAMBOWF', 'wind'), ('ADP', 'solar_utility')]
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html


In [29]:
main(run_duration=30)  # 运行n分钟后自动退出

=== COMP5339 Assignment 2 - Task 4 Dashboard ===
[INFO] Region filters (backend): ALL
[INFO] Fuel filters   (backend): ALL


  client = mqtt.Client()


[MQTT] Subscriber loop started (background).
[MAP] Refresher started, interval = 15.0s
[INFO] Running for 30 seconds. Open 'dashboard_map.html' in your browser and refresh periodically.
[MQTT] Connected: 0
[MQTT] Subscribed to topic: nem/yjia0057/power_emissions
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAIN] Dashboard finished.
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 facilities -> dashboard_map.html
[MAP] Updated 0 faciliti