Referencias:

https://stats.swiss/?lc=en

https://www.bfs.admin.ch/bfs/en/home/statistics/construction-housing/buildings/energy-sector.gnpdetail.2025-0428.html

https://www.bfs.admin.ch/bfs/en/home/statistics/construction-housing/surveys/gws2009.html

https://www.bfs.admin.ch/bfs/en/home/statistics/construction-housing/buildings/energy-sector.assetdetail.36162970.html

https://github.com/sdmx-twg/sdmx-rest/blob/v1.5.0/v2_1/ws/rest/docs/4_1_introduction.md





# Proyecto de visualización

## Sistemas de calefacción (incluye bombas de calor)

**Objetivo**

Analizar la distribución de los sistemas de calefacción en edificios residenciales por cantón en Suiza, con especial atención a la adopción de bombas de calor, utilizando datos oficiales del BFS (GWS).
https://stats.swiss/vis?lc=en&df[ds]=disseminate&df[id]=DF_GWS_REG3&df[ag]=CH1.GWS&df[vs]=1.0.0&dq=A.8011%2B8012%2B8013%2B8014%2B8015%2B8016%2B8017%2B8018%2B8019%2B8020%2B8021%2B8022%2B8023%2B8024%2B_T.1021%2B1025%2B1030%2B1040.1%2B2%2B3%2B4%2B5%2B6%2B7%2B8%2B9.8100%2BZH%2BBE%2BLU%2BUR%2BSZ%2BOW%2BNW%2BGL%2BZG%2BFR%2BSO%2BBS%2BBL%2BSH%2BAR%2BAI%2BSG%2BGR%2BAG%2BTG%2BTI%2BVD%2BVS%2BNE%2BGE%2BJU&pd=2021%2C2024&to[TIME_PERIOD]=true&vw=tb

In [11]:
import pandas as pd

path = "data/heating_systems/raw/CH1.GWS_DF_GWS_REG3_2021_2024.csv"
df_raw = pd.read_csv(path)

df_raw.head()

Unnamed: 0,STRUCTURE,STRUCTURE_ID,STRUCTURE_NAME,ACTION,FREQ,Unnamed: 5,GBAUPS,Unnamed: 7,GKATS,Unnamed: 9,...,DIFF_LAST_UPDATE,Unnamed: 21,DIFF_EMBARGO_DATE,Unnamed: 23,DIFF_DB_STATE,Unnamed: 25,OBS_STATUS,Unnamed: 27,DIFF_REGION_REF,Unnamed: 29
0,DATAFLOW,CH1.GWS:DF_GWS_REG3(1.0.0),,I,A,,8020,,1040,,...,2025-09-22T08:30,,2025-09-22T08:30,,2025-09-22,,A,,POLG,
1,DATAFLOW,CH1.GWS:DF_GWS_REG3(1.0.0),,I,A,,8020,,1040,,...,2025-09-22T08:30,,2025-09-22T08:30,,2025-09-22,,A,,POLG,
2,DATAFLOW,CH1.GWS:DF_GWS_REG3(1.0.0),,I,A,,8020,,1040,,...,2025-09-22T08:30,,2025-09-22T08:30,,2025-09-22,,A,,POLG,
3,DATAFLOW,CH1.GWS:DF_GWS_REG3(1.0.0),,I,A,,8020,,1040,,...,2025-09-22T08:30,,2025-09-22T08:30,,2025-09-22,,A,,POLG,
4,DATAFLOW,CH1.GWS:DF_GWS_REG3(1.0.0),,I,A,,8022,,1040,,...,2025-09-22T08:30,,2025-09-22T08:30,,2025-09-22,,A,,POLG,


In [13]:
heating_dict = {
    1: "Heating oil",
    2: "Gas",
    3: "Electricity",
    4: "District heating",
    5: "Heat pump",
    6: "Biomass",
    7: "Solar thermal",
    8: "Other",
    9: "No heating system"
}

In [None]:
######################################
# Agregación y porcentajes
######################################

df = df_raw[["GEMEINDENAME", "TIME_PERIOD", "GWAERZH", "OBS_VALUE"]].copy()

df.rename(columns={
    "GEMEINDENAME": "canton",
    "TIME_PERIOD": "year",
    "GWAERZH": "heating_code",
    "OBS_VALUE": "n_buildings"
}, inplace=True)

df["heating_system"] = df["heating_code"].map(heating_dict)


In [24]:
totals = df.groupby(["canton", "year"])["n_buildings"].sum().reset_index()
df = df.merge(totals, on=["canton", "year"], suffixes=("", "_total"))

df["share"] = df["n_buildings"] / df["n_buildings_total"]

In [None]:
#####################################
# Export final
#####################################

out_path = "data/heating_systems/heating_systems_by_canton_year.csv"
df.to_csv(out_path, index=False)


## Consumo energético residencial (por uso)

## Clima (para HDD/CDD) — 

la vía realmente abierta

https://opendatadocs.meteoswiss.ch/

https://opendatadocs.meteoswiss.ch/a-data-groundbased/a1-automatic-weather-stations#data-download

In [1]:
import requests
import pandas as pd

COLL = "ch.meteoschweiz.ogd-smn"
BASE = "https://data.geo.admin.ch/api/stac/v1"

def stac_search(collection=COLL, limit=100, **kwargs):
    url = f"{BASE}/search"
    payload = {"collections": [collection], "limit": limit, **kwargs}
    r = requests.post(url, json=payload, timeout=60)
    r.raise_for_status()
    return r.json()

# 1) Trae algunos items (primeras páginas) para inspeccionar qué campos trae cada item
data = stac_search(limit=50)
items = data["features"]
items[0].keys(), items[0]["properties"].keys()

(dict_keys(['id', 'collection', 'type', 'stac_version', 'geometry', 'bbox', 'properties', 'stac_extensions', 'links', 'assets']),
 dict_keys(['datetime', 'title', 'created', 'updated']))

In [2]:
def find_daily_historical_item(items, station_code=None):
    matches = []
    for it in items:
        p = it["properties"]
        # Ajusta estas condiciones según lo que veas en properties
        ok_station = True if station_code is None else (p.get("station_code") == station_code or p.get("station") == station_code)
        ok_daily = "daily" in str(p).lower() or p.get("time_resolution") == "d"
        ok_hist = "historical" in str(p).lower() or p.get("update_interval") == "historical"
        if ok_station and ok_daily and ok_hist:
            matches.append(it)
    return matches

matches = find_daily_historical_item(items, station_code="SMA")  # ejemplo: cambia el código
len(matches)


0

In [3]:
def get_csv_url_from_item(item):
    # Suele haber un asset con key "data" o similar. Inspecciona item["assets"].keys()
    assets = item["assets"]
    # intenta detectar automáticamente un CSV
    for k, a in assets.items():
        href = a.get("href", "")
        if href.lower().endswith(".csv"):
            return href
    raise ValueError("No encontré un asset CSV en este item.")

csv_url = get_csv_url_from_item(matches[0])
df = pd.read_csv(csv_url)
df.head(), df.columns[:10]


IndexError: list index out of range

In [4]:
def get_csv_url_from_item(item):
    # Suele haber un asset con key "data" o similar. Inspecciona item["assets"].keys()
    assets = item["assets"]
    # intenta detectar automáticamente un CSV
    for k, a in assets.items():
        href = a.get("href", "")
        if href.lower().endswith(".csv"):
            return href
    raise ValueError("No encontré un asset CSV en este item.")

csv_url = get_csv_url_from_item(matches[0])
df = pd.read_csv(csv_url)
df.head(), df.columns[:10]


IndexError: list index out of range

In [None]:
import numpy as np

BASE_H = 18.0
BASE_C = 24.0

# Supongamos que ya tienes:
# df["date"] con fechas y df["Tmean"] con temperatura media diaria

df["date"] = pd.to_datetime(df["date"])
df["year"] = df["date"].dt.year

df["HDD"] = np.maximum(0, BASE_H - df["Tmean"])
df["CDD"] = np.maximum(0, df["Tmean"] - BASE_C)

annual = df.groupby("year")[["HDD", "CDD"]].sum().reset_index()
annual.head()


In [3]:
import os
import re
import time
import requests
import pandas as pd

# =========================
# 0) Rutas a tus metadatos
# =========================
META_PARAMS   = "data/data_info/ogd-smn_meta_parameters.csv"
META_STATIONS = "data/data_info/ogd-smn_meta_stations.csv"
META_INV      = "data/data_info/ogd-smn_meta_datainventory.csv"

OUT_DIR = "data/meteo/smn_daily_historical_by_station"
os.makedirs(OUT_DIR, exist_ok=True)

TEMP_PARAM = "tre200d0"

# =========================
# 1) Cargar metadatos
# =========================
params   = pd.read_csv(META_PARAMS, sep=";", encoding="cp1252")
stations = pd.read_csv(META_STATIONS, sep=";", encoding="cp1252")
inv      = pd.read_csv(META_INV, sep=";", encoding="cp1252")

if TEMP_PARAM not in set(params["parameter_shortname"]):
    raise ValueError(f"No encuentro {TEMP_PARAM} en ogd-smn_meta_parameters.csv")

stations_with_temp = (
    inv.loc[inv["parameter_shortname"].eq(TEMP_PARAM), "station_abbr"]
      .dropna()
      .unique()
      .tolist()
)

print(f"Estaciones con {TEMP_PARAM}: {len(stations_with_temp)}")


Estaciones con tre200d0: 150


In [7]:
import re

# Usa el mismo "items" que ya tienes cargado del STAC (los 158)
# Si no lo tienes en memoria, vuelve a ejecutar tu fetch_all_items.

def find_item_for_station(items, station_abbr: str):
    st = station_abbr.lower()
    for it in items:
        props = it.get("properties", {}) or {}

        # 1) buscar coincidencia exacta en valores de properties
        for v in props.values():
            if isinstance(v, str) and v.lower() == st:
                return it

        # 2) buscar el código dentro del id (a veces el id incluye la estación)
        it_id = (it.get("id") or "").lower()
        if st in it_id:
            return it

        # 3) buscar en cualquier string de properties (más permisivo)
        for v in props.values():
            if isinstance(v, str) and st in v.lower():
                return it

    return None

def print_item_debug(it):
    print("ITEM ID:", it.get("id"))
    props = it.get("properties", {}) or {}
    print("PROPERTIES KEYS:", sorted(list(props.keys())))

    # muestra posibles campos relevantes
    for k in sorted(props.keys()):
        v = props[k]
        if isinstance(v, (str, int, float)) and len(str(v)) < 80:
            if "station" in k.lower() or "abbr" in k.lower() or "site" in k.lower():
                print(f"  {k}: {v}")

    assets = it.get("assets", {}) or {}
    print("\nASSETS (CSV hrefs):")
    csv_hrefs = []
    for ak, av in assets.items():
        href = (av.get("href") or "")
        if href.lower().endswith(".csv"):
            csv_hrefs.append((ak, href))
    for ak, href in csv_hrefs:
        print(f"  - {ak}: {href}")
    print("\nTotal CSV assets:", len(csv_hrefs))

# Prueba con una estación (ABO)
st = "ABO"
it = find_item_for_station(items, st)

if it is None:
    print(f"No encontré item para {st} ni por properties ni por id.")
else:
    print_item_debug(it)


ITEM ID: abo
PROPERTIES KEYS: ['created', 'datetime', 'title', 'updated']

ASSETS (CSV hrefs):
  - ogd-smn_abo_d_historical.csv: https://data.geo.admin.ch/ch.meteoschweiz.ogd-smn/abo/ogd-smn_abo_d_historical.csv
  - ogd-smn_abo_d_recent.csv: https://data.geo.admin.ch/ch.meteoschweiz.ogd-smn/abo/ogd-smn_abo_d_recent.csv
  - ogd-smn_abo_h_historical_1980-1989.csv: https://data.geo.admin.ch/ch.meteoschweiz.ogd-smn/abo/ogd-smn_abo_h_historical_1980-1989.csv
  - ogd-smn_abo_h_historical_1990-1999.csv: https://data.geo.admin.ch/ch.meteoschweiz.ogd-smn/abo/ogd-smn_abo_h_historical_1990-1999.csv
  - ogd-smn_abo_h_historical_2000-2009.csv: https://data.geo.admin.ch/ch.meteoschweiz.ogd-smn/abo/ogd-smn_abo_h_historical_2000-2009.csv
  - ogd-smn_abo_h_historical_2010-2019.csv: https://data.geo.admin.ch/ch.meteoschweiz.ogd-smn/abo/ogd-smn_abo_h_historical_2010-2019.csv
  - ogd-smn_abo_h_historical_2020-2029.csv: https://data.geo.admin.ch/ch.meteoschweiz.ogd-smn/abo/ogd-smn_abo_h_historical_2020-202

In [8]:
import os
import pandas as pd

OUT_DIR = "data/meteo/smn_daily_by_station"
os.makedirs(OUT_DIR, exist_ok=True)

def pick_best_daily_csv(csv_hrefs):
    """
    Elige el mejor candidato daily.
    Regla:
      1) si hay alguno que contenga 'historical' y parezca daily -> ese
      2) si no, el que contenga '/d/' o 'daily'
      3) si no, el primer CSV (y luego validamos por columnas)
    """
    def score(h):
        hl = h.lower()
        s = 0
        if "historical" in hl: s += 50
        if re.search(r"/d/", hl): s += 20
        if "daily" in hl: s += 20
        if "recent" in hl: s -= 5
        if "now" in hl: s -= 10
        return s

    return sorted(csv_hrefs, key=lambda x: score(x[1]), reverse=True)[0]

def download_and_check(station_abbr, item):
    assets = item.get("assets", {}) or {}
    csv_hrefs = [(k, (v.get("href") or "")) for k, v in assets.items() if (v.get("href") or "").lower().endswith(".csv")]

    if not csv_hrefs:
        return station_abbr, "NO_CSV_ASSETS", None

    asset_key, href = pick_best_daily_csv(csv_hrefs)

    out_path = os.path.join(OUT_DIR, f"smn_{station_abbr}_{asset_key}.csv".replace("/", "_"))
    df = pd.read_csv(href, sep=";", encoding="cp1252")

    # Validación mínima para HDD/CDD
    ok_ref = "ReferenceTS" in df.columns
    ok_tmp = "tre200d0" in df.columns

    df.to_csv(out_path, index=False)
    return station_abbr, f"DOWNLOADED ok_ref={ok_ref} ok_tmp={ok_tmp}", out_path

# prueba con las 5 estaciones
test_stations = ['ABO', 'AIG', 'ALT', 'AND', 'ANT']
results = []

for st in test_stations:
    it = find_item_for_station(items, st)
    if it is None:
        results.append((st, "NO_ITEM", None))
        print(f"[{st}] ❌ NO_ITEM")
        continue

    st, status, path = download_and_check(st, it)
    results.append((st, status, path))
    print(f"[{st}] ✅ {status} -> {path}")

pd.DataFrame(results, columns=["station", "status", "file"])


[ABO] ✅ DOWNLOADED ok_ref=False ok_tmp=True -> data/meteo/smn_daily_by_station\smn_ABO_ogd-smn_abo_d_historical.csv.csv
[AIG] ✅ DOWNLOADED ok_ref=False ok_tmp=True -> data/meteo/smn_daily_by_station\smn_AIG_ogd-smn_aig_d_historical.csv.csv
[ALT] ✅ DOWNLOADED ok_ref=False ok_tmp=True -> data/meteo/smn_daily_by_station\smn_ALT_ogd-smn_alt_d_historical.csv.csv
[AND] ✅ DOWNLOADED ok_ref=False ok_tmp=True -> data/meteo/smn_daily_by_station\smn_AND_ogd-smn_and_d_historical.csv.csv
[ANT] ✅ DOWNLOADED ok_ref=False ok_tmp=True -> data/meteo/smn_daily_by_station\smn_ANT_ogd-smn_ant_d_historical.csv.csv


Unnamed: 0,station,status,file
0,ABO,DOWNLOADED ok_ref=False ok_tmp=True,data/meteo/smn_daily_by_station\smn_ABO_ogd-sm...
1,AIG,DOWNLOADED ok_ref=False ok_tmp=True,data/meteo/smn_daily_by_station\smn_AIG_ogd-sm...
2,ALT,DOWNLOADED ok_ref=False ok_tmp=True,data/meteo/smn_daily_by_station\smn_ALT_ogd-sm...
3,AND,DOWNLOADED ok_ref=False ok_tmp=True,data/meteo/smn_daily_by_station\smn_AND_ogd-sm...
4,ANT,DOWNLOADED ok_ref=False ok_tmp=True,data/meteo/smn_daily_by_station\smn_ANT_ogd-sm...


In [9]:
import os
import requests
import pandas as pd

STAC_BASE  = "https://data.geo.admin.ch/api/stac/v1"
COLLECTION = "ch.meteoschweiz.ogd-smn"

OUT_DIR = "data/meteo/smn_daily_historical_by_station"
os.makedirs(OUT_DIR, exist_ok=True)

N_TEST = 5
test_stations = stations_with_temp[:N_TEST]   # ['ABO','AIG',...]
print("Estaciones de prueba:", test_stations)

def stac_get_item(station_abbr: str):
    # item id = station en minúsculas (ej: "abo")
    item_id = station_abbr.lower()
    url = f"{STAC_BASE}/collections/{COLLECTION}/items/{item_id}"
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    return r.json()

def download_daily_historical(station_abbr: str):
    item = stac_get_item(station_abbr)
    assets = item.get("assets", {}) or {}

    asset_key = f"ogd-smn_{station_abbr.lower()}_d_historical.csv"
    if asset_key not in assets:
        # fallback por si algún caso raro no sigue exactamente ese nombre
        raise KeyError(f"{station_abbr}: no existe asset {asset_key}. Assets disponibles: {list(assets.keys())[:10]}...")

    href = assets[asset_key]["href"]

    out_path = os.path.join(OUT_DIR, f"smn_{station_abbr}_d_historical.csv")
    if os.path.exists(out_path):
        return station_abbr, "SKIP", out_path

    df = pd.read_csv(href, sep=";", encoding="cp1252")
    df.to_csv(out_path, index=False)

    # sanity check
    ok_ref = "ReferenceTS" in df.columns
    ok_tmp = "tre200d0" in df.columns

    return station_abbr, f"OK (ReferenceTS={ok_ref}, tre200d0={ok_tmp})", out_path

results = []
for st in test_stations:
    try:
        results.append(download_daily_historical(st))
        print(results[-1][0], "->", results[-1][1])
    except Exception as e:
        results.append((st, f"ERROR: {e}", None))
        print(st, "-> ERROR", e)

results_df = pd.DataFrame(results, columns=["station_abbr", "status", "file"])
results_df

Estaciones de prueba: ['ABO', 'AIG', 'ALT', 'AND', 'ANT']
ABO -> OK (ReferenceTS=False, tre200d0=True)
AIG -> OK (ReferenceTS=False, tre200d0=True)
ALT -> OK (ReferenceTS=False, tre200d0=True)
AND -> OK (ReferenceTS=False, tre200d0=True)
ANT -> OK (ReferenceTS=False, tre200d0=True)


Unnamed: 0,station_abbr,status,file
0,ABO,"OK (ReferenceTS=False, tre200d0=True)",data/meteo/smn_daily_historical_by_station\smn...
1,AIG,"OK (ReferenceTS=False, tre200d0=True)",data/meteo/smn_daily_historical_by_station\smn...
2,ALT,"OK (ReferenceTS=False, tre200d0=True)",data/meteo/smn_daily_historical_by_station\smn...
3,AND,"OK (ReferenceTS=False, tre200d0=True)",data/meteo/smn_daily_historical_by_station\smn...
4,ANT,"OK (ReferenceTS=False, tre200d0=True)",data/meteo/smn_daily_historical_by_station\smn...


In [10]:
# descargar 150

import os
import time
import requests
import pandas as pd

STAC_BASE  = "https://data.geo.admin.ch/api/stac/v1"
COLLECTION = "ch.meteoschweiz.ogd-smn"

OUT_DIR = "data/meteo/smn_daily_historical_by_station"
os.makedirs(OUT_DIR, exist_ok=True)

# stations_with_temp ya lo tienes de antes (150 estaciones con tre200d0)
print("Total estaciones a descargar:", len(stations_with_temp))

session = requests.Session()

def stac_get_item(station_abbr: str):
    item_id = station_abbr.lower()
    url = f"{STAC_BASE}/collections/{COLLECTION}/items/{item_id}"
    r = session.get(url, timeout=60)
    r.raise_for_status()
    return r.json()

def download_daily_historical(station_abbr: str):
    out_path = os.path.join(OUT_DIR, f"smn_{station_abbr}_d_historical.csv")
    if os.path.exists(out_path) and os.path.getsize(out_path) > 0:
        return station_abbr, "SKIP", out_path

    item = stac_get_item(station_abbr)
    assets = item.get("assets", {}) or {}

    asset_key = f"ogd-smn_{station_abbr.lower()}_d_historical.csv"
    if asset_key not in assets:
        return station_abbr, f"FAIL: missing asset {asset_key}", None

    href = assets[asset_key].get("href")
    if not href:
        return station_abbr, "FAIL: missing href", None

    # Descargar CSV (MeteoSwiss: sep=';' y encoding cp1252)
    df = pd.read_csv(href, sep=";", encoding="cp1252")
    # sanity check mínimo
    if "ReferenceTS" not in df.columns or "tre200d0" not in df.columns:
        # aun así guardamos para inspección, pero marcamos warning
        df.to_csv(out_path, index=False)
        return station_abbr, "WARN: missing ReferenceTS/tre200d0", out_path

    df.to_csv(out_path, index=False)
    return station_abbr, "OK", out_path

results = []
ok = skip = warn = fail = 0

for i, st in enumerate(stations_with_temp, start=1):
    try:
        st, status, path = download_daily_historical(st)
        results.append((st, status, path))

        if status == "OK":
            ok += 1
        elif status == "SKIP":
            skip += 1
        elif status.startswith("WARN"):
            warn += 1
        else:
            fail += 1

        if i % 10 == 0 or i == len(stations_with_temp):
            print(f"[{i}/{len(stations_with_temp)}] OK={ok} SKIP={skip} WARN={warn} FAIL={fail}")

        time.sleep(0.1)  # pausa suave
    except Exception as e:
        fail += 1
        results.append((st, f"ERROR: {e}", None))
        print(f"[{i}/{len(stations_with_temp)}] {st} ERROR: {e}")
        time.sleep(0.2)

results_df = pd.DataFrame(results, columns=["station_abbr", "status", "file"])
results_df.to_csv(os.path.join(OUT_DIR, "_download_log.csv"), index=False)

print("\nResumen final:")
print("OK  :", ok)
print("SKIP:", skip)
print("WARN:", warn)
print("FAIL:", fail)
print("Log:", os.path.join(OUT_DIR, "_download_log.csv"))

results_df.head(15)


Total estaciones a descargar: 150
[10/150] OK=0 SKIP=5 WARN=5 FAIL=0
[20/150] OK=0 SKIP=5 WARN=15 FAIL=0
[30/150] OK=0 SKIP=5 WARN=25 FAIL=0
[40/150] OK=0 SKIP=5 WARN=35 FAIL=0
[50/150] OK=0 SKIP=5 WARN=45 FAIL=0
[60/150] OK=0 SKIP=5 WARN=55 FAIL=0
[70/150] OK=0 SKIP=5 WARN=65 FAIL=0
[80/150] OK=0 SKIP=5 WARN=75 FAIL=0
[90/150] OK=0 SKIP=5 WARN=85 FAIL=0
[100/150] OK=0 SKIP=5 WARN=95 FAIL=0
[110/150] OK=0 SKIP=5 WARN=105 FAIL=0
[120/150] OK=0 SKIP=5 WARN=115 FAIL=0
[130/150] OK=0 SKIP=5 WARN=125 FAIL=0
[140/150] OK=0 SKIP=5 WARN=135 FAIL=0
[150/150] OK=0 SKIP=5 WARN=145 FAIL=0

Resumen final:
OK  : 0
SKIP: 5
WARN: 145
FAIL: 0
Log: data/meteo/smn_daily_historical_by_station\_download_log.csv


Unnamed: 0,station_abbr,status,file
0,ABO,SKIP,data/meteo/smn_daily_historical_by_station\smn...
1,AIG,SKIP,data/meteo/smn_daily_historical_by_station\smn...
2,ALT,SKIP,data/meteo/smn_daily_historical_by_station\smn...
3,AND,SKIP,data/meteo/smn_daily_historical_by_station\smn...
4,ANT,SKIP,data/meteo/smn_daily_historical_by_station\smn...
5,ARH,WARN: missing ReferenceTS/tre200d0,data/meteo/smn_daily_historical_by_station\smn...
6,ARO,WARN: missing ReferenceTS/tre200d0,data/meteo/smn_daily_historical_by_station\smn...
7,ATT,WARN: missing ReferenceTS/tre200d0,data/meteo/smn_daily_historical_by_station\smn...
8,BAS,WARN: missing ReferenceTS/tre200d0,data/meteo/smn_daily_historical_by_station\smn...
9,BEH,WARN: missing ReferenceTS/tre200d0,data/meteo/smn_daily_historical_by_station\smn...


In [11]:
import os
import time
import requests
import pandas as pd

STAC_BASE  = "https://data.geo.admin.ch/api/stac/v1"
COLLECTION = "ch.meteoschweiz.ogd-smn"

OUT_DIR = "data/meteo/smn_daily_historical_by_station"
os.makedirs(OUT_DIR, exist_ok=True)

session = requests.Session()

def stac_get_item(station_abbr: str):
    item_id = station_abbr.lower()
    url = f"{STAC_BASE}/collections/{COLLECTION}/items/{item_id}"
    r = session.get(url, timeout=60)
    r.raise_for_status()
    return r.json()

def download_raw_csv(url: str, out_path: str):
    with session.get(url, stream=True, timeout=120) as r:
        r.raise_for_status()
        with open(out_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    f.write(chunk)

def check_columns_quick(path: str):
    """Detecta separador probando ; y , y devuelve (sep, cols)."""
    for sep in [";", ","]:
        try:
            df = pd.read_csv(path, sep=sep, encoding="cp1252", nrows=5)
            cols = [c.lstrip("\ufeff").strip() for c in df.columns]
            if "ReferenceTS" in cols:
                return sep, cols
        except Exception:
            pass
    return None, []

results = []
ok = skip = fail = 0

for i, st in enumerate(stations_with_temp, start=1):
    out_path = os.path.join(OUT_DIR, f"smn_{st}_d_historical.csv")

    if os.path.exists(out_path) and os.path.getsize(out_path) > 0:
        sep, cols = check_columns_quick(out_path)
        results.append((st, "SKIP", out_path, sep, ("tre200d0" in cols)))
        skip += 1
        continue

    try:
        item = stac_get_item(st)
        asset_key = f"ogd-smn_{st.lower()}_d_historical.csv"
        href = item["assets"][asset_key]["href"]

        download_raw_csv(href, out_path)

        sep, cols = check_columns_quick(out_path)
        has_temp = ("tre200d0" in cols)
        results.append((st, "OK", out_path, sep, has_temp))
        ok += 1

        if i % 10 == 0 or i == len(stations_with_temp):
            print(f"[{i}/{len(stations_with_temp)}] OK={ok} SKIP={skip} FAIL={fail}")

        time.sleep(0.1)

    except Exception as e:
        results.append((st, f"FAIL: {e}", None, None, False))
        fail += 1
        print(f"[{i}/{len(stations_with_temp)}] {st} FAIL: {e}")
        time.sleep(0.2)

log_df = pd.DataFrame(results, columns=["station_abbr", "status", "file", "detected_sep", "has_tre200d0"])
log_path = os.path.join(OUT_DIR, "_download_log_v2.csv")
log_df.to_csv(log_path, index=False)

print("\nResumen final:")
print("OK  :", ok)
print("SKIP:", skip)
print("FAIL:", fail)
print("Log:", log_path)
log_df.head(15)



Resumen final:
OK  : 0
SKIP: 150
FAIL: 0
Log: data/meteo/smn_daily_historical_by_station\_download_log_v2.csv


Unnamed: 0,station_abbr,status,file,detected_sep,has_tre200d0
0,ABO,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False
1,AIG,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False
2,ALT,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False
3,AND,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False
4,ANT,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False
5,ARH,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False
6,ARO,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False
7,ATT,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False
8,BAS,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False
9,BEH,SKIP,data/meteo/smn_daily_historical_by_station\smn...,,False


In [None]:
# Paso 1
path = "data/meteo/smn_daily_historical_by_station/smn_ABO_d_historical.csv"

with open(path, "rb") as f:
    raw = f.read(4000)

print("Primeros 200 bytes (repr):")
print(repr(raw[:200]))

print("\nPrimeras líneas decodificadas (cp1252, ignore):")
text = raw.decode("cp1252", errors="ignore")
for i, line in enumerate(text.splitlines()[:8], start=1):
    print(f"{i:02d}: {line}")


Primeros 200 bytes (repr):
b'station_abbr,reference_timestamp,tre200d0,tre200dx,tre200dn,tre005d0,tre005dx,tre005dn,ure200d0,pva200d0,prestad0,pp0qffd0,ppz850d0,ppz700d0,pp0qnhd0,fkl010d0,fkl010d1,fu3010d0,fu3010d1,fkl010d3,fu301'

Primeras líneas decodificadas (cp1252, ignore):
01: station_abbr,reference_timestamp,tre200d0,tre200dx,tre200dn,tre005d0,tre005dx,tre005dn,ure200d0,pva200d0,prestad0,pp0qffd0,ppz850d0,ppz700d0,pp0qnhd0,fkl010d0,fkl010d1,fu3010d0,fu3010d1,fkl010d3,fu3010d3,wcc006d0,rre150d0,rka150d0,htoautd0,gre000d0,oli000d0,olo000d0,osr000d0,ods000d0,sre000d0,sremaxdv,erefaod0,xcd000d0,dkl010d0,xno000d0,xno012d0,rreetsd0,tso005d0,tso010d0,tso020d0
02: ABO,01.01.1901 00:00,,,,,,,,,,,,,,,,,,,,,0.6,,,,,,,,,,,,,,,,,,
03: ABO,02.01.1901 00:00,,,,,,,,,,,,,,,,,,,,,1.8,,,,,,,,,,,,,,,,,,
04: ABO,03.01.1901 00:00,,,,,,,,,,,,,,,,,,,,,0.2,,,,,,,,,,,,,,,,,,
05: ABO,04.01.1901 00:00,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,
06: ABO,05.01.1901 00:00,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,

In [None]:
# Paso 2
import pandas as pd

path = "data/meteo/smn_daily_historical_by_station/smn_ABO_d_historical.csv"

# Intento 1 (lo normal)
try:
    df = pd.read_csv(path, sep=";", encoding="cp1252", nrows=5)
    cols = [c.lstrip("\ufeff").strip() for c in df.columns]
    print("Intento 1 OK. Columnas:", cols[:20])
    print("ReferenceTS:", "ReferenceTS" in cols)
    print("tre200d0:", "tre200d0" in cols)
except Exception as e:
    print("Intento 1 FALLÓ:", e)

# Intento 2 (más tolerante)
try:
    df = pd.read_csv(path, sep=";", encoding="cp1252", engine="python", nrows=5)
    cols = [c.lstrip("\ufeff").strip() for c in df.columns]
    print("\nIntento 2 OK. Columnas:", cols[:20])
    print("ReferenceTS:", "ReferenceTS" in cols)
    print("tre200d0:", "tre200d0" in cols)
except Exception as e:
    print("\nIntento 2 FALLÓ:", e)

# Intento 3 (autodetectar separador)
try:
    df = pd.read_csv(path, sep=None, encoding="cp1252", engine="python", nrows=5)
    cols = [c.lstrip("\ufeff").strip() for c in df.columns]
    print("\nIntento 3 OK (sep autodetect). Columnas:", cols[:20])
    print("ReferenceTS:", "ReferenceTS" in cols)
    print("tre200d0:", "tre200d0" in cols)
except Exception as e:
    print("\nIntento 3 FALLÓ:", e)


Intento 1 OK. Columnas: ['station_abbr,reference_timestamp,tre200d0,tre200dx,tre200dn,tre005d0,tre005dx,tre005dn,ure200d0,pva200d0,prestad0,pp0qffd0,ppz850d0,ppz700d0,pp0qnhd0,fkl010d0,fkl010d1,fu3010d0,fu3010d1,fkl010d3,fu3010d3,wcc006d0,rre150d0,rka150d0,htoautd0,gre000d0,oli000d0,olo000d0,osr000d0,ods000d0,sre000d0,sremaxdv,erefaod0,xcd000d0,dkl010d0,xno000d0,xno012d0,rreetsd0,tso005d0,tso010d0,tso020d0']
ReferenceTS: False
tre200d0: False

Intento 2 OK. Columnas: ['station_abbr,reference_timestamp,tre200d0,tre200dx,tre200dn,tre005d0,tre005dx,tre005dn,ure200d0,pva200d0,prestad0,pp0qffd0,ppz850d0,ppz700d0,pp0qnhd0,fkl010d0,fkl010d1,fu3010d0,fu3010d1,fkl010d3,fu3010d3,wcc006d0,rre150d0,rka150d0,htoautd0,gre000d0,oli000d0,olo000d0,osr000d0,ods000d0,sre000d0,sremaxdv,erefaod0,xcd000d0,dkl010d0,xno000d0,xno012d0,rreetsd0,tso005d0,tso010d0,tso020d0']
ReferenceTS: False
tre200d0: False

Intento 3 OK (sep autodetect). Columnas: ['station_abbr', 'reference_timestamp', 'tre200d0', 'tre200dx',

In [14]:
import os
import pandas as pd
from glob import glob

IN_DIR = "data/meteo/smn_daily_historical_by_station"
paths = sorted(glob(os.path.join(IN_DIR, "smn_*_d_historical.csv")))

rows = []
for path in paths:
    st = os.path.basename(path).split("_")[1]  # smn_ABO_d_historical.csv -> ABO
    try:
        df = pd.read_csv(path, sep=",", encoding="cp1252", nrows=3)
        cols = [c.strip().lower() for c in df.columns]
        has_ts = "reference_timestamp" in cols
        has_temp = "tre200d0" in cols
        rows.append((st, "OK", path, ",", has_ts, has_temp))
    except Exception as e:
        rows.append((st, f"FAIL: {e}", path, None, False, False))

log_fixed = pd.DataFrame(rows, columns=["station_abbr","status","file","sep","has_reference_timestamp","has_tre200d0"])
log_path = os.path.join(IN_DIR, "_download_log_fixed.csv")
log_fixed.to_csv(log_path, index=False)

print("Guardado:", log_path)
print("OK:", (log_fixed["status"]=="OK").sum(), "FAIL:", (log_fixed["status"]!="OK").sum())
log_fixed.head(10)


Guardado: data/meteo/smn_daily_historical_by_station\_download_log_fixed.csv
OK: 150 FAIL: 0


Unnamed: 0,station_abbr,status,file,sep,has_reference_timestamp,has_tre200d0
0,ABO,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
1,AIG,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
2,ALT,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
3,AND,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
4,ANT,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
5,ARH,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
6,ARO,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
7,ATT,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
8,BAS,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
9,BEH,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True


In [15]:
import os
import pandas as pd
from glob import glob

IN_DIR = "data/meteo/smn_daily_historical_by_station"
paths = sorted(glob(os.path.join(IN_DIR, "smn_*_d_historical.csv")))

rows = []
for path in paths:
    st = os.path.basename(path).split("_")[1]  # smn_ABO_d_historical.csv -> ABO
    try:
        df = pd.read_csv(path, sep=",", encoding="cp1252", nrows=3)
        cols = [c.strip().lower() for c in df.columns]
        has_ts = "reference_timestamp" in cols
        has_temp = "tre200d0" in cols
        rows.append((st, "OK", path, ",", has_ts, has_temp))
    except Exception as e:
        rows.append((st, f"FAIL: {e}", path, None, False, False))

log_fixed = pd.DataFrame(rows, columns=["station_abbr","status","file","sep","has_reference_timestamp","has_tre200d0"])
log_path = os.path.join(IN_DIR, "_download_log_fixed.csv")
log_fixed.to_csv(log_path, index=False)

print("Guardado:", log_path)
print("OK:", (log_fixed["status"]=="OK").sum(), "FAIL:", (log_fixed["status"]!="OK").sum())
log_fixed.head(10)


Guardado: data/meteo/smn_daily_historical_by_station\_download_log_fixed.csv
OK: 150 FAIL: 0


Unnamed: 0,station_abbr,status,file,sep,has_reference_timestamp,has_tre200d0
0,ABO,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
1,AIG,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
2,ALT,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
3,AND,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
4,ANT,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
5,ARH,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
6,ARO,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
7,ATT,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
8,BAS,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True
9,BEH,OK,data/meteo/smn_daily_historical_by_station\smn...,",",True,True


In [16]:
import os
import pandas as pd
import numpy as np
from glob import glob

IN_DIR = "data/meteo/smn_daily_historical_by_station"
paths = sorted(glob(os.path.join(IN_DIR, "smn_*_d_historical.csv")))

BASE_HDD = 18.0
BASE_CDD = 24.0
YEAR_START = 2000
YEAR_END = 2024  # ajusta si tu histórico llega a 2025

out_rows = []

for path in paths:
    st = os.path.basename(path).split("_")[1]

    df = pd.read_csv(path, sep=",", encoding="cp1252", usecols=["station_abbr","reference_timestamp","tre200d0"])
    df["reference_timestamp"] = pd.to_datetime(df["reference_timestamp"], dayfirst=True, errors="coerce")
    df = df.dropna(subset=["reference_timestamp"])

    df["year"] = df["reference_timestamp"].dt.year
    df = df[(df["year"] >= YEAR_START) & (df["year"] <= YEAR_END)].copy()

    # tre200d0 puede venir con NaN si no hay dato
    t = pd.to_numeric(df["tre200d0"], errors="coerce")
    df = df.dropna(subset=["tre200d0"])
    t = pd.to_numeric(df["tre200d0"], errors="coerce")

    df["HDD"] = np.maximum(0, BASE_HDD - t)
    df["CDD"] = np.maximum(0, t - BASE_CDD)

    annual = df.groupby(["station_abbr","year"], as_index=False)[["HDD","CDD"]].sum()
    out_rows.append(annual)

hdd_station_year = pd.concat(out_rows, ignore_index=True)

OUT_PATH = "data/meteo/smn_hdd_cdd_station_year_2000_2024.csv"
hdd_station_year.to_csv(OUT_PATH, index=False)
print("Exportado:", OUT_PATH)
hdd_station_year.head()


Exportado: data/meteo/smn_hdd_cdd_station_year_2000_2024.csv


Unnamed: 0,station_abbr,year,HDD,CDD
0,ABO,2000,4167.5,0.0
1,ABO,2001,4385.6,0.0
2,ABO,2002,4092.1,0.0
3,ABO,2003,4097.2,0.0
4,ABO,2004,4437.0,0.0


In [None]:
# agregar HDD/CDD por cantón y año
import pandas as pd

# =========================
# 1) Cargar datasets
# =========================
hdd_station = pd.read_csv(
    "data/meteo/smn_hdd_cdd_station_year_2000_2024.csv"
)

stations_meta = pd.read_csv(
    "data/data_info/ogd-smn_meta_stations.csv",
    sep=";",
    encoding="cp1252"
)

# =========================
# 2) Unir estación -> cantón
# =========================
df = hdd_station.merge(
    stations_meta[["station_abbr", "station_canton"]],
    on="station_abbr",
    how="left"
)

# Comprobación rápida
assert df["station_canton"].isna().sum() == 0, "Hay estaciones sin cantón"

# =========================
# 3) Agregar por cantón y año (MEDIA)
# =========================
hdd_canton_year = (
    df
    .groupby(["station_canton", "year"], as_index=False)
    .agg(
        HDD_mean=("HDD", "mean"),
        CDD_mean=("CDD", "mean"),
        n_stations=("station_abbr", "nunique")
    )
)

hdd_canton_year.head()


Unnamed: 0,station_canton,year,HDD_mean,CDD_mean,n_stations
0,AG,2000,2835.9,0.375,4
1,AG,2001,3053.575,0.15,4
2,AG,2002,2824.35,5.2,4
3,AG,2003,3099.025,35.425,4
4,AG,2004,3092.95,0.0,4


In [None]:
# Export final para Flourish
OUT_PATH = "data/meteo/smn_hdd_cdd_canton_year_2000_2024.csv"
hdd_canton_year.to_csv(OUT_PATH, index=False)
print("Exportado:", OUT_PATH)

Exportado: data/meteo/smn_hdd_cdd_canton_year_2000_2024.csv
