# New Metadata parser
Instead of looping though each individually, build the metadata dict all at once.

In [1]:
import synoptic
from synoptic.json_parsers import (
    parse_raw_variable_column,
    station_metadata_to_dataframe,
)

import polars as pl

In [2]:
df = synoptic.Latest(
    # vars="air_temp,wind_speed,ozone_concentration",
    state="ut",
    # network=1,
    complete=True,
    qc=True,
    qc_checks="all",
).df()
df

🚚💨 Speedy delivery from Synoptic's [32mlatest[0m service.
📦 Received data from 1,449 stations.
col='stid', schema=String


stid,variable,sensor_index,is_derived,value,date_time,qc_passed,qc_flags,value_string,units,id,name,elevation,latitude,longitude,mnet_id,state,timezone,elev_dem,nwszone,nwsfirezone,gacc,shortname,sgid,county,country,wims_id,cwa,period_of_record_start,period_of_record_end,providers,qc_flagged,is_restricted,is_active
str,str,u32,bool,f64,"datetime[μs, UTC]",bool,list[i64],str,str,u32,str,f64,f64,f64,u32,str,str,f64,str,str,str,str,str,str,str,str,str,"datetime[μs, UTC]","datetime[μs, UTC]",list[struct[2]],bool,bool,bool
"""WBB""","""pressure""",1,false,85892.0,2024-11-20 05:29:00 UTC,true,,,"""Pascals""",1,"""U of U William Browning Buildi…",4806.0,40.76623,-111.84755,153,"""UT""","""America/Denver""",4727.7,"""UT105""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Salt Lake""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 04:50:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""U-ATAQ"",""http://air.utah.edu/""}]",false,false,true
"""GNI""","""pressure""",1,false,87058.0,2024-10-31 07:40:00 UTC,true,,,"""Pascals""",34,"""Gunnison Island""",4242.0,41.33216,-112.85432,153,"""UT""","""America/Denver""",4202.8,"""UT101""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Box Elder""","""US""",,"""SLC""",1998-05-22 00:00:00 UTC,2024-11-20 04:45:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""Utah Department of Natural Resources"",""http://www.dnr.utah.gov""}, {""SLC WFO/NWS Western Region"",""http://www.wrh.noaa.gov/slc""}]",false,false,true
"""HATUT""","""pressure""",1,false,87580.0,2024-11-20 05:30:00 UTC,true,,,"""Pascals""",35,"""Hat Island""",4242.0,41.07073,-112.58621,153,"""UT""","""America/Denver""",4245.4,"""UT101""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Box Elder""","""US""",,"""SLC""",1998-09-02 00:00:00 UTC,2024-11-20 04:45:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""Utah Department of Natural Resources"",""http://www.dnr.utah.gov""}, {""SLC WFO/NWS Western Region"",""http://www.wrh.noaa.gov/slc""}]",false,false,true
"""LMS""","""pressure""",1,false,87841.0,2024-11-20 05:15:00 UTC,true,,,"""Pascals""",36,"""Locomotive Springs""",4242.0,41.701,-112.86181,153,"""UT""","""America/Denver""",4215.9,"""UT101""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Box Elder""","""US""",,"""SLC""",1999-07-02 00:00:00 UTC,2024-11-20 04:45:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""SLC WFO/NWS Western Region"",""http://www.wrh.noaa.gov/slc""}]",false,false,true
"""LMR""","""pressure""",1,false,84737.0,2024-11-20 05:15:00 UTC,true,,,"""Pascals""",39,"""Lakeside Mountain""",5039.0,41.06084,-112.89173,153,"""UT""","""America/Denver""",5150.9,"""UT101""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Box Elder""","""US""",,"""SLC""",1999-12-16 00:00:00 UTC,2024-11-20 04:45:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""SLC WFO/NWS Western Region"",""http://www.wrh.noaa.gov/slc""}]",false,false,true
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""K41U""","""cloud_layer_3""",1,true,2590.83,2024-11-19 07:35:00 UTC,,,"""overcast""","""Meters""",46328,"""Manti-Ephraim Airport""",5500.0,39.33133,-111.61273,1,"""UT""","""America/Denver""",5505.2,"""UT118""","""SLC492""","""GBCC""","""ASOS/AWOS""","""GB27""","""Sanpete""","""US""",,"""SLC""",2015-06-03 18:59:00 UTC,2024-11-20 04:35:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",true,false,true
"""UUSYR""","""cloud_layer_3""",1,true,2225.07,2024-11-18 15:40:00 UTC,,,"""N/A""","""Meters""",62231,"""Syracuse""",4217.0,41.08847,-112.1188,153,"""UT""","""America/Denver""",4215.9,"""UT104""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Davis""","""US""",,"""SLC""",2017-03-17 19:46:00 UTC,2024-11-20 04:50:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}]",false,false,true
"""KSPK""","""cloud_layer_3""",1,true,2286.03,2024-11-19 15:35:00 UTC,,,"""broken""","""Meters""",160898,"""Spanish Fork Municipal Airport""",4530.0,40.145,-111.6677,1,"""UT""","""America/Denver""",4534.1,"""UT106""","""SLC478""","""GBCC""","""ASOS/AWOS""","""GB25""","""Utah""","""US""",,"""SLC""",2021-01-19 19:28:00 UTC,2024-11-20 04:35:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",false,false,true
"""KU64""","""cloud_layer_3""",1,true,2590.83,2024-11-19 11:55:00 UTC,,,"""broken""","""Meters""",207121,"""Monticello Airport""",6970.0,37.93243,-109.34122,1,"""UT""","""America/Denver""",,"""UT028""","""GJT491""","""GBCC""","""ASOS/AWOS""","""GB32""","""San Juan""","""US""",,"""GJT""",2023-04-06 13:59:00 UTC,2024-11-20 04:35:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",false,false,true


In [2]:
df = synoptic.TimeSeries(
    # vars="air_temp,wind_speed,ozone_concentration",
    state="ut",
    recent="30m",
    # network=1,
    complete=True,
    qc=True,
    qc_checks="all",
).df()
df


🚚💨 Speedy delivery from Synoptic's [32mtimeseries[0m service.
📦 Received data from 849 stations.


stid,date_time,variable,sensor_index,is_derived,value,value_sting,units,id,name,elevation,latitude,longitude,mnet_id,state,timezone,elev_dem,nwszone,nwsfirezone,gacc,shortname,sgid,county,country,wims_id,cwa,period_of_record_start,period_of_record_end,providers,qc_flagged,is_restricted,is_active
str,"datetime[μs, UTC]",str,u32,bool,f64,str,str,u32,str,f64,f64,f64,u32,str,str,f64,str,str,str,str,str,str,str,str,str,"datetime[μs, UTC]","datetime[μs, UTC]",list[struct[2]],bool,bool,bool
"""WBB""",2024-11-20 06:05:00 UTC,"""ozone_concentration""",1,false,36.85,,"""ppb""",1,"""U of U William Browning Buildi…",4806.0,40.76623,-111.84755,153,"""UT""","""America/Denver""",4727.7,"""UT105""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Salt Lake""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:55:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""U-ATAQ"",""http://air.utah.edu/""}]",false,false,true
"""WBB""",2024-11-20 06:06:00 UTC,"""ozone_concentration""",1,false,,,"""ppb""",1,"""U of U William Browning Buildi…",4806.0,40.76623,-111.84755,153,"""UT""","""America/Denver""",4727.7,"""UT105""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Salt Lake""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:55:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""U-ATAQ"",""http://air.utah.edu/""}]",false,false,true
"""WBB""",2024-11-20 06:07:00 UTC,"""ozone_concentration""",1,false,,,"""ppb""",1,"""U of U William Browning Buildi…",4806.0,40.76623,-111.84755,153,"""UT""","""America/Denver""",4727.7,"""UT105""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Salt Lake""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:55:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""U-ATAQ"",""http://air.utah.edu/""}]",false,false,true
"""WBB""",2024-11-20 06:08:00 UTC,"""ozone_concentration""",1,false,,,"""ppb""",1,"""U of U William Browning Buildi…",4806.0,40.76623,-111.84755,153,"""UT""","""America/Denver""",4727.7,"""UT105""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Salt Lake""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:55:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""U-ATAQ"",""http://air.utah.edu/""}]",false,false,true
"""WBB""",2024-11-20 06:09:00 UTC,"""ozone_concentration""",1,false,,,"""ppb""",1,"""U of U William Browning Buildi…",4806.0,40.76623,-111.84755,153,"""UT""","""America/Denver""",4727.7,"""UT105""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Salt Lake""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:55:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""U-ATAQ"",""http://air.utah.edu/""}]",false,false,true
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""G5565""",2024-11-20 06:08:00 UTC,"""weather_condition""",1,true,,,"""weather_condition""",252735,"""GW5565 SANDY""",4383.0,40.6,-111.9,65,"""UT""","""America/Denver""",,"""UT105""","""SLC478""","""GBCC""","""APRSWXNET/CWOP""","""GB25""","""Salt Lake""","""US""",,"""SLC""",2024-11-19 19:43:00 UTC,2024-11-20 05:53:00 UTC,"[{""APRSWXNET/Citizen Weather Observer Program"",""http://www.findu.com/citizenweather""}]",false,false,
"""G5565""",2024-11-20 06:13:00 UTC,"""weather_condition""",1,true,,,"""weather_condition""",252735,"""GW5565 SANDY""",4383.0,40.6,-111.9,65,"""UT""","""America/Denver""",,"""UT105""","""SLC478""","""GBCC""","""APRSWXNET/CWOP""","""GB25""","""Salt Lake""","""US""",,"""SLC""",2024-11-19 19:43:00 UTC,2024-11-20 05:53:00 UTC,"[{""APRSWXNET/Citizen Weather Observer Program"",""http://www.findu.com/citizenweather""}]",false,false,
"""G5565""",2024-11-20 06:18:00 UTC,"""weather_condition""",1,true,,,"""weather_condition""",252735,"""GW5565 SANDY""",4383.0,40.6,-111.9,65,"""UT""","""America/Denver""",,"""UT105""","""SLC478""","""GBCC""","""APRSWXNET/CWOP""","""GB25""","""Salt Lake""","""US""",,"""SLC""",2024-11-19 19:43:00 UTC,2024-11-20 05:53:00 UTC,"[{""APRSWXNET/Citizen Weather Observer Program"",""http://www.findu.com/citizenweather""}]",false,false,
"""G5565""",2024-11-20 06:23:00 UTC,"""weather_condition""",1,true,,,"""weather_condition""",252735,"""GW5565 SANDY""",4383.0,40.6,-111.9,65,"""UT""","""America/Denver""",,"""UT105""","""SLC478""","""GBCC""","""APRSWXNET/CWOP""","""GB25""","""Salt Lake""","""US""",,"""SLC""",2024-11-19 19:43:00 UTC,2024-11-20 05:53:00 UTC,"[{""APRSWXNET/Citizen Weather Observer Program"",""http://www.findu.com/citizenweather""}]",false,false,


In [3]:
S = synoptic.SynopticAPI(
    "timeseries",
    # vars="air_temp,wind_speed,ozone_concentration",
    # stid="wbb,ukbkb,kslc",
    recent="30m",
    state="ut",
    # network=1,
    complete=True,
    qc=True,
    qc_checks="all",
)
S

🚚💨 Speedy delivery from Synoptic's [32mtimeseries[0m service.
📦 Received data from 930 stations.


╭─ Synoptic timeseries service ─────
│ Stations : 930
│ QC Checks: 86
╰──────────────────────────────────────╯

In [None]:

# TODO: Need to do something with the list of qc data
# TODO: Need to implement parsing cloud_layer

observations = []
qc = []
latency = []
sensor_variables = []

for s in S.STATION:
    observations.append({"stid": s["STID"]} | s.pop("OBSERVATIONS", {}))
    qc.append({"stid": s["STID"]} | s.pop("QC", {}))
    latency.append({"stid": s["STID"]} | s.pop("LATENCY", {}))
    sensor_variables.append({"stid": s["STID"]} | s.pop("SENSOR_VARIABLES", {}))

df = pl.DataFrame(observations, infer_schema_length=None)

In [112]:
cols_with_float = []
cols_with_string = []
cols_with_cloud_layer = []
cols_with_other = []

for col, schema in df.schema.items():
    if col in {"date_time", "stid"}:
        continue
    elif schema == pl.List(pl.Float64):
        cols_with_float.append(col)
    elif schema == pl.List(pl.String):
        cols_with_string.append(col)
    elif col.startswith("cloud_layer"):
        cols_with_cloud_layer.append(col)
    else:
        cols_with_other.append(col)
        print(f"WARNING: Unknown schema for {col=} {schema=}")

cols_with_cloud_layer

['cloud_layer_1_set_1d']

In [113]:
to_concat = []

# Unpack the float observations
if cols_with_float:
    observed_float = (
        df.select(["stid", "date_time"] + cols_with_float)
        .with_columns(
            pl.col(cols_with_float).fill_null(
                pl.lit(None, dtype=pl.Float64).repeat_by(
                    pl.col("date_time").list.len()
                )  # https://stackoverflow.com/q/78810432/2383070
            )
        )
        .explode(["date_time"] + cols_with_float)
        .unpivot(cols_with_float, index=["stid", "date_time"])
    )
    to_concat.append(observed_float)

observed_float

stid,date_time,variable,value
str,str,str,f64
"""WBB""","""2024-11-20T05:49:00Z""","""air_temp_set_1""",0.4
"""WBB""","""2024-11-20T05:50:00Z""","""air_temp_set_1""",0.433
"""WBB""","""2024-11-20T05:51:00Z""","""air_temp_set_1""",0.567
"""WBB""","""2024-11-20T05:52:00Z""","""air_temp_set_1""",0.628
"""WBB""","""2024-11-20T05:53:00Z""","""air_temp_set_1""",0.606
…,…,…,…
"""G5565""","""2024-11-20T05:53:00Z""","""wind_chill_set_2d""",
"""G5565""","""2024-11-20T05:58:00Z""","""wind_chill_set_2d""",
"""G5565""","""2024-11-20T06:04:00Z""","""wind_chill_set_2d""",
"""G5565""","""2024-11-20T06:08:00Z""","""wind_chill_set_2d""",


In [114]:
# Unpack the string observations
#   Put values in column 'value_string'

if cols_with_string:
    observed_string = (
        df.select(["stid", "date_time"] + cols_with_string)
        .with_columns(
            pl.col(cols_with_string).fill_null(
                pl.lit(None, dtype=pl.String).repeat_by(
                    pl.col("date_time").list.len()
                )  # https://stackoverflow.com/q/78810432/2383070
            )
        )
        .explode(["date_time"] + cols_with_string)
        .unpivot(cols_with_string, index=["stid", "date_time"])
        .rename({"value": "value_sting"})
    )
    to_concat.append(observed_string)

observed_string

stid,date_time,variable,value_sting
str,str,str,str
"""WBB""","""2024-11-20T05:49:00Z""","""wind_cardinal_direction_set_1d""","""NE"""
"""WBB""","""2024-11-20T05:50:00Z""","""wind_cardinal_direction_set_1d""","""NE"""
"""WBB""","""2024-11-20T05:51:00Z""","""wind_cardinal_direction_set_1d""","""ENE"""
"""WBB""","""2024-11-20T05:52:00Z""","""wind_cardinal_direction_set_1d""","""ENE"""
"""WBB""","""2024-11-20T05:53:00Z""","""wind_cardinal_direction_set_1d""","""E"""
…,…,…,…
"""G5565""","""2024-11-20T05:53:00Z""","""wind_cardinal_direction_set_2d""",
"""G5565""","""2024-11-20T05:58:00Z""","""wind_cardinal_direction_set_2d""",
"""G5565""","""2024-11-20T06:04:00Z""","""wind_cardinal_direction_set_2d""",
"""G5565""","""2024-11-20T06:08:00Z""","""wind_cardinal_direction_set_2d""",


In [115]:
# Unpack the cloud layer.
#   Put sky_condition in 'value_sting' column
#   and height_agl in 'value' column

if cols_with_cloud_layer:
    observed_cloud_layer = (
        df.select(["stid", "date_time"] + cols_with_cloud_layer)
        .with_columns(
            pl.col(cols_with_cloud_layer).fill_null(
                pl.lit(None).repeat_by(
                    pl.col("date_time").list.len()
                )  # https://stackoverflow.com/q/78810432/2383070
            )
        )
        .explode(["date_time"] + cols_with_cloud_layer)
        .unpivot(cols_with_cloud_layer, index=["stid", "date_time"])
        .rename({"value": "value_sting"})
    )
    to_concat.append(observed_cloud_layer)

observed_cloud_layer


InvalidOperationError: `repeat_by` operation not supported for dtype `null`

In [116]:
# Join all observation values
observed = pl.concat(to_concat, how="diagonal_relaxed")

# Cast 'date_time' column from string to datetime
observed = observed.with_columns(pl.col("date_time").str.to_datetime())

# Parse the variable name
observed = observed.pipe(parse_raw_variable_column, S.UNITS)

# Join the metadata to the observed values
metadata = station_metadata_to_dataframe(S.STATION)
observed = observed.join(metadata, on="stid", how="full", coalesce=True)


In [122]:
S.url

'https://api.synopticdata.com/v2/stations/timeseries?recent=30&state=ut&complete=1&qc=on&qc_checks=all&token=0bbe0e9fda7945a68951cc1bdebb2b0d'

In [120]:
observed.filter(pl.col("qc_flagged"))

stid,date_time,variable,sensor_index,is_derived,value,value_sting,units,id,name,elevation,latitude,longitude,mnet_id,state,timezone,elev_dem,nwszone,nwsfirezone,gacc,shortname,sgid,county,country,wims_id,cwa,period_of_record_start,period_of_record_end,providers,qc_flagged,is_restricted,is_active
str,"datetime[μs, UTC]",str,u32,bool,f64,str,str,u32,str,f64,f64,f64,u32,str,str,f64,str,str,str,str,str,str,str,str,str,"datetime[μs, UTC]","datetime[μs, UTC]",list[struct[2]],bool,bool,bool
"""KPVU""",2024-11-20 05:50:00 UTC,"""air_temp""",1,false,-3.0,,"""Celsius""",58,"""Provo Municipal Airport""",4495.0,40.2239,-111.7253,1,"""UT""","""America/Denver""",4491.5,"""UT106""","""SLC478""","""GBCC""","""ASOS/AWOS""","""GB25""","""Utah""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:45:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",true,false,true
"""KPVU""",2024-11-20 05:55:00 UTC,"""air_temp""",1,false,-3.0,,"""Celsius""",58,"""Provo Municipal Airport""",4495.0,40.2239,-111.7253,1,"""UT""","""America/Denver""",4491.5,"""UT106""","""SLC478""","""GBCC""","""ASOS/AWOS""","""GB25""","""Utah""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:45:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",true,false,true
"""KPVU""",2024-11-20 05:56:00 UTC,"""air_temp""",1,false,-3.9,,"""Celsius""",58,"""Provo Municipal Airport""",4495.0,40.2239,-111.7253,1,"""UT""","""America/Denver""",4491.5,"""UT106""","""SLC478""","""GBCC""","""ASOS/AWOS""","""GB25""","""Utah""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:45:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",true,false,true
"""KPVU""",2024-11-20 06:00:00 UTC,"""air_temp""",1,false,-4.0,,"""Celsius""",58,"""Provo Municipal Airport""",4495.0,40.2239,-111.7253,1,"""UT""","""America/Denver""",4491.5,"""UT106""","""SLC478""","""GBCC""","""ASOS/AWOS""","""GB25""","""Utah""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:45:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",true,false,true
"""KPVU""",2024-11-20 06:05:00 UTC,"""air_temp""",1,false,-5.0,,"""Celsius""",58,"""Provo Municipal Airport""",4495.0,40.2239,-111.7253,1,"""UT""","""America/Denver""",4491.5,"""UT106""","""SLC478""","""GBCC""","""ASOS/AWOS""","""GB25""","""Utah""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 05:45:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",true,false,true
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""AV997""",2024-11-20 06:07:00 UTC,"""wind_cardinal_direction""",2,true,,,"""wind_cardinal_direction""",232230,"""NW5W-12 Suncrest""",6201.0,40.47583,-111.84533,65,"""UT""","""America/Denver""",,"""UT111""","""SLC478""","""GBCC""","""APRSWXNET/CWOP""","""GB25""","""Utah""","""US""",,"""SLC""",2024-03-03 20:40:00 UTC,2024-11-20 05:47:00 UTC,"[{""APRSWXNET/Citizen Weather Observer Program"",""http://www.findu.com/citizenweather""}]",true,false,true
"""AV997""",2024-11-20 06:12:00 UTC,"""wind_cardinal_direction""",2,true,,,"""wind_cardinal_direction""",232230,"""NW5W-12 Suncrest""",6201.0,40.47583,-111.84533,65,"""UT""","""America/Denver""",,"""UT111""","""SLC478""","""GBCC""","""APRSWXNET/CWOP""","""GB25""","""Utah""","""US""",,"""SLC""",2024-03-03 20:40:00 UTC,2024-11-20 05:47:00 UTC,"[{""APRSWXNET/Citizen Weather Observer Program"",""http://www.findu.com/citizenweather""}]",true,false,true
"""G5229""",2024-11-20 05:50:00 UTC,"""wind_cardinal_direction""",2,true,,,"""wind_cardinal_direction""",242416,"""GW5229 LINDON""",4629.0,40.33317,-111.72767,65,"""UT""","""America/Denver""",,"""UT106""","""SLC478""","""GBCC""","""APRSWXNET/CWOP""","""GB25""","""Utah""","""US""",,"""SLC""",2024-08-06 17:36:00 UTC,2024-11-20 05:50:00 UTC,"[{""APRSWXNET/Citizen Weather Observer Program"",""http://www.findu.com/citizenweather""}]",true,false,true
"""G5229""",2024-11-20 05:55:00 UTC,"""wind_cardinal_direction""",2,true,,,"""wind_cardinal_direction""",242416,"""GW5229 LINDON""",4629.0,40.33317,-111.72767,65,"""UT""","""America/Denver""",,"""UT106""","""SLC478""","""GBCC""","""APRSWXNET/CWOP""","""GB25""","""Utah""","""US""",,"""SLC""",2024-08-06 17:36:00 UTC,2024-11-20 05:50:00 UTC,"[{""APRSWXNET/Citizen Weather Observer Program"",""http://www.findu.com/citizenweather""}]",true,false,true


In [None]:
(
    df.select(["stid", "date_time"] + cols_with_cloud_layer).with_columns(
        pl.col(cols_with_cloud_layer).fill_null(
            pl.struct({'sky_condition':None, "height_agl":None})), dtype=pl.Boolean).repeat_by(
                pl.col("date_time").list.len()
            )  # https://stackoverflow.com/q/78810432/2383070
        )
    )
)

SyntaxError: unmatched ')' (1834039484.py, line 9)

In [3]:
import polars as pl


def unnest_period_of_record(
    df: pl.DataFrame | pl.LazyFrame,
) -> pl.DataFrame | pl.LazyFrame:
    """Un-nest the PERIOD_OF_RECORD column struct."""
    return df.with_columns(
        pl.struct(
            pl.col("PERIOD_OF_RECORD")
            .struct.field("start")
            .cast(pl.String)
            .str.to_datetime(time_zone="UTC")
            .alias("PERIOD_OF_RECORD_START"),
            pl.col("PERIOD_OF_RECORD")
            .struct.field("end")
            .cast(pl.String)
            .str.to_datetime(time_zone="UTC")
            .alias("PERIOD_OF_RECORD_END"),
        ).alias("PERIOD_OF_RECORD"),
    ).unnest("PERIOD_OF_RECORD")


def station_metadata_to_dataframe(STATION: list[dict]):
    """From STATION, produce the metadata DataFrame."""
    a = []
    for metadata in STATION:
        metadata = metadata.copy()
        metadata.pop("OBSERVATIONS", None)
        metadata.pop("SENSOR_VARIABLES", None)
        metadata.pop("LATENCY", None)
        metadata.pop("QC", None)
        a.append(metadata)
    df = pl.DataFrame(a, infer_schema_length=None).lazy()
    df = df.with_columns(
        pl.col("STID").cast(pl.String),
        pl.col("ID", "MNET_ID").cast(pl.UInt32),
        pl.col("ELEVATION", "LATITUDE", "LONGITUDE").cast(pl.Float64),
        is_active=pl.when(pl.col("STATUS") == "ACTIVE")
        .then(True)
        .otherwise(pl.when(pl.col("STATUS") == "INACTIVE").then(False)),
    ).drop("UNITS", "STATUS")

    if "RESTRICTED" in df.collect_schema().names():
        df = df.rename({"RESTRICTED": "is_restricted"})

    if "ELEV_DEM" in df.collect_schema().names():
        # This isn't in the Latency request
        df = df.with_columns(pl.col("ELEV_DEM").cast(pl.Float64))

    df = df.pipe(unnest_period_of_record)
    df = df.rename({i: i.lower() for i in df.collect_schema().names()})

    return df.collect()


def NEW_parse_stations_latest_nearesttime(STATION):
    pass


In [4]:
# Unpack Latest/Nearest time JSON into parts

observations = []
qc = []
latency = []
sensor_variables = []

for s in S.STATION:
    observations.append({"stid": s["STID"]} | s.pop("OBSERVATIONS", {}))
    qc.append({"stid": s["STID"]} | s.pop("qc", {}))
    latency.append({"stid": s["STID"]} | s.pop("latency", {}))
    sensor_variables.append({"stid": s["STID"]} | s.pop("sensor_variables", {}))


# Get Metadata DataFrame
metadata = station_metadata_to_dataframe(S.STATION)

# Get Observations DataFrame (needs more processing)
df = pl.DataFrame(observations, infer_schema_length=None)

In [5]:
# BUG: Synoptic API ozone_concentration_value_1, the value is returned as string and not float
df = df.with_columns(
    pl.struct(
        [
            pl.col("ozone_concentration_value_1")
            .struct.field("value")
            .replace("", None)
            .cast(pl.Float64),
            pl.col("ozone_concentration_value_1").struct.field("date_time"),
        ]
    ).alias("ozone_concentration_value_1")
)


In [6]:
# Separate columns by value type
# TODO: Still need to handle sky_condition types

cols_with_float = []
cols_with_string = []
cols_with_cloud_layer = []
cols_with_other = []
for col, schema in df.schema.items():
    if hasattr(schema, "fields"):
        if pl.Field("value", pl.Float64) in schema.fields:
            cols_with_float.append(col)
        elif pl.Field("value", pl.String) in schema.fields:
            cols_with_string.append(col)
        elif col.startswith("cloud_layer"):
            cols_with_cloud_layer.append(col)
        elif pl.Field("value", pl.Struct) in schema.fields:
            cols_with_other.append(col)
            print(f"WARNING: Unknown struct for {col=} {schema=}")
    else:
        print(f"{col=}, {schema=}")

col='stid', schema=String


In [9]:
# Unpack the Float observations
observed_float = (
    df.select(["stid"] + cols_with_float)
    .select("stid", "^.*value.*$")
    .unpivot(index="stid")
    # .with_columns(
    #    pl.col("variable").str.extract_groups(
    #        r"(?<variable>.+)_value_(?<sensor_index>\d)(?<is_derived>d?)"
    #    )
    # )
    # .unnest("variable")
    # .with_columns(
    #    pl.col("is_derived") == "d",
    #    pl.col("sensor_index").cast(pl.UInt32),
    #    pl.col("variable").replace(S.UNITS).alias("units"),
    # )
    .unnest("value")
    .with_columns(pl.col("date_time").str.to_datetime())
    .drop_nulls()
)
observed_float

stid,variable,value,date_time,qc
str,str,f64,"datetime[μs, UTC]",struct[2]
"""WBB""","""pressure_value_1""",85878.0,2024-11-20 04:55:00 UTC,"{""passed"",null}"
"""GNI""","""pressure_value_1""",87058.0,2024-10-31 07:40:00 UTC,"{""passed"",null}"
"""HATUT""","""pressure_value_1""",87580.0,2024-11-20 04:45:00 UTC,"{""passed"",null}"
"""LMS""","""pressure_value_1""",87821.0,2024-11-20 04:45:00 UTC,"{""passed"",null}"
"""LMR""","""pressure_value_1""",84728.0,2024-11-20 04:45:00 UTC,"{""passed"",null}"
…,…,…,…,…
"""UGSPG""","""evapotranspiration_value_2""",0.0,2024-11-20 04:30:00 UTC,"{""passed"",null}"
"""UUCMF""","""evapotranspiration_value_2""",0.0051,2024-11-20 04:30:00 UTC,"{""passed"",null}"
"""UUPYF""","""evapotranspiration_value_2""",0.0,2024-11-20 04:30:00 UTC,"{""passed"",null}"
"""NGLO161462""","""precip_interval_value_1""",0.0,2024-11-15 19:16:00 UTC,"{""passed"",null}"


In [11]:
# Unpack the string observations
observed_string = (
    df.select(["stid"] + cols_with_string)
    .select("stid", "^.*value.*$")
    .unpivot(index="stid")
    # .with_columns(
    #    pl.col("variable").str.extract_groups(
    #        r"(?<variable>.+)_value_(?<sensor_index>\d)(?<is_derived>d?)"
    #    )
    # )
    # .unnest("variable")
    # .with_columns(
    #    pl.col("is_derived") == "d",
    #    pl.col("sensor_index").cast(pl.UInt32),
    #    pl.col("variable").replace(S.UNITS).alias("units"),
    # )
    .unnest("value")
    .rename({"value": "value_string"})
    .with_columns(pl.col("date_time").str.to_datetime())
    .drop_nulls()
)
observed_string


stid,variable,value_string,date_time,qc
str,str,str,"datetime[μs, UTC]",struct[1]
"""KSLC""","""metar_value_1""","""METAR KSLC 200454Z 18006KT 10S…",2024-11-20 04:54:00 UTC,"{""passed""}"
"""KU42""","""metar_value_1""","""METAR KU42 200435Z AUTO 17005K…",2024-11-20 04:35:00 UTC,"{""passed""}"
"""KHIF""","""metar_value_1""","""METAR KHIF 200455Z AUTO 12011K…",2024-11-20 04:55:00 UTC,"{""passed""}"
"""KOGD""","""metar_value_1""","""METAR KOGD 200453Z AUTO 16007K…",2024-11-20 04:53:00 UTC,"{""passed""}"
"""KBMC""","""metar_value_1""","""METAR KBMC 200435Z AUTO 13003K…",2024-11-20 04:35:00 UTC,"{""passed""}"
…,…,…,…,…
"""KFOM""","""metar_value_1""","""METAR KFOM 200435Z AUTO 10SM C…",2024-11-20 04:35:00 UTC,"{""passed""}"
"""K41U""","""metar_value_1""","""METAR K41U 200435Z AUTO 03004K…",2024-11-20 04:35:00 UTC,"{""passed""}"
"""KSPK""","""metar_value_1""","""METAR KSPK 200435Z AUTO 13009K…",2024-11-20 04:35:00 UTC,"{""passed""}"
"""KU64""","""metar_value_1""","""KU64 200435Z AUTO 24004KT 10SM…",2024-11-20 04:35:00 UTC,"{""passed""}"


In [12]:
# Unpack the cloud layer
observed_cloud_layer = (
    (
        df.select(["stid"] + cols_with_cloud_layer)
        .select("stid", "^.*value.*$")
        .unpivot(index="stid")
        # .with_columns(
        #    pl.col("variable").str.extract_groups(
        #        r"(?<variable>.+)_value_(?<sensor_index>\d)(?<is_derived>d?)"
        #    )
        # )
        # .unnest("variable")
        # .with_columns(
        #    pl.col("is_derived") == "d",
        #    pl.col("sensor_index").cast(pl.UInt32),
        #    pl.col("variable").replace(S.UNITS).alias("units"),
        # )
        .unnest("value")
        .rename({"value": "value_cloud_layer"})
        .with_columns(pl.col("date_time").str.to_datetime())
        .drop_nulls()
    )
    .unnest("value_cloud_layer")
    .rename({"sky_condition": "value_string", "height_agl": "value"})
)
observed_cloud_layer


stid,variable,date_time,value_string,value
str,str,"datetime[μs, UTC]",str,f64
"""KSLC""","""cloud_layer_1_value_1d""",2024-11-20 04:54:00 UTC,"""clear""",
"""KU42""","""cloud_layer_1_value_1d""",2024-11-20 04:35:00 UTC,"""clear""",
"""KHIF""","""cloud_layer_1_value_1d""",2024-11-20 04:55:00 UTC,"""clear""",
"""KOGD""","""cloud_layer_1_value_1d""",2024-11-20 04:53:00 UTC,"""clear""",
"""KBMC""","""cloud_layer_1_value_1d""",2024-11-20 04:35:00 UTC,"""clear""",
…,…,…,…,…
"""K41U""","""cloud_layer_3_value_1d""",2024-11-19 07:35:00 UTC,"""overcast""",2590.83
"""UUSYR""","""cloud_layer_3_value_1d""",2024-11-18 15:40:00 UTC,"""N/A""",2225.07
"""KSPK""","""cloud_layer_3_value_1d""",2024-11-19 15:35:00 UTC,"""broken""",2286.03
"""KU64""","""cloud_layer_3_value_1d""",2024-11-19 11:55:00 UTC,"""broken""",2590.83


In [17]:
# Join all observation values

observed = pl.concat(
    [observed_float, observed_string, observed_cloud_layer], how="diagonal_relaxed"
)

In [18]:
# Join the metadata to the observed values

observed = observed.join(metadata, on="stid", how="full", coalesce=True)

# Pase the variable name
observed = (
    observed.with_columns(
        pl.col("variable").str.extract_groups(
            r"(?<variable>.+)_value_(?<sensor_index>\d)(?<is_derived>d?)"
        )
    )
    .unnest("variable")
    .with_columns(
        pl.col("is_derived") == "d",
        pl.col("sensor_index").cast(pl.UInt32),
        pl.col("variable").replace(S.UNITS).alias("units"),
    )
)

if "qc" in observed.columns:
    observed = (
        observed.unnest("qc")
        .rename({"status": "qc_passed"})
        .with_columns(
            pl.col("qc_passed").replace_strict({"failed": False, "passed": True})
        )
    )

observed

stid,variable,sensor_index,is_derived,value,date_time,qc_passed,qc_flags,value_string,id,name,elevation,latitude,longitude,mnet_id,state,timezone,elev_dem,nwszone,nwsfirezone,gacc,shortname,sgid,county,country,wims_id,cwa,period_of_record_start,period_of_record_end,providers,qc_flagged,is_restricted,is_active,units
str,str,u32,bool,f64,"datetime[μs, UTC]",bool,list[i64],str,u32,str,f64,f64,f64,u32,str,str,f64,str,str,str,str,str,str,str,str,str,"datetime[μs, UTC]","datetime[μs, UTC]",list[struct[2]],bool,bool,bool,str
"""WBB""","""pressure""",1,false,85878.0,2024-11-20 04:55:00 UTC,true,,,1,"""U of U William Browning Buildi…",4806.0,40.76623,-111.84755,153,"""UT""","""America/Denver""",4727.7,"""UT105""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Salt Lake""","""US""",,"""SLC""",1997-01-01 00:00:00 UTC,2024-11-20 04:50:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""U-ATAQ"",""http://air.utah.edu/""}]",false,false,true,"""Pascals"""
"""GNI""","""pressure""",1,false,87058.0,2024-10-31 07:40:00 UTC,true,,,34,"""Gunnison Island""",4242.0,41.33216,-112.85432,153,"""UT""","""America/Denver""",4202.8,"""UT101""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Box Elder""","""US""",,"""SLC""",1998-05-22 00:00:00 UTC,2024-11-20 04:45:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""Utah Department of Natural Resources"",""http://www.dnr.utah.gov""}, {""SLC WFO/NWS Western Region"",""http://www.wrh.noaa.gov/slc""}]",false,false,true,"""Pascals"""
"""HATUT""","""pressure""",1,false,87580.0,2024-11-20 04:45:00 UTC,true,,,35,"""Hat Island""",4242.0,41.07073,-112.58621,153,"""UT""","""America/Denver""",4245.4,"""UT101""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Box Elder""","""US""",,"""SLC""",1998-09-02 00:00:00 UTC,2024-11-20 04:45:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""Utah Department of Natural Resources"",""http://www.dnr.utah.gov""}, {""SLC WFO/NWS Western Region"",""http://www.wrh.noaa.gov/slc""}]",false,false,true,"""Pascals"""
"""LMS""","""pressure""",1,false,87821.0,2024-11-20 04:45:00 UTC,true,,,36,"""Locomotive Springs""",4242.0,41.701,-112.86181,153,"""UT""","""America/Denver""",4215.9,"""UT101""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Box Elder""","""US""",,"""SLC""",1999-07-02 00:00:00 UTC,2024-11-20 04:45:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""SLC WFO/NWS Western Region"",""http://www.wrh.noaa.gov/slc""}]",false,false,true,"""Pascals"""
"""LMR""","""pressure""",1,false,84728.0,2024-11-20 04:45:00 UTC,true,,,39,"""Lakeside Mountain""",5039.0,41.06084,-112.89173,153,"""UT""","""America/Denver""",5150.9,"""UT101""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Box Elder""","""US""",,"""SLC""",1999-12-16 00:00:00 UTC,2024-11-20 04:45:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}, {""SLC WFO/NWS Western Region"",""http://www.wrh.noaa.gov/slc""}]",false,false,true,"""Pascals"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""K41U""","""cloud_layer_3""",1,true,2590.83,2024-11-19 07:35:00 UTC,,,"""overcast""",46328,"""Manti-Ephraim Airport""",5500.0,39.33133,-111.61273,1,"""UT""","""America/Denver""",5505.2,"""UT118""","""SLC492""","""GBCC""","""ASOS/AWOS""","""GB27""","""Sanpete""","""US""",,"""SLC""",2015-06-03 18:59:00 UTC,2024-11-20 04:35:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",false,false,true,"""Meters"""
"""UUSYR""","""cloud_layer_3""",1,true,2225.07,2024-11-18 15:40:00 UTC,,,"""N/A""",62231,"""Syracuse""",4217.0,41.08847,-112.1188,153,"""UT""","""America/Denver""",4215.9,"""UT104""","""SLC478""","""GBCC""","""UUNET""","""GB25""","""Davis""","""US""",,"""SLC""",2017-03-17 19:46:00 UTC,2024-11-20 04:50:00 UTC,"[{""U of U MesoWest Group"",""http://meso1.chpc.utah.edu/mesowest_overview/""}]",false,false,true,"""Meters"""
"""KSPK""","""cloud_layer_3""",1,true,2286.03,2024-11-19 15:35:00 UTC,,,"""broken""",160898,"""Spanish Fork Municipal Airport""",4530.0,40.145,-111.6677,1,"""UT""","""America/Denver""",4534.1,"""UT106""","""SLC478""","""GBCC""","""ASOS/AWOS""","""GB25""","""Utah""","""US""",,"""SLC""",2021-01-19 19:28:00 UTC,2024-11-20 04:35:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",false,false,true,"""Meters"""
"""KU64""","""cloud_layer_3""",1,true,2590.83,2024-11-19 11:55:00 UTC,,,"""broken""",207121,"""Monticello Airport""",6970.0,37.93243,-109.34122,1,"""UT""","""America/Denver""",,"""UT028""","""GJT491""","""GBCC""","""ASOS/AWOS""","""GB32""","""San Juan""","""US""",,"""GJT""",2023-04-06 13:59:00 UTC,2024-11-20 04:35:00 UTC,"[{""National Weather Service"",""http://www.weather.gov""}]",false,false,true,"""Meters"""


In [16]:
observed["units"]

units
str
"""Pascals"""
"""Pascals"""
"""Pascals"""
"""Pascals"""
"""Pascals"""
…
"""Meters"""
"""Meters"""
"""Meters"""
"""Meters"""
