In [1]:
import requests
from datetime import datetime, timedelta, UTC
import pandas as pd
from io import BytesIO
import json
from collections import defaultdict

session = requests.Session()
url = "http://ionbeam-ichange.ecmwf-ichange.f.ewcloud.host/api/v1/"
url = "http://localhost:5002/api/v1/"

def api_get(path, *args, **kwargs):
    r = session.get(url + path, *args, **kwargs)
    if not r.ok:
        print(f"API Error")
        print(json.dumps(r.json(), indent = 4))
    return r

In [2]:
def sort_by_platform(stations) -> dict[str, list[dict]]:
    by_platform = defaultdict(list)
    for s in stations:
        by_platform[s["platform"]].append(s)
    return by_platform
    
lookback = timedelta(hours = 5) 

all_stations = api_get("stations", params = {
}).json()

recent_stations = api_get("stations", params = {
    "start_time" : datetime.now(UTC) - lookback
}).json()

print("All stations")
all_by_platform = sort_by_platform(all_stations)
print({k : len(v) for k, v in all_by_platform.items()})
print()

print(f"Recent ({lookback})")
print({k : len(v) for k, v in sort_by_platform(recent_stations).items()})
print()

All stations
{}

Recent (5:00:00)
{}



## Total Meteotracker datasets

In [3]:
all_by_platform['meteotracker'][-1]

IndexError: list index out of range

In [None]:
dates = [datetime.fromisoformat(s['time_span'][1]) for s in all_by_platform['meteotracker']]
earliest, latest = min(dates), max(dates)
print(f"Meteotracker: {len(all_by_platform['meteotracker'])} tracks.")
print(f"Earliest ingested data: {earliest}")
print(f"Most recent ingested data: {latest}")

## Total Acronet datasets

In [None]:
def get_all_granules_by_platform(platform):
    granules = api_get("list", params = {'platform': platform}).json()
    dates = [datetime.fromisoformat(d["datetime"]) for d in granules]
    assert all(g["mars_request"]["platform"] == platform for g in granules)
    return granules, dates

acronet_granules, dates = get_all_granules_by_platform(platform = "acronet")
earliest, latest = min(dates), max(dates)
total_time = timedelta(minutes = 5) * len(acronet_granules)

print(f"Acronet: {len(all_by_platform['acronet'])} distinct stations.")
print(f"Earliest ingested data: {earliest} Most recent ingested data: {latest}")
print(f"Ingested data: {len(acronet_granules)} 5 minute acronet data granules.")
print(f"Total time: {total_time}")

## Total Smart Citizen Kit Datasets

In [None]:
sck_granules, dates = get_all_granules_by_platform(platform = "smart_citizen_kit")
earliest, latest = min(dates), max(dates)
total_time = timedelta(minutes = 5) * len(sck_granules)

print(f"Smart Citizen Kit: {len(all_by_platform['smart_citizen_kit'])} distinct stations.")
print(f"Earliest ingested data: {earliest} Most recent ingested data: {latest}")
print(f"Ingested data: {len(sck_granules)} 5 minute sck data granules.")
print(f"Total time: {total_time}")

## Retrieving chunked data for continuous data streams
Acronet, Smart Citizen Kit

In [None]:
def plot_example_station(station):
    latest = datetime.fromisoformat(example_station["time_span"][1])
    print(f"Latest data: {latest}")
    args = {
        "start_time" : (latest - timedelta(hours = 12)).isoformat(),
        "end_time": latest.isoformat(),
    }
    
    data = api_get("retrieve", params = example_station["mars_request"] | args | dict(format = "csv"))
    df = pd.read_csv(BytesIO(data.content))
    
    df["datetime"] = pd.to_datetime(df["datetime"])
    exclude_columns = {"external_id", "date", "lat", "lon", "altitude"}
    numeric_columns = [col for col, dtype in zip(df.columns, df.dtypes) 
                       if (dtype == "float64" or dtype == "int64")
                       and col not in exclude_columns]
    
    axes = df.plot(
        x = "datetime",
        y = numeric_columns,
        subplots=True, layout=(len(numeric_columns), 1), figsize=(8, 2*len(numeric_columns)), sharex=True, rot=90)

example_station = by_platform["smart_citizen_kit"][-1]
plot_example_station(example_station)

## Retrieve and plot a Meteotracker track

In [None]:
example_station = by_platform["meteotracker"][-1]

print(json.dumps(example_station["mars_request"], indent = 4))
granule_list = api_get("list", params = example_station["mars_request"]).json()

print(f"The above MARS request matches {len(granule_list)} data granule(s).")

# Sanity check
assert len(granule_list) == 1

args = {
    "format" : "json"
}

In [None]:
data = api_get("retrieve", params = example_station["mars_request"] | args)
df = pd.DataFrame.from_records(data.json())
df["datetime"] = pd.to_datetime(df["datetime"])

exclude_columns = {"external_id", "date", "lat", "lon", "altitude"}
numeric_columns = [col for col, dtype in zip(df.columns, df.dtypes) 
                   if (dtype == "float64" or dtype == "int64")
                   and col not in exclude_columns]

axes = df.plot(
    x = "datetime",
    y = numeric_columns,
    subplots=True, layout=(len(numeric_columns), 1), figsize=(8, 2*len(numeric_columns)), sharex=True, rot=90)


In [None]:
import geopandas as gpd
geo_df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=4326)
geo_df.explore(column = "altitude")