In [7]:
import requests
from datetime import datetime, timedelta, UTC
import pandas as pd
from io import BytesIO
import json
from collections import defaultdict

# Shared HTTP session used by every api_get() call.
session = requests.Session()

# API roots this notebook has been pointed at. Only the value bound to `url`
# is used by api_get(); rebind `url` to switch deployment.
HOSTED_API_URL = "http://ionbeam-ichange.ecmwf-ichange.f.ewcloud.host/api/v1/"
DIRECT_IP_URL = "http://136.156.130.47/"
LOCAL_API_URL = "http://localhost:5002/api/v1/"

url = DIRECT_IP_URL

def api_get(path, *args, **kwargs):
    """GET ``url + path`` through the shared ``session`` and return the response.

    Extra positional and keyword arguments are forwarded to ``session.get``.
    Two keyword defaults are filled in when the caller does not supply them:

    * ``stream=True`` (the original behaviour, now overridable), and
    * ``timeout=(10, 300)`` -- seconds to connect / to wait for data -- so an
      unreachable host fails quickly instead of blocking the kernel. Pass
      ``timeout=None`` to restore the unbounded wait.

    A non-OK response is reported on stdout (its JSON body if it has one,
    the raw text otherwise) and is still returned, not raised.
    """
    kwargs.setdefault("stream", True)
    kwargs.setdefault("timeout", (10, 300))
    r = session.get(url + path, *args, **kwargs)
    if not r.ok:
        print("API Error")
        try:
            print(json.dumps(r.json(), indent = 4))
        except ValueError:
            # Error bodies are not guaranteed to be JSON (e.g. a proxy's HTML
            # error page); show the raw text rather than raising here.
            print(r.text)
    return r

In [8]:
def sort_by_platform(stations) -> dict[str, list[dict]]:
    """Group station records by their ``"platform"`` value.

    Returns a ``defaultdict(list)`` keyed by platform. Each list keeps the
    order in which the stations were supplied, and looking up a platform that
    never appeared yields an empty list instead of raising ``KeyError``.
    """
    grouped = defaultdict(list)
    for station in stations:
        platform = station["platform"]
        grouped[platform].append(station)
    return grouped
    
# Size of the window used for the "recent" station query below.
lookback = timedelta(hours = 5)

# Unfiltered station listing.
all_stations = api_get("stations").json()

# Station listing filtered by start_time. The timestamp is sent as an ISO 8601
# string, matching the other time-filtered requests in this notebook, rather
# than relying on requests to str() a datetime object.
recent_stations = api_get("stations", params = {
    "start_time" : (datetime.now(UTC) - lookback).isoformat()
}).json()

print("All stations")
# Kept as a module-level name: later cells index it by platform.
all_by_platform = sort_by_platform(all_stations)
print({k : len(v) for k, v in all_by_platform.items()})
print()


print(f"Recent ({lookback})")
print({k : len(v) for k, v in sort_by_platform(recent_stations).items()})
print()

ConnectTimeout: HTTPConnectionPool(host='136.156.130.47', port=80): Max retries exceeded with url: /stations (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x10f654cb0>, 'Connection to 136.156.130.47 timed out. (connect timeout=None)'))

## Total Meteotracker datasets

In [None]:
# Each Meteotracker entry in the station listing is reported as one track.
# The dates collected here are the *end* of each entry's time_span.
meteotracker_tracks = all_by_platform['meteotracker']
dates = [datetime.fromisoformat(s['time_span']["end"]) for s in meteotracker_tracks]
print(f"Meteotracker: {len(meteotracker_tracks)} tracks.")
# min()/max() raise ValueError on an empty list, so the range is only
# reported when at least one track exists (same guard as the Acronet summary).
if dates:
    earliest, latest = min(dates), max(dates)
    print(f"Earliest ingested data: {earliest}")
    print(f"Most recent ingested data: {latest}")

## Total Acronet datasets

In [None]:
def get_all_granules_by_platform(platform):
    """List every data granule for ``platform`` together with its timestamp.

    Queries the ``list`` endpoint filtered by platform and returns
    ``(granules, dates)``, where ``dates[i]`` is ``granules[i]["datetime"]``
    parsed with ``datetime.fromisoformat``. Raises ``AssertionError`` if any
    returned granule's ``mars_request`` names a different platform.
    """
    response = api_get("list", params = {'platform': platform})
    granules = response.json()
    dates = []
    for granule in granules:
        dates.append(datetime.fromisoformat(granule["datetime"]))
    # Sanity check: every granule handed back must belong to this platform.
    for granule in granules:
        assert granule["mars_request"]["platform"] == platform
    return granules, dates

# Acronet summary: the station count comes from the station listing, the date
# range and granule count from the granule listing.
acronet_granules, dates = get_all_granules_by_platform(platform = "acronet")
if dates:
    # Only report when at least one granule exists.
    earliest = min(dates)
    latest = max(dates)

    print(f"Acronet: {len(all_by_platform['acronet'])} distinct stations.")
    print(f"Earliest ingested data: {earliest.date()} Most recent ingested data: {latest.date()}")
    print(f"Ingested data: {len(acronet_granules)} acronet data granules.")

## Total Smart Citizen Kit datasets

In [None]:
# Smart Citizen Kit summary: station count from the station listing, date
# range and granule count from the granule listing.
sck_granules, dates = get_all_granules_by_platform(platform = "smart_citizen_kit")
# min()/max() raise ValueError on an empty list, so only report when the
# platform has at least one granule (same guard as the Acronet summary).
if dates:
    earliest, latest = min(dates), max(dates)

    print(f"Smart Citizen Kit: {len(all_by_platform['smart_citizen_kit'])} distinct stations.")
    print(f"Earliest ingested data: {earliest} Most recent ingested data: {latest}")
    print(f"Ingested data: {len(sck_granules)} data granules.")

## Retrieving chunked data for continuous data streams
This applies to the Acronet and Smart Citizen Kit platforms.

In [None]:
if all_by_platform["acronet"]:
    # Use the last Acronet entry of the station listing as the example.
    example_station = all_by_platform["acronet"][-1]
    print(example_station)
    # Anchor the 5-hour window on this station's own last timestamp. The
    # module-level `latest` is whatever an earlier summary cell assigned last
    # (the Smart Citizen Kit one in notebook order), not an Acronet time.
    station_latest = datetime.fromisoformat(example_station["time_span"]["end"])
    args = {
        "start_time" : (station_latest - timedelta(hours = 5)).isoformat(),
        "end_time": station_latest.isoformat(),
    }
    data = api_get("retrieve", params = example_station["mars_selection"] | args | dict(format = "csv"))
    df = pd.read_csv(BytesIO(data.content))
    # NOTE(review): a bare expression nested inside `if` is not rendered by
    # Jupyter (only a cell's final top-level expression is); call display(df)
    # here if the table is meant to be shown.
    df

In [None]:
from matplotlib import pyplot as plt

def plot_example_station(example_station):
    """Fetch one station's last 48 hours of data and plot every numeric column.

    The window ends at ``example_station["time_span"]["end"]``. The data is
    retrieved as parquet using the station's ``mars_selection``. Every
    float64/int64 column that is not an identifier, coordinate or chunk field
    gets its own panel (scatter plus a line through the non-null samples), all
    sharing one x axis. Prints a message and draws nothing when there is no
    such column. Returns ``None``.
    """
    latest = datetime.fromisoformat(example_station["time_span"]["end"])
    print(f"Latest data: {latest}")
    args = {
        "start_time" : (latest - timedelta(hours = 48)).isoformat(),
        "end_time": latest.isoformat(),
        "format": "parquet"
    }

    data = api_get("retrieve", params = example_station["mars_selection"] | args)
    df = pd.read_parquet(BytesIO(data.content))

    exclude_columns = {"external_id", "date", "lat", "lon", "altitude", "chunk_date", "chunk_time"}
    numeric_columns = [col for col, dtype in zip(df.columns, df.dtypes)
                       if (dtype == "float64" or dtype == "int64")
                       and col not in exclude_columns]

    # plt.subplots(0, 1) raises, so bail out before creating a figure.
    if not numeric_columns:
        print("No numeric columns to plot.")
        return

    # squeeze=False always returns a 2-D array of Axes. With squeeze=True a
    # single numeric column yields a bare Axes object, which cannot be zipped.
    fig, axes = plt.subplots(len(numeric_columns), 1, figsize=(8, 2*len(numeric_columns)), squeeze = False, sharex=True)

    for ax, col in zip(axes[:, 0], numeric_columns):
        ax.set(ylabel = col.replace("_", "\n"))
        ax.scatter(df.index, df[col], 5, alpha = 0.5)

        # Join only the non-null samples so gaps do not break the line.
        not_nan = ~df[col].isnull()
        ax.plot(df.index[not_nan], df[col][not_nan], alpha = 0.5)
    
# Plot the last Acronet entry of the station listing, if there is one.
acronet_stations = all_by_platform["acronet"]
if acronet_stations:
    example_station = acronet_stations[-1]
    print(example_station)
    plot_example_station(example_station)

In [None]:
# Plot the last Smart Citizen Kit entry of the station listing, if there is one.
sck_stations = all_by_platform["smart_citizen_kit"]
if sck_stations:
    example_station = sck_stations[-1]
    print(example_station)
    plot_example_station(example_station)

## Retrieve and plot a Meteotracker track

In [None]:
# Use the last Meteotracker entry of the station listing as the example track.
example_station = all_by_platform["meteotracker"][-1]
print("track time span", example_station["time_span"])

# Show the selection, then ask the `list` endpoint which granules match it.
mars_selection = example_station["mars_selection"]
print(json.dumps(mars_selection, indent = 4))
granule_list = api_get("list", params = mars_selection).json()

print(f"The above MARS request matches {len(granule_list)} data granule(s).")

# Extra query parameters merged into the retrieve request in the next cell.
args = dict(format = "parquet")

In [None]:
# Download the selected track as parquet and load it into a DataFrame.
data = api_get("retrieve", params = example_station["mars_selection"] | args)
df = pd.read_parquet(BytesIO(data.content))

# Plot every float64/int64 column except identifiers and coordinates.
exclude_columns = {"external_id", "date", "lat", "lon", "altitude"}
numeric_columns = []
for col, dtype in zip(df.columns, df.dtypes):
    if col in exclude_columns:
        continue
    if dtype == "float64" or dtype == "int64":
        numeric_columns.append(col)

# One stacked panel per column, all sharing the x axis.
n_panels = len(numeric_columns)
axes = df.plot(
    y = numeric_columns,
    subplots=True, layout=(n_panels, 1), figsize=(8, 2*n_panels), sharex=True, rot=90)


In [None]:
import geopandas as gpd
# Build point geometries from the lon/lat columns (CRS 4326) and show them on
# an interactive map coloured by the "altitude" column.
track_points = gpd.points_from_xy(df.lon, df.lat)
geo_df = gpd.GeoDataFrame(df, geometry=track_points, crs=4326)
geo_df.explore(column = "altitude")

## Retrieve ALL data for ALL platforms from the last 48 hours

In [None]:
# Read the clock once so both ends of the 48-hour window come from the same
# instant, and send ISO 8601 strings like the other time-filtered requests in
# this notebook instead of relying on requests to str() a datetime object.
window_end = datetime.now(UTC)
data = api_get("retrieve", params = dict(
    # platform = "acronet",
    start_time = (window_end - timedelta(hours = 48)).isoformat(),
    end_time = window_end.isoformat(),
    format = "parquet"
))
df = pd.read_parquet(BytesIO(data.content))

In [None]:
import geopandas as gpd

# Keep only the first row for each station_id so that every station or track
# contributes a single marker to the map.
deduped = df.drop_duplicates(subset=["station_id"])
marker_points = gpd.points_from_xy(deduped['lon'], deduped['lat'])
gdf = gpd.GeoDataFrame(deduped, geometry=marker_points, crs="EPSG:4326")
gdf.explore()