In [None]:
# Allows code to live reload: with mode 2, imported modules are reloaded
# before each cell executes, so edits to source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

## 1. Install
```sh
# Install prerequisites
pip install pyodc

```

## 2. Make an ECMWF account
- Go to ecmwf.int/, click login at the top right and click register to make a new account.
- Once logged in, go to api.ecmwf.int/v1/key/ to get your key. 
- Put it in `~/.ecmwfapirc` as directed.

In [None]:
# Load in the ECMWF token 
from pathlib import Path
import json
import requests
from IPython.display import JSON, display
from datetime import datetime as dt
from datetime import timedelta, timezone
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# Read the ECMWF credentials file; it is parsed as JSON and its "key" entry is
# the token used for every authenticated request in this notebook.
api_creds = json.loads(Path("~/.ecmwfapirc").expanduser().read_text())

print("Checking API credentials")
# Hand the token to requests via `params` so it is URL-encoded properly
# instead of being spliced into the URL string by hand.
r = requests.get("https://api.ecmwf.int/v1/who-am-i", params = {"token": api_creds["key"]})
if r.status_code == 403:
    print("Your credentials are either wrong or need to be renewed at https://api.ecmwf.int/v1/key/")
if not r.ok:
    # Response.raise_for_status() embeds the full request URL (token included)
    # in its message, which would then be stored in the notebook output.
    # Raise the same exception type with a message that omits the URL.
    raise requests.HTTPError(
        f"{r.status_code} {r.reason} returned by the ECMWF who-am-i endpoint",
        response = r,
    )
JSON(r.json())

In [None]:
# Shared HTTP session: every request made through it carries the ECMWF key
# as a bearer token in the Authorization header.
session = requests.Session()
session.headers.update({"Authorization": f"Bearer {api_creds['key']}"})

# API Endpoint

Documentation http://ionbeam-ichange.ecmwf-ichange.f.ewcloud.host/docs

In [None]:
# Base URL of the API; every endpoint path used in this notebook is appended to it.
# The hosted instance is kept commented out below; the active value targets a
# server on localhost port 5002.
# url = "http://ionbeam-ichange.ecmwf-ichange.f.ewcloud.host/api/v1/"
url = "http://localhost:5002/api/v1/"

### Get all stations

In [None]:
%%time
from datetime import datetime

stations = session.get(url + "stations", params = dict(platform = "smart_citizen_kit")).json()
print(f"{len(stations) = }")

In [None]:
# Show the first station record as a sample of what the stations endpoint returns.
stations[0]

### Get stations with recent data

In [None]:
%%time
from datetime import datetime, timedelta, UTC

recent_stations = session.get(url + "stations", params = {
    "start_time" : datetime.now(UTC) - timedelta(hours = 3) 
}).json()
print(f"data ingested in last hour: {len(recent_stations) = }")

In [None]:
# Call the list endpoint with no request parameters and count what comes back.
list_response = session.get(url + "list")
granules = list_response.json()
print(f"{len(granules) = }")

In [None]:
from collections import Counter

# Tally how many station records there are per platform.
types = Counter()
for station in stations:
    types[station["platform"]] += 1
types

In [None]:
from collections import defaultdict

# Bucket the station records by their platform name. A defaultdict is used so
# that looking up a platform with no stations yields an empty list.
by_platform = defaultdict(list)
for station in stations:
    by_platform[station["platform"]].append(station)


def _time_span_end(station):
    """Return the end of a station's time span, parsed from its ISO string."""
    return datetime.fromisoformat(station["time_span"]["end"])


print("Most recently updated entry from each platform\n")
for platform, platform_stations in by_platform.items():
    if not platform_stations:
        continue
    print(platform)
    # The last element after sorting by time-span end is the latest entry.
    latest = sorted(platform_stations, key = _time_span_end)[-1]
    print(json.dumps(latest, indent = 4))

## Obtain Stream data (Acronet, Smart Citizen Kit)

For the meteotracker data, each track is stored in a single data file. For continuous streams of data like the acronet stations, it is stored in 5 minute granules.

### Plot all Acronet stations in orange and Smart Citizen Kit stations in blue

In [None]:
import folium

# Named station_map rather than `map` so the builtin map() is not shadowed
# for the rest of the notebook.
station_map = folium.Map(location=[44.226414, 9.532785], zoom_start=7)

# One circle marker per station, coloured by platform. The station record
# itself is passed as the popup content.
platform_colors = {"acronet": "orange", "smart_citizen_kit": "blue"}
for platform_name, fill_color in platform_colors.items():
    for station in by_platform[platform_name]:
        folium.CircleMarker(
            location=(station["location"]["lat"], station["location"]["lon"]),
            popup = station,
            radius=5, fill_color=fill_color, fill_opacity=0.4, color="black", weight=1,
        ).add_to(station_map)
station_map

In [None]:
# Preview the first ten acronet station names, followed by a total count.
acronet_names = [entry["name"] for entry in by_platform["acronet"]]
acronet_names[:10] + [f"... {len(acronet_names)} in total"]

In [None]:
from random import choice

# Pick one of the recently active stations at random as the worked example
# for the cells below. No seed is set, so the selection can differ on each run.
example_station = choice(recent_stations)
example_station

### Data granules corresponding to an acronet station

In [None]:
# Ask the list endpoint which granules exist for the example station's request.
granule_listing = session.get(url + "list", params = example_station["mars_request"])
data_granules = granule_listing.json()

# Collect the (date, time) pair of each granule.
datetimes = []
for granule in data_granules:
    granule_request = granule["mars_request"]
    datetimes.append((granule_request["date"], granule_request["time"]))

# Every granule must have a distinct (date, time) pair.
assert len(set(datetimes)) == len(datetimes)
datetimes

In [None]:
import folium

# (lat, lon) of the example station; used both to centre the map and to
# position the marker.
p = (example_station["location"]["lat"], example_station["location"]["lon"])
# Named example_map rather than `map` so the builtin map() is not shadowed.
example_map = folium.Map(location=p, zoom_start=13)

# Add the point as a marker, with the station record as its popup content
folium.Marker(
    location=p,
    popup = example_station,
).add_to(example_map)
example_map

Giving the "mars_request" to the `list` endpoint returns the list of available data granules. For the acronet data and other continuous streams, there is an additional key "time" which gives the beginning of the interval in the format HHMM.

In the MARS system one needs to deal with these separate `date` and `time` fields, but for convenience two special arguments are added to the API: `start_time` and `end_time`. These ISO formatted fields are used to return the data granules within the defined temporal region.

* If `start_time` or `end_time` are supplied, `date` and `time` are ignored. 
* If `start_time` or `end_time` fall within the boundary of a data chunk, the whole chunk will be returned.
* If only one of `start_time` or `end_time` is supplied, the other is ignored.
  
* If neither `start_time` nor `end_time` are supplied, `date` and `time` will be used following the typical MARS language.

The `list` endpoint is not necessary, but can be useful to retrieve the available chunks for a mars request. The arguments to `list` and `retrieve` are the same: `list` returns what data granules exist and `retrieve` returns the data granules concatenated together.

### Getting data for an acronet station

In [None]:
# The request dict stored on the example station; it is what gets passed as
# query parameters to the `list` and `retrieve` endpoints.
example_station["mars_request"]

In [None]:
from io import BytesIO

# Switch the example to the last acronet station in the grouped listing.
example_station = by_platform["acronet"][-1]

# Request window: the five hours leading up to now (UTC), ISO formatted.
now = datetime.now(UTC)
args = {
    "start_time" : (now - timedelta(hours = 5)).isoformat(),
    "end_time": now.isoformat(),
}

# The same merged request is sent to both endpoints: `list` reports which
# granules exist, `retrieve` returns their contents (as CSV here).
request_params = example_station["mars_request"] | args
granules = session.get(url + "list", params = request_params).json()
data = session.get(url + "retrieve", params = request_params | dict(format = "csv"))

if not data.ok:
    print("error: ", data.json())
else:
    df = pd.read_csv(BytesIO(data.content))
    # A bare `df` inside a branch is not the cell's last top-level expression,
    # so Jupyter would not render it; display it explicitly.
    display(df)

In [None]:
# Column labels of the retrieved data frame.
df.columns

In [None]:
# Distinct internal ids found among the listed granules. Only a cell's last
# expression is rendered automatically, so this one is displayed explicitly.
display({g["mars_request"]["internal_id"] for g in granules})
# The last three granule records.
granules[-3:]

### Plot showing data granule boundaries and station time span along with data

Any missing granules will show up as white areas

- Green vertical lines: data granule boundaries
- Green shaded: places we have data
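- Black vertical line: the current time (drawing the station time span is currently commented out in the code)
- Red vertical lines: user timespan request (currently commented out in the code)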

In [None]:
show_granule_boundaries = True

# Width of the shaded band drawn for each granule, starting at its date/time.
# NOTE(review): the prose earlier in this notebook describes 5 minute granules
# for continuous streams — confirm that 60 minutes is the intended width.
granule_length = timedelta(minutes = 60)

df["datetime"] = pd.to_datetime(df["datetime"])

# Every numeric column gets its own subplot, stacked vertically on a shared x axis.
ys_to_plot = df.select_dtypes(include='number').columns

axes = df.plot(
    x = "datetime",
    y = ys_to_plot,
    subplots=True, layout=(len(ys_to_plot), 1), figsize=(8, 1*len(ys_to_plot)), sharex=True, rot=90)

# Work out the (start, end) of each granule once, rather than once per axis.
# The names avoid `dt`, which would overwrite the `datetime as dt` alias
# imported at the top of the notebook.
granule_spans = []
if show_granule_boundaries:
    for g in granules:
        granule_start = datetime.strptime(
            g["mars_request"]["date"] + g["mars_request"]["time"], "%Y%m%d%H%M"
        )
        granule_spans.append((granule_start, granule_start + granule_length))

# A single "now" so the marker sits at the same x position on every subplot.
plot_time = datetime.now(UTC)

for ax in axes[:, 0]:
    for span_start, span_end in granule_spans:
        ax.axvline(span_start, linestyle = "dotted", color = "green")
        ax.axvline(span_end, linestyle = "dotted", color = "green")
        ax.axvspan(span_start, span_end, alpha = 0.1, color = "green")

    ax.axvline(plot_time, linestyle = "dotted", color = "black")

    # Disabled: station time span (black) and requested window (red).
    # for d in example_station["time_span"].values():
    #     ax.axvline(datetime.fromisoformat(d), linestyle = "dotted", color = "black")

    # ax.axvline(args["start_time"], linestyle = "dotted", color = "red")
    # ax.axvline(args["end_time"], linestyle = "dotted", color = "red")

plt.tight_layout()