In [1]:
# Allows code to live reload
%load_ext autoreload
%autoreload 2

## 1. Install
```sh
# Install prerequisites
pip install pyodc

```

## 2. Make an ECMWF account
- Go to ecmwf.int/, click login at the top right and click register to make a new account.
- Once logged in, go to api.ecmwf.int/v1/key/ to get your key. 
- Put it in `~/.ecmwfapirc` as directed.

In [9]:
# Load in the ECMWF token 
from pathlib import Path
import json
import requests
from IPython.display import JSON
from datetime import datetime as dt
from datetime import timedelta, timezone
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

with open(Path("~/.ecmwfapirc").expanduser(), "r") as f:
    api_creds = json.load(f)

print("Checking API credentials")
r = requests.get(f"https://api.ecmwf.int/v1/who-am-i?token={api_creds['key']}")
if r.status_code == 403: print("Your credentials are either wrong or need to be renewed at https://api.ecmwf.int/v1/key/")
r.raise_for_status()
JSON(r.json())

Checking API credentials


<IPython.core.display.JSON object>

In [10]:
session = requests.Session()
session.headers["Authorization"] = f"Bearer {api_creds['key']}"

In [11]:
url = "https://ionbeam-dev.ecmwf.int/api/v1/"

## Metadata Catalogue

These endpoints are served separately from the data, which is served through polytope.

The stations endpoint returns an object for each station and track in the database. It contains the requested information.

Expected output:
```json
{
    "external_id": "62ab72c11d8e11061d32002a",
    "platform": "meteotracker",
    "location_point": "POINT (8.65711845 44.3934191)",
    "location_bbox": "POLYGON ((8.675219 44.386031, 8.675219 44.4008072, 8.6390179 44.4008072, 8.6390179 44.386031, 8.675219 44.386031))",
    "location_hull": "POLYGON ((8.6391694 44.386031, 8.6391037 44.3860898, 8.6390179 44.3863223, 8.6391489 44.3864548, 8.6393916 44.3866083, 8.6636556 44.3988833, 8.6672147 44.4002499, 8.6676504 44.4003547, 8.6690215 44.4005383, 8.6708526 44.4007344, 8.6736574 44.4008072, 8.6742229 44.4007635, 8.6746885 44.4006371, 8.6750503 44.4005098, 8.675219 44.400353, 8.6750815 44.4002186, 8.6655395 44.3914033, 8.665203 44.3911424, 8.664728 44.3910255, 8.6391694 44.386031))",
    "start_time": "2022-06-16T18:13:15Z",
    "stop_time": "2022-06-16T18:21:09Z",
    "authors": [
        {
            "name": "genova_living_lab_1",
            "description": null,
            "url": null
        },
        {
            "name": "meteotracker",
            "description": null,
            "url": null
        }
    ],
    "sensors": [
        {
            "name": "meteotracker Sensor",
            "description": "A placeholder sensor that is likely composed of multiple physical devices.",
            "url": null,
            "properties": [
                {
                    "key": "air_pressure_near_surface",
                    "name": "air_pressure_near_surface",
                    "unit": "Pa",
                    "description": null,
                    "url": null
                },
...
```

In [12]:
stations = session.get(url + "stations").json()
print(f"{len(stations) = }")
# print(json.dumps(stations[0], indent = 4))

len(stations) = 3853


## Requesting all the observed parameters from a station

Each station is associated with one or more sensor objects which each measure one or more observed property.

In [13]:
from datetime import datetime

# Obtain a station of interest
station = stations[-1]

for sensor in station["sensors"]:
    for property in sensor["properties"]:
        print(property)

{'key': 'track_id', 'name': 'track_id', 'unit': None, 'description': 'A unique identifer for a single recorded track from a mobile sensor.', 'url': None}
{'key': 'air_pressure_near_surface', 'name': 'air_pressure_near_surface', 'unit': 'Pa', 'description': None, 'url': None}
{'key': 'relative_humidity_near_surface', 'name': 'relative_humidity_near_surface', 'unit': '%', 'description': None, 'url': None}
{'key': 'solar_radiation_index', 'name': 'solar_radiation_index', 'unit': '1', 'description': None, 'url': None}
{'key': 'vertical_temperature_gradient', 'name': 'vertical_temperature_gradient', 'unit': '°C/100m', 'description': None, 'url': None}
{'key': 'air_temperature_near_surface', 'name': 'air_temperature_near_surface', 'unit': 'K', 'description': None, 'url': None}
{'key': 'time', 'name': 'time', 'unit': None, 'description': 'The time that the observation was made.', 'url': None}
{'key': 'offset_tz', 'name': 'offset_tz', 'unit': None, 'description': 'Uncertain what this means in 

## Retrieving data

To get data we need to construct a **MARS request** for the data and submit it via **polytope**. 

In this case the three pieces of information we need are: `platform`, `observation_variable` and a one hour time window of interest referred to by an ISO 8601 `datetime`. The data returned will correspond to the one hour window that the supplied datetime falls into if one is supplied.

The `format` keyword may be added to specify the return type as either "csv", "json" or "odb".

The `filter` keyword takes as an argument an sql query that must be directed at a table called "result". We use it here to return only rows that correspond to the station of interest.

### Setup the python frontend for the Polytope REST API

In [14]:
# Polytope can also be used as a pure REST API, instructions here: https://polytope-client.readthedocs.io/en/latest/client/rest_api.html
from polytope.api import Client
import pandas as pd
from io import BytesIO

polytope_client = Client(address='polytope-test.ecmwf.int', 
                verbose=False, insecure=False, 
                user_email = api_creds["email"], 
                user_key = api_creds["key"])

### Obtain a single data granule in its entirety

In [8]:
# Obtain a particular station by id


station = max(stations, key = lambda s : dt.fromisoformat(s["stop_time"]))

source_id = station["external_id"]
platform = station["platform"]
parameter = "air_temperature_near_surface"

start_time = datetime.fromisoformat(station["start_time"])
stop_time = datetime.fromisoformat(station["stop_time"])
timespan = stop_time - start_time

print(f"""
{source_id = }
{platform = }
{parameter = }
{start_time = }
{stop_time = }
{timespan = }
""")

mars_request = {
    'project' : 'public',
    'platform' : 'meteotracker',
    'variable' : parameter,
    'datetime' : start_time,
    'format' : 'csv' # json, csv or odb
}

data = polytope_client.retrieve('iot', mars_request)
print(f"{len(data) = }")
df = pd.read_csv(BytesIO(data[0]))

for column in df.columns:
    if df[column].nunique() < 10: print(f"{column} unique entries: {df[column].unique()}")

df

NameError: name 'dt' is not defined

In [None]:
geo_df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=4326)

geo_df.explore(column = "altitude")

## Filtering data granules with SQL queries

In [None]:
mars_request = {
    "project" : "public", # This key will be used for access control and will change to 'ichange' in future versions
    "platform" : platform,
    "observation_variable" : parameter,
    "datetime": start_time.isoformat(),
    "filter" : f"select * from result where source_id = '{source_id}';",
    "format" : "csv",
}

print(mars_request)


data = polytope_client.retrieve('iot', mars_request)
print(f"{len(data) = }")

df = pd.read_csv(BytesIO(data[0]))
# assert(list(df["source_id"].unique()) == [source_id,])

df

In [None]:
mars_request = {
    "project" : "public",
    "platform" : platform,
    "observation_variable" : parameter,
    "datetime": start_time.isoformat(),
    "filter" : f"select * from result where source_name like 'genova_living_lab%';",
    "format" : "csv",
}

data = polytope_client.retrieve('iot', mars_request)
print(f"{len(data) = }")

df = pd.read_csv(BytesIO(data[0]))
df

## Direct REST API


In [None]:
mars_request = {
    "project" : "public", # This key will be used for access control and will change to 'ichange' in future versions
    "platform" : platform,
    "observation_variable" : 'air_temperature_near_surface',
    "datetime": start_time.isoformat(),
    "filter" : f"select * from result where source_id = '{source_id}';",
    "format" : "csv",
}

list_resp = session.get(url + "list", params = mars_request)
list_resp.json()

In [None]:
data = session.get(url + "retrieve", params = mars_request)

In [None]:
data.content