In [None]:
%load_ext autoreload
%autoreload 2

## The Sensing Entities
The entities of the SensorThings API's Sensing component are illustrated in the following figure.

<img src="img/STA_schema.png" width="800">

To maintain proper references between elements, you must create them in a specific order:

1. Thing
2. Location
3. ObservedProperty
4. Sensor
5. Datastream
6. Observation (+FeatureOfInterest)



# STA API time-travel extension
This extension helps istSTA users access historical data ("time travel"). It enables users to retrieve information from a web service as it appeared at a specific moment in time, using a new query parameter called **as_of**.

Additionally, it introduces a new entity called Commit, which enables data lineage, allowing users to trace data changes. 
From a scientific perspective, this extension enables FAIR data management by allowing datasets to be permanently cited. This is achieved by using a combination of the service address (<font color='red'>in red</font>), the request that returns the dataset (<font color='green'>in green</font>), and the dataset's status at a specific time instant (<font color='orange'>in orange</font>) as a Persistent Identifier for reference.

Example: <font color='red'>https://&lt;base_url&gt;/&lt;version&gt;/</font><font color='green'>&lt;entity&gt;?$expand=&lt;entity&gt;</font><font color='orange'>&\$as_of=&lt;date_time&gt;</font>

## Definition
The *time-travel* extension adds the following optional query parameters to any STA request:

| Parameter | Type               | Description                                                                       |
| --------- | ------------------ | --------------------------------------------------------------------------------- |
| *as_of*   | ISO 8601 date-time | a date-time parameter to specify the exact moment for which the data is requested |
| *from_to* | ISO 8601 period    | a period parameter to specify the time interval for which the data is requested   |

The *time-travel* extension introduces a new entity, Commit, with the following properties:

| Properties     | Type               | Multiplicity and use | Description                                                                    |
| -------------- | ------------------ | -------------------- | ------------------------------------------------------------------------------ |
| *author*       | string             | One (mandatory)      | Authority, Username or User Profile Link                                       |
| *encodingType* | string             | One (optional)       | The encoding type of the message (default is `text`).                          |
| *message*      | string             | One (mandatory)      | Commit message detailing the scope, motivation, and method of the transaction. |
| *actionType*   | string             | One (mandatory)      | The action type that applied the modifications (INSERT, UPDATE or DELETE)      |
| *date*         | ISO 8601 date-time | One (mandatory)      | A date-time that specifies the exact moment when the commit was executed.      |

Commits are related to SensorThings API entities with a one-to-zero-or-one (1:0..1) relationship.

### Preliminary Steps

This section contains the preliminary steps to set up the base URL, headers, and import necessary libraries.

In [None]:
!pip install saqc
!pip install folium

In [None]:
import json
import logging
import os
import random
from datetime import datetime, timedelta, timezone
from getpass import getpass

from IPython.display import display, Markdown

import utils as st

# LOG CONFIGURATION AND FORMATTING
# Record layout: one-letter level, timestamp, message.
DEFAULT_FORMAT = "[%(levelname)1.1s %(asctime)s] %(message)s"
formatter = logging.Formatter(fmt=DEFAULT_FORMAT)

# Console handler: only INFO and above are written, using the layout above.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)

# Configure the root logger in a single step. force=True removes any handler
# that is already attached, so re-running this cell never produces duplicated
# log lines. The root level stays at DEBUG (as before); what is actually shown
# is limited to INFO+ by the handler level set above.
root_logger = logging.getLogger()
logging.basicConfig(
    level=logging.DEBUG,
    handlers=[console_handler],
    force=True,
)

# Base URL of the SensorThings API service used throughout the notebook
base_url = "https://istsos.org/v4/v1.1/"

In [None]:
# Create the client object that the following cells use for every request to
# the service at `base_url`.
# NOTE(review): `sta` comes from the local `utils` module; the exact effect of
# `verbose=True` is not visible from this notebook — presumably more logging.
STA = st.sta(base_url, verbose=True)

### 1. Create an account and get the TOKEN

Answer the questions, preferably in lowercase. Please do not use spaces or special characters (e.g. *, ç, ").

In [None]:
# The username is also used as a prefix that is added to the station name
# (and to every other entity name created below).
username = input("Enter your username: ")
# getpass does not echo the password while typing, so it never ends up in the
# notebook output.
password = getpass("Enter your password: ")
uri = input("Enter your uri (e.g. ORCID): ")

# Try to register the account. A falsy result is not treated as fatal: when the
# notebook is re-run the account presumably already exists, and logging in is
# still possible.
if not STA.create_user(username, password, uri=uri, role="editor"):
    logging.warning(
        "User creation did not succeed (the account may already exist); "
        "trying to log in anyway."
    )

# Request the token in both cases so that the following cells are authenticated.
token_obj = STA.get_token(username, password)
if token_obj:
    # 'expires_in' is logged with a seconds unit, i.e. it is a duration.
    logging.info(f"Token expires in: {token_obj['expires_in']} s")
else:
    logging.error("No token received: check the username and password.")

# Set the prefix
prefix = username + "-"
logging.info("Your station name will be prefixed with: " + prefix)

### Creating multiple related entities with deep insert

In [None]:
# Candidate station positions (as "latitude, longitude"):
#   46.504235, 8.509085
#   46.507165, 8.513438
# The first element is drawn from the two 8.5… values and the second from the
# two 46.5… values, independently of each other.
coords = [
    random.choice([8.509085, 8.513438]),
    random.choice([46.504235, 46.507165]),
]


def _bed_datastream(
    short_name,
    description,
    unit_name,
    unit_symbol,
    property_name,
    property_description,
    sensor_name,
    sensor_description,
    sensor_metadata,
):
    """Return the deep-insert payload of one Datastream of the BED station.

    Every entity name is prefixed with the global ``prefix`` so that entities
    created by different users do not collide. The nested ObservedProperty and
    Sensor are created together with the Datastream.
    """
    return {
        "unitOfMeasurement": {
            "name": prefix + unit_name,
            "symbol": unit_symbol,
            "definition": "",
        },
        "description": description,
        "name": prefix + short_name,
        "observationType": "",
        "ObservedProperty": {
            "name": prefix + property_name,
            "definition": "",
            "description": property_description,
        },
        "Sensor": {
            "description": sensor_description,
            "name": prefix + sensor_name,
            "encodingType": "application/json",
            "metadata": sensor_metadata,
        },
    }


# The Thing description mentions "pressure"; a later cell updates it to "rain"
# to demonstrate the time-travel extension.
station_description = "Meteo station recording temperature, humidity, and pressure"

station_location = {
    "description": "Location of the BED meteo station",
    "name": prefix + "BED",
    "location": {
        "type": "Point",
        "coordinates": coords,
    },
    "encodingType": "application/vnd.geo+json",
}

# One Thing with its Location and three Datastreams, created in one request.
body = {
    "description": station_description,
    "name": prefix + "BED",
    "properties": {
        "keywords": "weather,station,temperature,humidity,pressure",
        "description": station_description,
    },
    "Locations": [station_location],
    "Datastreams": [
        _bed_datastream(
            "T_BED",
            "Temperature measurement",
            "Celsius degree",
            "°C",
            "atmosphere:temperature",
            "Air temperature",
            "TempSensor",
            "Temperature sensor",
            '{"brand": "SensorBrand", "type": "Temperature sensor"}',
        ),
        _bed_datastream(
            "H_BED",
            "Humidity measurement",
            "Percentage",
            "%",
            "atmosphere:humidity",
            "Air humidity",
            "HumiditySensor",
            "Humidity sensor",
            '{"brand": "SensorBrand", "type": "Humidity sensor"}',
        ),
        # NOTE(review): this Datastream observes rain with a pluviometer, yet
        # its description says "Pressure measurement" — confirm the wording.
        _bed_datastream(
            "P_BED",
            "Pressure measurement",
            "Millimiters",
            "mm",
            "atmosphere:rain",
            "Rain quantity",
            "PluviometerSensor",
            "Pluviometer sensor",
            '{"brand": "SensorBrand", "type": "Pluviometer sensor"}',
        ),
    ],
}

STA.create_thing(body, commit_message="Create the BED station")

### Insert Observations from CSV

In [None]:
# Upload the observations of every file in ./data. The part of the file name
# before the first dot is the un-prefixed Datastream name (e.g. T_BED.csv is
# loaded into "<prefix>T_BED").
# Sorting makes the processing order deterministic (os.listdir has no defined order).
files = sorted(os.listdir("data"))
for _file in files:
    csv_path = os.path.join("data", _file)
    # Sub-directories (e.g. .ipynb_checkpoints) are not data files.
    if not os.path.isfile(csv_path):
        continue
    logging.info(csv_path)
    datastream_name = _file.split(".")[0]
    dt = STA.query_api(
        "Datastreams", {"$filter": f"name eq '{prefix}{datastream_name}'"}
    )
    # Stray files (hidden files, unrelated CSVs) match no Datastream: report
    # and skip them instead of failing with an IndexError on dt[0].
    if not dt:
        logging.warning(
            f"No Datastream named '{prefix}{datastream_name}' found: skipping {csv_path}"
        )
        continue
    STA.csv2sta(csv_path, dt[0]["@iot.id"], max_rows=30000)

In [None]:
# Look up the Thing(s) whose name begins with the user prefix (OData
# `startswith` filter) and remember the id of the first match for later cells.
things_filter = {"$filter": f"startswith(name,'{prefix}')"}
thing = STA.query_api("Things", things_filter)
thing_id = thing[0]["@iot.id"]

# Show the raw response as a fenced JSON block.
json_data = json.dumps(thing, indent=2)
md = "```json\n" + json_data + "\n```"
display(Markdown(md))

### Update with time-travel extension

#### Retrieve Thing (With the as_of value set to the date of the request)

#### Update Thing

In [None]:
# Corrected wording: the station records rain, not pressure. The same text is
# written to the Thing description and to its properties.
corrected_description = "Meteo station recording temperature, humidity, and rain"
body = {
    "description": corrected_description,
    "properties": {
        "keywords": "weather,station,temperature,humidity,rain",
        "description": corrected_description,
    },
}

# Take the (local, naive) timestamp right before sending the update; a later
# cell derives its as_of instant from it.
datetime_update = datetime.now()
STA.update_thing(thing_id, body, commit_message="updated description")

#### Retrieve Thing with Commit properties

In [None]:
# Read the Thing back with its Commit expanded inline, then render the
# response as a fenced JSON block.
r = STA.query_api(f"Things({thing_id})", {"$expand": "Commit"})
md = "\n".join(["```json", json.dumps(r, indent=2), "```"])
display(Markdown(md))

#### Retrieve Thing at a specific instant (with the as_of value set to an instant shortly before the update)

In [None]:
# Build the as_of instant: one second before the update was sent.
# `datetime_update` is a naive local time, while the "Z" suffix declares the
# value as UTC. astimezone() interprets a naive datetime as local time and
# converts it, so the result is right in every time zone and with daylight
# saving (subtracting a fixed hour is only correct for a clock at UTC+1).
as_of_instant = datetime_update.astimezone(timezone.utc) - timedelta(seconds=1)
# Drop the tzinfo before formatting so that isoformat() does not append
# "+00:00" in front of the explicit "Z".
tmp_datetime = as_of_instant.replace(tzinfo=None).isoformat() + "Z"

# Same request as before, but as the Thing looked at that instant.
r = STA.query_api(
    f"Things({thing_id})",
    {"$expand": "Commit", "$as_of": tmp_datetime}
)
md = f"```json\n{json.dumps(r, indent=2)}\n```"
display(Markdown(md))

### FROM SensorThings API to Dataframe

In [None]:
# Copyright (c) 2024 SUPSI
# 
# This software is released under the MIT License.
# https://opensource.org/licenses/MIT

# Load the observations of every Datastream whose name starts with the user
# prefix. Two collections come back, both indexed by the same keys in the cells
# below: `dfs` (plotted and edited as DataFrames) and `qcs` (the objects the
# quality-check flagging methods are called on).
datastreams_filter = f"startswith(name,'{prefix}')"
dfs, qcs = STA.get_dfs_by_datastreams(datastreams_filter)

### Plot Observations

In [None]:
# One line chart of the raw results per Datastream.
for df in dfs:
    frame = dfs[df]
    frame.plot(
        y="result",
        title=df,
        figsize=(10, 4),
        grid=True,
        # The axis label is read from the first row of the 'ylabel' column.
        # .iloc[0] selects by position whatever the index type is; plain [0]
        # on a non-integer index relies on a deprecated positional fallback.
        ylabel=f"{frame['ylabel'].iloc[0]}",
    )

### Some quality checks examples

In [None]:
# Run a set of quality checks on every Datastream and copy the resulting flags
# into the 'resultQuality' column of the matching DataFrame.
# The kind of Datastream is recognised from its key: "-T_" temperature,
# "-H_" humidity, anything else is treated as rain.
for _qc in qcs:
    if "-T_" in _qc:
        logging.info(f"QC for {_qc} datastream temperature")
        qc = (
            qcs[_qc]
            .flagMissing("result", flag=90)  # flag missing values
            .flagConstants("result", thresh=0.1, window="1D", flag=91)  # flag constant values
            .flagZScore("result", window="1D", flag=92)  # flag z-score
            .flagRange("result", min=-20, max=50, flag=93)  # flag range
        )
        flag_codes = (90, 91, 92, 93)
        ylabel = "Temperature (°C)"
    elif "-H_" in _qc:
        logging.info(f"QC for {_qc} datastream humidity")
        qc = (
            qcs[_qc]
            .flagMissing("result", flag=90)
            .flagConstants("result", thresh=0.1, window="1D", flag=91)
            .flagZScore("result", window="1D", flag=92)
            .flagRange("result", min=0, max=100, flag=93)
        )
        flag_codes = (90, 91, 92, 93)
        ylabel = "Humidity (%)"
    else:
        logging.info(f"QC for {_qc} datastream rain")
        qc = (
            qcs[_qc]
            .flagMissing("result", flag=90)
            # NOTE(review): min and max are both 0.2 — confirm this is
            # intended (e.g. a fixed 0.2 mm gauge resolution).
            .flagRange("result", min=0.2, max=0.2, flag=91)
        )
        # For rain, 91 is the range flag (there is no constants/z-score check).
        flag_codes = (90, 91)
        ylabel = "Rain (mm)"

    # Copy each flag value into the DataFrame, in ascending order as before.
    for flag_code in flag_codes:
        dfs[_qc].loc[qc.flags["result"] == flag_code, "resultQuality"] = flag_code

    qc.plot(
        "result",
        ax_kwargs={
            "ylabel": ylabel,
        },
    )

### Plot quality flags for each Datastream

In [None]:
# One chart of the 'resultQuality' column per Datastream, titled with its key.
quality_plot_options = dict(y="resultQuality", figsize=(10, 4), grid=True)
for _df in dfs:
    dfs[_df].plot(title=_df, **quality_plot_options)

#### Update Observation outlier

In [None]:
# Timestamp (local, naive) taken right before the updates are sent; a later
# cell derives its as_of instant from it.
datetime_obs_update = datetime.now()

# Send the new quality flag of every flagged observation (resultQuality < 100)
# back to the service, one request per observation.
for df in dfs:
    frame = dfs[df]
    flagged = frame.loc[frame["resultQuality"] < 100]
    # The flag is read from the 'resultQuality' column by name instead of by a
    # hard-coded tuple position. The observation id is taken from the first
    # column, as before — presumably the "@iot.id" of the Observation.
    for observation_id, quality in zip(flagged.iloc[:, 0], flagged["resultQuality"]):
        body = {
            "resultQuality": str(quality)
        }
        STA.update_observation(
            observation_id,
            body,
            commit_message="Quality flagging"
        )
    print(f"Observations of Datastream {df} updated")

#### Retrieve Observation after update

In [None]:

# Show one example: the first flagged observation (resultQuality < 100) found
# in any Datastream, together with its Commit.
for df in dfs:
    frame = dfs[df]
    flagged = frame.loc[frame["resultQuality"] < 100]
    # Keep looking when this Datastream has no flagged row; stopping here
    # would display nothing even if another Datastream has one.
    if flagged.empty:
        continue
    # First column of the first flagged row: the observation id (as used for
    # the update requests).
    observation_id = flagged.iloc[0, 0]
    response = STA.query_api(
        f"Observations({observation_id})?$expand=Commit", travel_time=True
    )
    json_data = json.dumps(response, indent=2)
    md = f"```json\n{json_data}\n```"
    display(Markdown(md))
    break

#### Retrieve Observation outlier at a specific instant (with the as_of value set to one second prior to the update date)

In [None]:
# as_of instant: one second before the observation updates were sent.
# `datetime_obs_update` is a naive local time, while the "Z" suffix declares
# the value as UTC. astimezone() interprets a naive datetime as local time and
# converts it, which is right in every time zone (subtracting a fixed hour is
# only correct for a clock at UTC+1).
datetime_before_update = (
    datetime_obs_update.astimezone(timezone.utc) - timedelta(seconds=1)
)
# Drop the tzinfo so that isoformat() does not emit "+00:00" before the "Z".
datetime_before_update = datetime_before_update.replace(tzinfo=None).isoformat() + "Z"

# Show one example: the first flagged observation found in any Datastream, as
# it was at that instant.
for df in dfs:
    frame = dfs[df]
    flagged = frame.loc[frame["resultQuality"] < 100]
    # Keep looking when this Datastream has no flagged row.
    if flagged.empty:
        continue
    # First column of the first flagged row: the observation id.
    observation_id = flagged.iloc[0, 0]
    response = STA.query_api(
        f"Observations({observation_id})?$expand=Commit&$as_of={datetime_before_update}", travel_time=True
    )
    json_data = json.dumps(response, indent=2)
    md = f"```json\n{json_data}\n```"
    display(Markdown(md))
    break

#### Plot Observations after update

In [None]:
# Per Datastream: a line chart of the observations that kept a quality of 100
# or more, followed by a scatter chart of the flagged ones (quality < 100).
for df in dfs:
    frame = dfs[df]
    frame.loc[frame["resultQuality"] >= 100].plot(
        y="result",
        title=df,
        figsize=(10, 4),
        grid=True,
        # .iloc[0] selects the first row by position whatever the index type
        # is; plain [0] on a non-integer index relies on a deprecated
        # positional fallback.
        ylabel=f"{frame['ylabel'].iloc[0]}",
    )
    frame.loc[frame["resultQuality"] < 100].plot.scatter(
        x="phenomenonTime",
        y="result",
        grid=True,
    )

#### Retrieve Observation outlier within a time interval (between the date prior to the update and the date following it)

In [None]:
# Query the history of every flagged observation over the last 24 hours.
# The interval bounds carry a "Z" (UTC) suffix, so they are computed from the
# current UTC time; the clock is read once so both bounds share one reference.
now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
datetime_obs_update_str = now_utc.isoformat() + "Z"
datetime_before_update = (now_utc - timedelta(hours=24, seconds=1)).isoformat() + "Z"

for df in dfs:
    frame = dfs[df]
    flagged = frame.loc[frame["resultQuality"] < 100]
    # The first column holds the observation id used in the filter.
    for observation_id in flagged.iloc[:, 0]:
        response = STA.query_api(
            f"Observations?$filter=id eq {observation_id}&$from_to={datetime_before_update}/{datetime_obs_update_str}",
            travel_time=True,
        )
        json_data = json.dumps(response, indent=2)
        md = f"```json\n{json_data}\n```"
        display(Markdown(md))

In [None]:
# Copyright (c) 2024 SUPSI
# 
# This software is released under the MIT License.
# https://opensource.org/licenses/MIT



In [None]:

# Request a token again before the final query (presumably because the one
# obtained at the start may have expired by now).
STA.get_token(username, password)

# Fetch all Things with their Locations expanded and draw them on a map.
things_with_locations = STA.query_api("Things?$expand=Locations")
STA.map_things(things_with_locations)