# How to work with istSOS
## An example of basic data consistency checks

In the next cell we are going to:

* load the autoreload extension, which keeps externally edited library code up to date
* activate autoreload
* enable plotting of graphs directly inside Jupyter

In [None]:
%load_ext autoreload
%autoreload 2

import sys
import matplotlib
%matplotlib inline

try:
    from oatlib import sensor, oat_utils
except ImportError:
    !pip install oatlib
    from oatlib import sensor, oat_utils

### ⚒ Now we load some other libraries needed by this notebook

In [None]:
import requests
from dateutil import parser
import matplotlib.pyplot as plt
from statistics import stdev
import json

### ⚒ Set istSOS variables:

In [None]:
# Base URL of the istSOS server and the name of the service queried below.
server = 'http://istsos.org/istsos'
service = 'demo'
# Credentials passed to oat_utils.get_istSOS_Procedures; both are None here
# (presumably the demo service needs no authentication — confirm).
user = None
pwd = None

### ⚒ List all the procedures

In [None]:
# Fetch the procedures of the configured service using the settings above.
procedures = oat_utils.get_istSOS_Procedures(server, service, user, pwd)

The variable procedures is now a **LIST** of **DICTIONARIES**.   
Each object contains the information of a procedure such as:
*sensortype, name, assignedid, offerings, id,
samplingTime, observedproperties, description.*

### ⚒ Let's print some information on procedures

In [None]:
# Summarise what was fetched: how many procedures, which keys they expose
# and their names.
print(f"Number of the procedures fetched: {len(procedures)}")
# Only peek at the first procedure when there is one; an empty result would
# otherwise raise IndexError here.
if procedures:
    print(f"\nKeys for each procedure: {procedures[0].keys()}")
print('\nNames of the procedures fetched:')
for procedure in procedures:
    print('\t' + procedure['name'])

### ⚒ Now we are going to choose one procedure from the list above
to retrieve the ***time_interval*** of the data and the observed properties

In [None]:
# Keep the first procedure whose 'name' equals 'T_LUGANO' and show it
# as the cell output.
procedure = oat_utils.filter_procedures(
    procedures, 'name', '=', 'T_LUGANO'
)[0]
procedure

### ⚒ Take a look at the specific procedure properties

In [None]:
# Join the begin and end positions of the sampling time into "begin/end".
time_interval = '/'.join((
    procedure['samplingTime']['beginposition'],
    procedure['samplingTime']['endposition'],
))
print(f'Time interval: {time_interval}')

# observed_properties is a list of dicts, one per observed property
observed_properties = procedure['observedproperties']

print(
    f"\nNumber of observed properties for the procedure {procedure['name']}: "
    f"{len(observed_properties)}"
)
print(f'\nList of the observed properties: {observed_properties}')




### ⚒ Load data from *istSOS* using *OAT*:

In [None]:
# Period to request, written as a "start/end" interval.
event_time = '2018-05-01T00:10:00+0100/2018-06-01T02:00:00+0200'

# Name and unit of measure of the procedure's first observed property.
observed_property_name = observed_properties[0]['name']
observed_property_uom = observed_properties[0]['uom']

# Address of the service: <server>/<service>.
istsos_url = f'{server}/{service}'

# No credentials are set; the commented line shows the (user, pwd) form.
# basic_auth = (user, pwd)
basic_auth = None

# Build the sensor object for the chosen procedure from istSOS.
SENSOR = sensor.Sensor.from_istsos(
    procedure=procedure['name'],
    observed_property='temperature',
    service=istsos_url,
)

### ⚒ See what SENSOR looks like --> it has the metadata automatically filled

In [None]:
# Last expression of the cell: shows the sensor object as the cell output.
SENSOR

### ⚒ Now we gather the actual observations

In [None]:
# Request the observations for the chosen period and attach them to SENSOR.
SENSOR.ts_from_istsos(
    service=istsos_url,
    # The property name is prefixed with ':' and its '-' become ':'.
    observed_property=f":{observed_property_name.replace('-', ':')}",
    procedure=procedure['name'],
    # Use the basic_auth setting defined alongside istsos_url instead of a
    # hard-coded None, so enabling credentials there also applies here.
    basic_auth=basic_auth,
    # aggregate_function='AVG',
    # aggregate_interval='PT10M',
    event_time=event_time
)
SENSOR

### ⚒ Plotting the TimeSeries

In [None]:
# Draw the observations and label the axes with the property name and unit.
SENSOR.ts['data'].plot(label=observed_property_name, legend=True)
plt.xlabel('Date')
plt.ylabel(f'{observed_property_name} ({observed_property_uom})')
plt.show()

## Quality check functions

[Click me](./qc_aws.pdf)

### ⚒ define the functions for quality checks

In [None]:
# [lower, upper] limits handed to plausible_value_check
# (presumably degrees Celsius — confirm against the observed property's unit).
range_values = [-80, 60]

# verify that data are within a plausible range for the observed variable
def plausible_value_check(x, range_values):
    """Flag a value that lies inside a closed plausibility range.

    Args:
        x: the value to test.
        range_values: sequence whose first two items are the lower and
            upper limits (both inclusive).

    Returns:
        300 when ``range_values[0] <= x <= range_values[1]``, otherwise False.
    """
    within_limits = range_values[0] <= x <= range_values[1]
    return 300 if within_limits else False

# verify that data changes in consecutive steps are within 4 sigma
def time_consistency_check(x):
    """Flag a three-value window whose step changes stay within 4 sigma.

    The absolute differences between the middle value and each of its two
    neighbours are summed and compared with four times the sample standard
    deviation of the same three values.

    NOTE(review): because sigma is computed from the very values being
    tested, the inequality appears to hold for any three finite numbers —
    confirm whether sigma should come from a longer window.

    Args:
        x: sequence of values; only windows of exactly three are evaluated.

    Returns:
        400 when the window has three values and passes the test,
        otherwise False (including windows of any other length).
    """
    if len(x) != 3:
        return False
    step_sum = abs(x[1] - x[0]) + abs(x[1] - x[2])
    threshold = 4 * stdev(x)
    return 400 if step_sum <= threshold else False

### ⚒ Apply the functions on a moving window along the series, then update the series with the new quality indexes

- rolling(T) applies a moving window of width T
- apply(X) applies the function X to each moving window

In [None]:
# Run both quality checks over the series and write the resulting quality
# indexes into SENSOR.ts['quality'].
# NOTE(review): this relies on update() changing the DataFrame column in
# place; verify that it still does under pandas copy-on-write.
try:
    # Width-1 window: every observation is tested on its own.
    ts_tmp = SENSOR.ts['data'].rolling(1).apply(
        lambda x: plausible_value_check(x, range_values), raw=True
    )
    # Failed checks yield 0, which where() masks so update() skips them.
    SENSOR.ts['quality'].update(ts_tmp.where(lambda x: x > 0))
    # Time-based window of 1440 seconds for the consistency check.
    ts_tmp2 = SENSOR.ts['data'].rolling('1440s').apply(
        time_consistency_check, raw=True
    )
    SENSOR.ts['quality'].update(ts_tmp2.where(lambda x: x > 0))
    print("Data checked.")
except Exception as err:
    # Report why the check failed instead of hiding it; unlike a bare
    # except, KeyboardInterrupt and SystemExit still propagate.
    print(f"Can't check data: {err!r}")

### ⚒ Uploading data to istSOS.

In [None]:
# Send the quality-checked sensor back to istSOS, keyed by the URN of the
# observed property it carries.
oat_utils.sensors_to_istsos(
    procedure=procedure['name'],
    service=istsos_url,
    how_merge='left',
    obspro_sensor={
        'urn:ogc:def:parameter:x-istsos:1.0:meteo:air:temperature': SENSOR,
    },
)

print('Data loaded. Check istSOS.')

### ⚒ Filter values with QI == 400

In [None]:
# Work on a copy so SENSOR itself is left as it is.
SENSOR_400 = SENSOR.copy()
# Keep only the rows whose quality index equals 400.
SENSOR_400.ts = SENSOR.ts[SENSOR.ts['quality'] == 400]

# Last expression of the cell: shows the filtered series as the cell output.
SENSOR_400.ts

### ⚒ Calculate statistics

In [None]:
# Column names used for the statistics.
MAX_COL = 'T_MAX'
TIME_MAX_COL = f'TIME_{MAX_COL}'
MIN_COL = 'T_MIN'
TIME_MIN_COL = f'TIME_{MIN_COL}'
MEAN_COL = 'T_MEAN'
COUNT = f'{MEAN_COL}_COUNT'

# One sensorStats call per statistic, each on the quality-400 subset.
daily_max = oat_utils.sensorStats(SENSOR_400, stat='max', column_name=MAX_COL)
daily_min = oat_utils.sensorStats(SENSOR_400, stat='min', column_name=MIN_COL)
daily_mean = oat_utils.sensorStats(SENSOR_400, stat='mean', column_name=MEAN_COL)

In [None]:
# Last expression of the cell: shows the 'max' statistics as the cell output.
daily_max

In [None]:
# Last expression of the cell: shows the 'min' statistics as the cell output.
daily_min

### ⚒ Calculate completeness

In [None]:
# Completeness as a percentage: the count column divided by 144, times 100.
# 144 is presumably one observation every 10 minutes over 24 hours — confirm.
# COUNT (defined with the other column-name constants) holds 'T_MEAN_COUNT',
# so the column name is no longer repeated as a literal here.
daily_mean.ts['Completeness (%)'] = (daily_mean.ts[COUNT] / 144) * 100
daily_mean

### ⚒ Plot the series

In [None]:
# Plot the 'T_MEAN' column with a legend entry.
daily_mean.ts['T_MEAN'].plot(legend=True, label='T_MEAN (celsius)')

In [None]:
# Plot the 'Completeness (%)' column with a legend entry.
daily_mean.ts['Completeness (%)'].plot(legend=True, label='Completeness (%)')