# Dataviews with SDS: Grouping  

Still working with the Deschutes Brewery dataset, this nootebook explains how grouping concept for dataviews allows the construction of CSV table from multiple similar assets. 

## Imports 

In [1]:
# To run this notebook outside Academic JupyterHub: pip install requests-futures
# 
# For parallel HTTP requests
from concurrent.futures import ThreadPoolExecutor
from requests_futures.sessions import FuturesSession
import requests
# Pandas dataframe
import pandas as pd
import io, datetime, json

## Tenant and client credentials

### NOTE: this info should be abstracted when we deploy for a course (TBD)

In [2]:
# Tenant is OSIsoft Samples, use your own credentials
tenant_id = '4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93'
client_id = 'ff8220f7-6b7c-4477-b21e-8e2ca20649d4'  # input('client_id:')  # 
client_secret = 'tRiVPtWc6kgcxEw090Qi/7nwA+JfI4cLlaL34Edgx+M='  # input('client_secret:')  # 

## Get an the autorization token and define the http for upcoming requests

In [3]:
# Request a fresh authorization bearer token 
authorization = requests.post('https://login.microsoftonline.com/%s/oauth2/token' % tenant_id,
                              data={'grant_type': 'client_credentials',
                                    'client_id': client_id,
                                    'client_secret': client_secret,
                                    'resource': 'https://pihomemain.onmicrosoft.com/ocsapi'
                                   })

In [4]:
# Required headers for SDS endpoint
headers = {'Authorization': 'bearer %s' % authorization.json()['access_token'],
           'Content-type': 'application/json',
           'Accept': 'text/plain',
           'Request-Timeout': '120000' }
headers

{'Authorization': 'bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IndVTG1ZZnNxZFF1V3RWXy1oeFZ0REpKWk00USIsImtpZCI6IndVTG1ZZnNxZFF1V3RWXy1oeFZ0REpKWk00USJ9.eyJhdWQiOiJodHRwczovL3BpaG9tZW1haW4ub25taWNyb3NvZnQuY29tL29jc2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MubmV0LzRmYTg1ZGY0LTlmNWEtNDlmOC05NTRmLWRjZjBkNmUxZmY5My8iLCJpYXQiOjE1NDQ0OTY3OTQsIm5iZiI6MTU0NDQ5Njc5NCwiZXhwIjoxNTQ0NTAwNjk0LCJhaW8iOiI0MlJnWUhBc21YSm9xdWFaR1BuRnNyODNKZjBWQmdBPSIsImFwcGlkIjoiZmY4MjIwZjctNmI3Yy00NDc3LWIyMWUtOGUyY2EyMDY0OWQ0IiwiYXBwaWRhY3IiOiIxIiwiaWRwIjoiaHR0cHM6Ly9zdHMud2luZG93cy5uZXQvNGZhODVkZjQtOWY1YS00OWY4LTk1NGYtZGNmMGQ2ZTFmZjkzLyIsIm9pZCI6Ijc2MWNmMTdiLTAwMTctNGFlNy04MWE2LWZiNDc3YzNjZjAwMCIsInJvbGVzIjpbIm9jc19hY2Nlc3MiXSwic3ViIjoiNzYxY2YxN2ItMDAxNy00YWU3LTgxYTYtZmI0NzdjM2NmMDAwIiwidGlkIjoiNGZhODVkZjQtOWY1YS00OWY4LTk1NGYtZGNmMGQ2ZTFmZjkzIiwidXRpIjoiajNlbUdvZHY4RWUwLVlvVHBLV0xBQSIsInZlciI6IjEuMCJ9.IBZL_6RSaKiETC5imIi87H1I5nlNcF8i5eU4USQxfD11-fz65Lyk-bbRMh-7kaprvnHVX5kJHLwoQuq4BcCiiJZD7Zo4S-Kq0e76pezjASvjkrV4

## Endpoint on MAIN cluster, namespace Brewing (data from Deschutes)

In [5]:
# Endpoint for dataview access
version = 30
endpoint = 'https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/'

In [6]:
def or_clause(start, number=1):
    return { "Or" : [ { "Type": "StreamName", "Operator": "Contains", "Value": "%d" % i} for i in range(start, start+number)]}
# test
print(or_clause(31,4))

{'Or': [{'Type': 'StreamName', 'Operator': 'Contains', 'Value': '31'}, {'Type': 'StreamName', 'Operator': 'Contains', 'Value': '32'}, {'Type': 'StreamName', 'Operator': 'Contains', 'Value': '33'}, {'Type': 'StreamName', 'Operator': 'Contains', 'Value': '34'}]}


## Generate a JSON for a dataview

Here: 20 days worth of data for one fermenter, interpolated at 1 minute interval  

**TODO**: Documentation for dataview available at: TBD

**TODO**: show how data is stored in SDS before getting to this JSON  

In [7]:
# Grouping
# 
def generate_dataview_def(number_of_fermentors, version):
    dataview_id = 'fermentors_1min_group_num%d_v%d' % (number_of_fermentors, version)
    return dataview_id, {
        "Id": "%s" % dataview_id,
        "Queries": [
        {   
            "Id": "Fermentor",
            "Query": {
                "And": [ 
                    { "Type": "StreamName", "Value": "Fermentor", "Operator": "Contains" },
                    or_clause(31, number_of_fermentors)
                ]   
            }
        }
    ],
    "GroupRules": [
        {   
            "Id": "StreamName",
            "Type": "StreamName",
            "TokenRules":
            {
                "Tokens": [ "{number}" ],
                "Patterns": [
                    {
                        "QueryId": "*",
                        "Value": "Fermentor {number}"
                    }]
            }
        }
    ],
    "IndexDataType": "DateTime",
    "IndexConfig": {
            "IsDefault": False,
            "StartIndex": "2017-03-18T00:00:00Z",
            "EndIndex": "2017-03-19T00:00:00Z",
            "Mode": "Interpolated",
            "Interval": "00:01:00"
        }, 
    "Mappings": { "IsDefault": True }
   }

## Creation of the Group Dataviews, grouping fermentors 31 up to 36

In [8]:
dataviews = []
for n in range(1, 4): # 7 
    dataview_name, dataview_def = generate_dataview_def(n, 42)
    dataviews.append(dataview_name)
    response = requests.post(endpoint + dataview_name, headers=headers, json=dataview_def)
    print(response.status_code, dataview_name, response.text[:160]+'...')

409 fermentors_1min_group_num1_v42 {"OperationId":"fb14a038-b06b-4509-b9cf-493dfdbe9ddd","Error":"Data view with specified id already exists.","DataViewId":"fermentors_1min_group_num1_v42"}...
409 fermentors_1min_group_num2_v42 {"OperationId":"4bfb7e75-ff3d-4be0-97db-e66e43a0ef62","Error":"Data view with specified id already exists.","DataViewId":"fermentors_1min_group_num2_v42"}...


409 fermentors_1min_group_num3_v42 {"OperationId":"70690915-bca4-423f-8f99-490af88038ad","Error":"Data view with specified id already exists.","DataViewId":"fermentors_1min_group_num3_v42"}...


## List of Dataviews URLs 

In [9]:
# We want 20 days of data worth at 1 minute interval, for fermenter 31 up to 36
dataviews_url = [endpoint + '%s/preview/interpolated?form=csvh' % d for d in dataviews]
dataviews_url

['https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermentors_1min_group_num1_v42/preview/interpolated?form=csvh',
 'https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermentors_1min_group_num2_v42/preview/interpolated?form=csvh',
 'https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermentors_1min_group_num3_v42/preview/interpolated?form=csvh']

In [10]:
for dv in dataviews_url:
    t0 = datetime.datetime.now()
    r = requests.get(dv, headers=headers)
    df = pd.read_csv(io.StringIO(r.text), parse_dates=['_time'])
    df.info(max_cols=2)
    print('### Requests completed in', datetime.datetime.now() - t0, 'with status:', r.status_code, 'for DV:', dv, '###') 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1441 entries, 0 to 1440
Columns: 17 entries, _time to ADF
dtypes: datetime64[ns](1), float64(9), int64(4), object(3)
memory usage: 191.5+ KB
### Requests completed in 0:00:01.064962 with status: 200 for DV: https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermentors_1min_group_num1_v42/preview/interpolated?form=csvh ###


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2882 entries, 0 to 2881
Columns: 17 entries, _time to ADF
dtypes: datetime64[ns](1), float64(10), int64(3), object(3)
memory usage: 382.8+ KB
### Requests completed in 0:00:04.689856 with status: 200 for DV: https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermentors_1min_group_num2_v42/preview/interpolated?form=csvh ###


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4323 entries, 0 to 4322
Columns: 17 entries, _time to ADF
dtypes: datetime64[ns](1), float64(10), int64(3), object(3)
memory usage: 574.2+ KB
### Requests completed in 0:00:11.877725 with status: 200 for DV: https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermentors_1min_group_num3_v42/preview/interpolated?form=csvh ###


### List of column names with their type

Note that the `_time` column has the correct datetime panda data type 

In [11]:
for c in df.columns:
    print(c, df[c].dtype)

_time datetime64[ns]
StreamName_{number} int64
Quality_percent int64
Volume_barrel float64
Top_TIC_PV_degree Fahrenheit float64
Top_TIC_OUT_percent float64
Status object
Plato float64
Middle_TIC_PV_degree Fahrenheit float64
Middle_TIC_OUT_percent float64
FV_Full_Plato float64
Fermentation_ID object
Brand object
Bottom_TIC_PV_degree Fahrenheit float64
Bottom_TIC_OUT_percent float64
Bottom_Temperature_degree Fahrenheit int64
ADF float64


### List of unique Fermentation ID

We want to track the Apparent Degree of Fermentation (ADF) per fermentation batch tracked by ID 

In [12]:
for f in df.Fermentation_ID.unique():
    print(f, isinstance(f, str))

Fermentor 31201731179653 True
nan False
FV322016113055113 True
Fermentor 33201731511870 True
Fermentor 3320173183371 True


### Prepare ADF curve plots over time 

In [13]:
import plotly.graph_objs as go

figs = []
data = []
for f in df.Fermentation_ID.unique():
    trace = go.Scattergl(x = df[df.Fermentation_ID == f]['_time'], y = df[df.Fermentation_ID == f]['ADF'], mode='lines+markers', name=str(f))
    figs.append(go.FigureWidget(data=[trace]))
    data.append(trace)

### Add a range slider 

With a few time range selectors: 8 hours, 1 day and everything 

Note: range slider is grey now because of an incompatibility with ScatterGL: https://github.com/plotly/plotly.js/issues/2627

In [14]:
layout = dict(
    title='Brewing ADF with time range slider',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=8,
                     label='8h',
                     step='hour',
                     stepmode='backward'),
                dict(count=1,
                     label='1d',
                     step='day',
                     stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(
            visible = True
        ),
        type='date'
    )
)
        
fig = go.FigureWidget(data=data, layout=layout)
fig

FigureWidget({
    'data': [{'mode': 'lines+markers',
              'name': 'Fermentor 31201731179653',
      …

## Clean up: delete Dataviews  

* Code 204 if deletion is successful
* Code 404 if requested Dataview Id doesn't exist or already deleted

In [15]:
for dv in dataviews_url:
    dv_url = dv[:dv.find('/preview')]
    s = requests.delete(dv_url, headers=headers)
    print(s.status_code, dv_url)

204 https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermentors_1min_group_num1_v42
204 https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermentors_1min_group_num2_v42
204 https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermentors_1min_group_num3_v42
