# Dataviews with SDS: Grouping  

Still working with the Deschutes Brewery dataset, this nootebook explains how grouping concept for dataviews allows the construction of CSV table from multiple similar assets. 

## Imports 

In [1]:
# To run this notebook outside Academic JupyterHub: pip install requests-futures
# 
# For parallel HTTP requests
from concurrent.futures import ThreadPoolExecutor
from requests_futures.sessions import FuturesSession
import requests
# Pandas dataframe
import pandas as pd
import io, datetime, json, datetime, random
import papermill as pm

## Next is a parameter (tagged) cell for Papermill (https://github.com/nteract/papermill)

In [2]:
# Tenant is OSIsoft Samples, use your own credentials
tenant_id = '4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93'
client_id = 'ff8220f7-6b7c-4477-b21e-8e2ca20649d4'  
client_secret = 'tRiVPtWc6kgcxEw090Qi/7nwA+JfI4cLlaL34Edgx+M='
dv_start_time = '2017-03-18T00:00:00Z'
dv_days = 1
dv_interpolation_min = 1
dv_start_num_ferm = 1
dv_end_num_ferm = 6
dv_req_timeout = 240000  # in milliseconds
version = random.randint(0, 99)

In [3]:
# Parameters
dv_days = 3
dv_req_timeout = 240000
dv_end_num_ferm = 6
version = 20


In [4]:
format = '%Y-%m-%dT%H:%M:%SZ'
start_time = datetime.datetime.strptime(dv_start_time, format)
delta_time = datetime.timedelta(days=dv_days)
dv_end_time = datetime.datetime.strftime(start_time + delta_time, format)
dv_interp_time = '00:%02d:00' % dv_interpolation_min
test_specs = {'start_time': dv_start_time, 'end_time': dv_end_time, 'interp_time': dv_interp_time, 
              'start_num_ferm': dv_start_num_ferm, 'end_num_ferm': dv_end_num_ferm, 'version': version,
              'dv_days': dv_days } 
pm.record('test_specs', test_specs)

## Get an the autorization token and define the http for upcoming requests

In [5]:
# Request a fresh authorization bearer token 
authorization = requests.post('https://login.microsoftonline.com/%s/oauth2/token' % tenant_id,
                              data={'grant_type': 'client_credentials',
                                    'client_id': client_id,
                                    'client_secret': client_secret,
                                    'resource': 'https://pihomemain.onmicrosoft.com/ocsapi'
                                   })

In [6]:
# Required headers for SDS endpoint
headers = {'Authorization': 'bearer %s' % authorization.json()['access_token'],
           'Content-type': 'application/json',
           'Accept': 'text/plain',
           'Request-Timeout': str(dv_req_timeout) }
headers

{'Authorization': 'bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Im5iQ3dXMTF3M1hrQi14VWFYd0tSU0xqTUhHUSIsImtpZCI6Im5iQ3dXMTF3M1hrQi14VWFYd0tSU0xqTUhHUSJ9.eyJhdWQiOiJodHRwczovL3BpaG9tZW1haW4ub25taWNyb3NvZnQuY29tL29jc2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MubmV0LzRmYTg1ZGY0LTlmNWEtNDlmOC05NTRmLWRjZjBkNmUxZmY5My8iLCJpYXQiOjE1NDcyMjA1NjcsIm5iZiI6MTU0NzIyMDU2NywiZXhwIjoxNTQ3MjI0NDY3LCJhaW8iOiI0MkpnWUdEcllWc214YTY5Nk52alN0dTh5Yk9rQUE9PSIsImFwcGlkIjoiZmY4MjIwZjctNmI3Yy00NDc3LWIyMWUtOGUyY2EyMDY0OWQ0IiwiYXBwaWRhY3IiOiIxIiwiaWRwIjoiaHR0cHM6Ly9zdHMud2luZG93cy5uZXQvNGZhODVkZjQtOWY1YS00OWY4LTk1NGYtZGNmMGQ2ZTFmZjkzLyIsIm9pZCI6Ijc2MWNmMTdiLTAwMTctNGFlNy04MWE2LWZiNDc3YzNjZjAwMCIsInJvbGVzIjpbIm9jc19hY2Nlc3MiXSwic3ViIjoiNzYxY2YxN2ItMDAxNy00YWU3LTgxYTYtZmI0NzdjM2NmMDAwIiwidGlkIjoiNGZhODVkZjQtOWY1YS00OWY4LTk1NGYtZGNmMGQ2ZTFmZjkzIiwidXRpIjoiZmdnTFZ4ZkRCMFNvNVMyMVJYaXlBQSIsInZlciI6IjEuMCJ9.Q2gJOjquMyUgvgS_WafAzFI0Th5hIJHZPNBXeKcHCSKEbhSjbkVYBhuRxt-Xkkk1KFvtFmUcYIWFa4FNOnJS1gcT3Csx3GUq2lyW2NpW_YCGMIBK

## Endpoint on MAIN cluster, namespace Brewing (data from Deschutes)

In [7]:
# Endpoint for dataview access
endpoint = 'https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/'

In [8]:
def or_clause(start, number=1):
    return { "Or" : [ { "Type": "StreamName", "Operator": "Contains", "Value": "%d" % i} for i in range(start, start+number)]}
# test
print(or_clause(31,4))

{'Or': [{'Type': 'StreamName', 'Operator': 'Contains', 'Value': '31'}, {'Type': 'StreamName', 'Operator': 'Contains', 'Value': '32'}, {'Type': 'StreamName', 'Operator': 'Contains', 'Value': '33'}, {'Type': 'StreamName', 'Operator': 'Contains', 'Value': '34'}]}


## Generate a JSON for a dataview

Here: 20 days worth of data for one fermenter, interpolated at 1 minute interval  

**TODO**: Documentation for dataview available at: TBD

**TODO**: show how data is stored in SDS before getting to this JSON  

In [9]:
# Grouping
# 
def generate_dataview_def(number_of_fermentors, version):
    dataview_id = 'fermenter_group_num%d_v%d' % (number_of_fermentors, version)
    return dataview_id, {
        "Id": "%s" % dataview_id,
        "Queries": [
        {   
            "Id": "Fermentor",
            "Query": {
                "And": [ 
                    { "Type": "StreamName", "Value": "Fermentor", "Operator": "Contains" },
                    or_clause(31, number_of_fermentors)
                ]   
            }
        }
    ],
    "GroupRules": [
        {   
            "Id": "StreamName",
            "Type": "StreamName",
            "TokenRules":
            {
                "Tokens": [ "{number}" ],
                "Patterns": [
                    {
                        "QueryId": "*",
                        "Value": "Fermentor {number}"
                    }]
            }
        }
    ],
    "IndexDataType": "DateTime",
    "IndexConfig": {
            "IsDefault": False,
            "StartIndex": dv_start_time,
            "EndIndex": dv_end_time,
            "Mode": "Interpolated",
            "Interval": dv_interp_time
        }, 
    "Mappings": { "IsDefault": True }
   }

## Creation of the Group Dataviews 

In [10]:
dataviews = []
for n in range(dv_start_num_ferm, dv_end_num_ferm+1): 
    dataview_name, dataview_def = generate_dataview_def(n, version)
    dataviews.append(dataview_name)
    response = requests.post(endpoint + dataview_name, headers=headers, json=dataview_def)
    print(response.status_code, dataview_name, response.text[:160]+'...')

201 fermenter_group_num1_v20 {"Id":"fermenter_group_num1_v20","Name":"fermenter_group_num1_v20","Queries":[{"Id":"Fermentor","Query":{"And":[{"Type":"StreamName","Value":"Fermentor","Operat...


201 fermenter_group_num2_v20 {"Id":"fermenter_group_num2_v20","Name":"fermenter_group_num2_v20","Queries":[{"Id":"Fermentor","Query":{"And":[{"Type":"StreamName","Value":"Fermentor","Operat...


201 fermenter_group_num3_v20 {"Id":"fermenter_group_num3_v20","Name":"fermenter_group_num3_v20","Queries":[{"Id":"Fermentor","Query":{"And":[{"Type":"StreamName","Value":"Fermentor","Operat...


201 fermenter_group_num4_v20 {"Id":"fermenter_group_num4_v20","Name":"fermenter_group_num4_v20","Queries":[{"Id":"Fermentor","Query":{"And":[{"Type":"StreamName","Value":"Fermentor","Operat...
201 fermenter_group_num5_v20 {"Id":"fermenter_group_num5_v20","Name":"fermenter_group_num5_v20","Queries":[{"Id":"Fermentor","Query":{"And":[{"Type":"StreamName","Value":"Fermentor","Operat...


201 fermenter_group_num6_v20 {"Id":"fermenter_group_num6_v20","Name":"fermenter_group_num6_v20","Queries":[{"Id":"Fermentor","Query":{"And":[{"Type":"StreamName","Value":"Fermentor","Operat...


## List of Dataviews URLs for previews

In [11]:
# We want 20 days of data worth at 1 minute interval, for fermenter 31 up to 36
dataviews_url = [endpoint + '%s/preview/interpolated?form=csvh' % d for d in dataviews]
dataviews_url

['https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num1_v20/preview/interpolated?form=csvh',
 'https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num2_v20/preview/interpolated?form=csvh',
 'https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num3_v20/preview/interpolated?form=csvh',
 'https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num4_v20/preview/interpolated?form=csvh',
 'https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num5_v20/preview/interpolated?form=csvh',
 'https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num6_v20/preview/inter

In [12]:
# If encounter timeout (408), assumes rest of DVs with greater fermentors will fail as well
#
last_status = 200
for nf, dv in enumerate(dataviews_url, dv_start_num_ferm):
    if last_status == 200: 
        t0 = datetime.datetime.now()
        r = requests.get(dv, headers=headers)
        if r.status_code == 200:
            df = pd.read_csv(io.StringIO(r.text), parse_dates=['_time'])
            df.info(max_cols=2)
        else:
            df = pd.DataFrame() 
            last_status = r.status_code
        dv_time = datetime.datetime.now() - t0
    else:
        dv_time = datetime.timedelta(seconds=int(dv_req_timeout)/1000)
    print(df.shape)
    print('### Requests completed in', dv_time.total_seconds(), 'seconds\n### with status:', r.status_code, '\n### #fermentors:', nf, '\n### DV:', dv, '###') 
    test_name = 'ferm_num%d_v%d' % (nf, version)
    test_result = {'num_fermentors': nf, 'total_time': dv_time.total_seconds(), 'status': r.status_code, 'df.shape' : df.shape}
    pm.record(test_name, test_result)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4321 entries, 0 to 4320
Columns: 17 entries, _time to ADF
dtypes: datetime64[ns](1), float64(9), int64(4), object(3)
memory usage: 574.0+ KB
(4321, 17)
### Requests completed in 3.195855 seconds
### with status: 200 
### #fermentors: 1 
### DV: https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num1_v20/preview/interpolated?form=csvh ###


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8642 entries, 0 to 8641
Columns: 17 entries, _time to ADF
dtypes: datetime64[ns](1), float64(10), int64(3), object(3)
memory usage: 1.1+ MB
(8642, 17)
### Requests completed in 32.326787 seconds
### with status: 200 
### #fermentors: 2 
### DV: https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num2_v20/preview/interpolated?form=csvh ###


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12963 entries, 0 to 12962
Columns: 17 entries, _time to ADF
dtypes: datetime64[ns](1), float64(10), int64(3), object(3)
memory usage: 1.7+ MB
(12963, 17)
### Requests completed in 102.667408 seconds
### with status: 200 
### #fermentors: 3 
### DV: https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num3_v20/preview/interpolated?form=csvh ###


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17284 entries, 0 to 17283
Columns: 17 entries, _time to ADF
dtypes: datetime64[ns](1), float64(10), int64(3), object(3)
memory usage: 2.2+ MB
(17284, 17)
### Requests completed in 220.423664 seconds
### with status: 200 
### #fermentors: 4 
### DV: https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num4_v20/preview/interpolated?form=csvh ###


(0, 0)
### Requests completed in 240.09907 seconds
### with status: 408 
### #fermentors: 5 
### DV: https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num5_v20/preview/interpolated?form=csvh ###


(0, 0)
### Requests completed in 240.0 seconds
### with status: 408 
### #fermentors: 6 
### DV: https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num6_v20/preview/interpolated?form=csvh ###


## Clean up: delete Dataviews  

* Code 204 if deletion is successful
* Code 404 if requested Dataview Id doesn't exist or already deleted

In [13]:
for dv in dataviews_url:
    dv_url = dv[:dv.find('/preview')]
    s = requests.delete(dv_url, headers=headers)
    print(s.status_code, dv_url)

204 https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num1_v20


204 https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num2_v20
204 https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num3_v20


204 https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num4_v20
204 https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num5_v20


204 https://historianmain.osipi.com/api/Tenants/4fa85df4-9f5a-49f8-954f-dcf0d6e1ff93/Namespaces/Brewing/dataviews/fermenter_group_num6_v20
