# Dataviews with SDS: Introduction 

Dataviews allows data scientist to create and use table views which can be vary from simple like the ones in this notebook, to very elaborate. 

The dataset is from the Deschutes Brewery, with many fermentors which are the assets with the data of interest. 

This notebook shows the steps involved in creating and using Dataviews. 

## Imports 

In [319]:
# To run this notebook outside Academic JupyterHub: pip install requests-futures
# 
# For parallel HTTP requests
from concurrent.futures import ThreadPoolExecutor
from requests_futures.sessions import FuturesSession
import requests
# Pandas dataframe
import pandas as pd
import io, datetime, json
from random import randint
from enum import Enum

## Tenant and client credentials

### NOTE: this info should be abstracted when we deploy for a course (TBD)

In [320]:
# Tenant is osisoft on production cluster, use your own credentials
tenant_id = 'd7847614-2e4a-4c1e-812b-e8de5fd06a0f'
client_id = 'ad48b3b2-cdf0-499e-9458-eb42a77edb85'  # input('client_id:')  # 
client_secret = '42cArto7tigc5ea5iJeNd9mjfQKQaXpph0EtiOlgyS0='  # input('client_secret:')  # 
resource_url = 'https://dat-b.osisoft.com'
namespace = 'beer_fermenters'
api_version = 'v1-preview'

## Get an the autorization token and define the http for upcoming requests

In [321]:
# Request a fresh authorization bearer token 
discovery = requests.get(resource_url + '/identity/.well-known/openid-configuration', headers={'Accept': 'application/json'})
if discovery.status_code < 200 or discovery.status_code >= 300:
    raise Exception(f'Failed to get access token endpoint from discovery URL: {discovery.status_code}: {discovery.text}')
token_endpoint = discovery.json()['token_endpoint']
authorization = requests.post(token_endpoint, 
                              data={'client_id': client_id,
                                    'client_secret': client_secret,
                                    'grant_type': 'client_credentials',
                                   })

In [322]:
# Required headers for SDS endpoint
headers = {'Authorization': 'bearer %s' % authorization.json()['access_token'],
           'Content-type': 'application/json',
           'Accept': 'text/plain', 
           'Request-Timeout': '60000' }
headers

{'Authorization': 'bearer eyJhbGciOiJSUzI1NiIsImtpZCI6IjJDQjI4MzFEREJFRDc1NzAyM0NCMTM5OUVBRjRDMjkxQzE3MkQ5RjQiLCJ0eXAiOiJKV1QiLCJ4NXQiOiJMTEtESGR2dGRYQWp5eE9aNnZUQ2tjRnkyZlEifQ.eyJuYmYiOjE1NTIzNDQ4NDYsImV4cCI6MTU1MjM0ODQ0NiwiaXNzIjoiaHR0cHM6Ly9kYXQtYi5vc2lzb2Z0LmNvbS9pZGVudGl0eSIsImF1ZCI6WyJodHRwczovL2RhdC1iLm9zaXNvZnQuY29tL2lkZW50aXR5L3Jlc291cmNlcyIsIm9jc2FwaSJdLCJjbGllbnRfaWQiOiJhZDQ4YjNiMi1jZGYwLTQ5OWUtOTQ1OC1lYjQyYTc3ZWRiODUiLCJ0aWQiOiJkNzg0NzYxNC0yZTRhLTRjMWUtODEyYi1lOGRlNWZkMDZhMGYiLCJqdGkiOiI4ZmI5OTZjN2EzODVhZmEzODVmYmZlMjU2YzM5NjRkZiIsInNjb3BlIjpbIm9jc2FwaSJdfQ.ULd2ctz1zXBn-EC8gy2naBCsTQS3ZSYkstQ5IKKtlw-BCiFJLTTPqmFd2N8AxdyxcdLXFrm1KqTJpHkmTNVABh3CwRGfHPW-g7t6hC9ew9mbEwIqGGCMlwPJIXfZKRPE1rTR9NVp1Lpiibn_xHrAOsJydiVTSHaekuCN0AwdmLZyDCJIubIjkm83u4htaWKNXpBgFVduSqrFYsWt1wRSVG7K3_8nrKMrbKniAdpifNBQxczBGo6MTz0sDFyfPEjBussnA_LNC6tioh5NWMAPTy_LR5FkSA9x4ETIAlwqooTKs9C3sLRpme5Ik0_8Iftk4NwBPP2-u9FRJ3BYx2avUw',
 'Content-type': 'application/json',
 'Accept': 'text/plain',
 'Request-Timeout

### Endpoint for namespace 'beer_fermenters' on production cluster

In [323]:
# Endpoint for dataview access
namespace_url = resource_url + f'/api/{api_version}/Tenants/{tenant_id}/Namespaces/{namespace}' 
endpoint = namespace_url + '/dataviews/'

## All data streams of a fermentor, here Fermentor 31
### (16 in total, listed in alphabetical order)

In [324]:
streams_url = namespace_url + '/Streams?query=name:*FV31*'
print('Stream URL:', streams_url)
fv31_streams = requests.get(streams_url, headers=headers)
print(json.dumps(fv31_streams.json(), indent=4))

Stream URL: https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/beer_fermenters/Streams?query=name:*FV31*
[
    {
        "TypeId": "PIFloat32",
        "Id": "PI_acad-pida-vm0_2592",
        "Name": "acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV",
        "Description": "FV31 Bottom Temperature Control Output"
    },
    {
        "TypeId": "PIDigital",
        "Id": "PI_acad-pida-vm0_2598",
        "Name": "acsbrew.BREWERY.B2_CL_C2_FV31/BRAND.CV",
        "Description": "FV31 Brand",
        "InterpolationMode": 1
    },
    {
        "TypeId": "PIFloat32",
        "Id": "PI_acad-pida-vm0_2639",
        "Name": "acsbrew.BREWERY.B2_CL_C2_FV31_PIC1362/SP.CV",
        "Description": "FV31 Pressure Control Setpoint"
    },
    {
        "TypeId": "PIDigital",
        "Id": "PI_acad-pida-vm0_2968",
        "Name": "acsbrew.BREWERY.B2_CL_C2_FV31/YEAST.CV",
        "Description": "FV31 Yeast"
    },
    {
        "TypeId": "PIFloat32",
        "I

In [325]:
streams = [stream['Name'] for stream in fv31_streams.json()]
streams

['acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31/BRAND.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_PIC1362/SP.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31/YEAST.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/SP.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/PV.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_PIC1362/PV.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31/HARVEST.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/OUT.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_LT1360/PV.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/SP.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31/STATUS.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31/PCD.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31/DcrsFvFullPlato',
 'acsbrew.BREWERY.B2_CL_C2_FV31/PULLSP.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31/PULL.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31/YEASTGEN.CV',
 'acsbrew.BREWERY.FV31.Fermentation ID.194fa814-869f-5f35-3501-0b9198ac52e1',
 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31/T90PULL.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A

| Stream Name | DV Column Name | Description | 
|-------------|----------------|-------------|
| acsbrew.BREWERY.B2_CL_C2_FV31/BRAND.CV | `Brand` | Vessel Brand
| acsbrew.BREWERY.FV31.Fermentation ID.194fa814-869f-5f35-3501-0b9198ac52e1 | `Fermentation ID` | Unique ID for fermentation batch 
| acsbrew.BREWERY.B2_CL_C2_FV31/STATUS.CV | `Status` | * Vessel Status 
| acsbrew.BREWERY.B2_CL_C2_FV31/DcrsFvFullPlato | `FV Full Plato` | The specific gravity of the vessel in plato at the end of filling
| acsbrew.BREWERY.B2_CL_C2_FV31/Plato | `Plato` | The specific gravity of the vessel in plato
| acsbrew.BREWERY.B2_CL_C2_FV31_LT1360/PV.CV | `Volume` | * Vessel Volume 
| acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV |`Bottom TIC OUT` | * Vessel Bottom Temperature Indicator Controller Output
| acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/PV.CV | `Bottom TIC PV` | * Vessel Bottom Temperture Indicator Controller Process Value
| acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/OUT.CV | `Middle TIC OUT` | * Vessel Middle Temperature Indicator Controller Output
| acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/PV.CV | `Middle TIC PV` | * Vessel Middle Temperture Indicator Controller Process Value
| acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/OUT.CV | `Top TIC OUT` | * Vessel Top Temperature Indicator Controller Output
| acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV | `Top TIC PV` | * Vessel Bottom Temperture Indicator Controller Process Value

In [326]:
bottom_out = [s for s in outcv_streams if 'A/' in s][0]
bottom_out

'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV'

In [327]:
class Pos(Enum):
    bottom = 1
    middle = 2
    top = 3

stream_key = {Pos.bottom: 'A/', Pos.middle: 'B/', Pos.top: 'C/'}
tic_out_streams = [s for s in streams if 'OUT.CV' in s]
tic_out_tags = { p: next(s for s in tic_out_streams if stream_key[p] in s) for p in Pos }
tic_out_tags

{<Pos.bottom: 1>: 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV',
 <Pos.middle: 2>: 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/OUT.CV',
 <Pos.top: 3>: 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/OUT.CV'}

In [328]:
tic_pv_streams = [s for s in streams if 'PV.CV' in s]
tic_pv_tags = { p: next(s for s in tic_pv_streams if stream_key[p] in s) for p in Pos }
tic_pv_tags

{<Pos.bottom: 1>: 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/PV.CV',
 <Pos.middle: 2>: 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/PV.CV',
 <Pos.top: 3>: 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV'}

In [329]:
ticpv_streams = [s for s in streams if 'PV.CV' in s]
ticpv_streams

['acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/PV.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_PIC1362/PV.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_LT1360/PV.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV',
 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/PV.CV']

In [330]:
[p.name for p in Pos]

['bottom', 'middle', 'top']

In [331]:
tic_out_map = [ (stream_key[p] + 'OUT.CV', p.name.capitalize() + ' TIC OUT') for p in Pos]
tic_out_map

[('A/OUT.CV', 'Bottom TIC OUT'),
 ('B/OUT.CV', 'Middle TIC OUT'),
 ('C/OUT.CV', 'Top TIC OUT')]

In [332]:
pm.bound.reset()
[ (s, m[1]) for s in streams for m in tic_out_map if pm.match(s, pm.anything + m[0])]

[('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV', 'Bottom TIC OUT'),
 ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/OUT.CV', 'Middle TIC OUT'),
 ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/OUT.CV', 'Top TIC OUT')]

In [333]:
tic_pv_map = [ (stream_key[p] + 'PV.CV', p.name.capitalize() + ' TIC PV') for p in Pos]
tic_pv_map

[('A/PV.CV', 'Bottom TIC PV'),
 ('B/PV.CV', 'Middle TIC PV'),
 ('C/PV.CV', 'Top TIC PV')]

In [334]:
[ (s, m[1]) for s in streams for m in tic_pv_map if pm.match(s, pm.anything + m[0])]

[('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/PV.CV', 'Middle TIC PV'),
 ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV', 'Top TIC PV'),
 ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/PV.CV', 'Bottom TIC PV')]

In [335]:
rest_map = [
    ('_LT', 'Volume'),
    ('BRAND', 'Brand'),
    ('STATUS', 'Status'),
    ('FullPlato', 'FV Full Plato'),
    ('/Plato', 'Plato'),
    ('Fermentation ID', 'Fermentation ID')
]
rest_map

[('_LT', 'Volume'),
 ('BRAND', 'Brand'),
 ('STATUS', 'Status'),
 ('FullPlato', 'FV Full Plato'),
 ('/Plato', 'Plato'),
 ('Fermentation ID', 'Fermentation ID')]

In [336]:
full_map = [
    ('_LT', 'Volume'),
    ('C/PV.CV', 'Top TIC PV'),
    ('C/OUT.CV', 'Top TIC OUT'),
    ('STATUS', 'Status'),
    ('/Plato', 'Plato'),
    ('B/PV.CV', 'Middle TIC PV'),
    ('B/OUT.CV', 'Middle TIC OUT'),
    ('FullPlato', 'FV Full Plato'),
    ('Fermentation ID', 'Fermentation ID'),
    ('BRAND', 'Brand'),
    ('A/PV.CV', 'Bottom TIC PV'),
    ('A/OUT.CV', 'Bottom TIC OUT'),
]

In [337]:
# full_map = tic_out_map + tic_pv_map + rest_map
[i for i in enumerate(full_map)]

[(0, ('_LT', 'Volume')),
 (1, ('C/PV.CV', 'Top TIC PV')),
 (2, ('C/OUT.CV', 'Top TIC OUT')),
 (3, ('STATUS', 'Status')),
 (4, ('/Plato', 'Plato')),
 (5, ('B/PV.CV', 'Middle TIC PV')),
 (6, ('B/OUT.CV', 'Middle TIC OUT')),
 (7, ('FullPlato', 'FV Full Plato')),
 (8, ('Fermentation ID', 'Fermentation ID')),
 (9, ('BRAND', 'Brand')),
 (10, ('A/PV.CV', 'Bottom TIC PV')),
 (11, ('A/OUT.CV', 'Bottom TIC OUT'))]

In [338]:
dv_names = [ (s, m[1]) for m in full_map for s in streams if m[0] in s ]
[i for i in enumerate(dv_names)]

[(0, ('acsbrew.BREWERY.B2_CL_C2_FV31_LT1360/PV.CV', 'Volume')),
 (1, ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV', 'Top TIC PV')),
 (2, ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/OUT.CV', 'Top TIC OUT')),
 (3, ('acsbrew.BREWERY.B2_CL_C2_FV31/STATUS.CV', 'Status')),
 (4, ('acsbrew.BREWERY.B2_CL_C2_FV31/Plato', 'Plato')),
 (5, ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/PV.CV', 'Middle TIC PV')),
 (6, ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360B/OUT.CV', 'Middle TIC OUT')),
 (7, ('acsbrew.BREWERY.B2_CL_C2_FV31/DcrsFvFullPlato', 'FV Full Plato')),
 (8,
  ('acsbrew.BREWERY.FV31.Fermentation ID.194fa814-869f-5f35-3501-0b9198ac52e1',
   'Fermentation ID')),
 (9, ('acsbrew.BREWERY.B2_CL_C2_FV31/BRAND.CV', 'Brand')),
 (10, ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/PV.CV', 'Bottom TIC PV')),
 (11, ('acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV', 'Bottom TIC OUT'))]

In [339]:
'''
{
	"Id": "CDV7_FV31",
	"Name": "CDV7_FV31",
	"Description": "Fermenter dataview",
	"Queries": [
		{
			"Id": "Fermentor",
			"Query": {
				"Resource": "Streams",
				"Field": "Description",
				"Value": "FV31",
				"Operator": "Contains"
			}
		}
	],
	"GroupRules": [],
	"Mappings": {
		"Columns": [
			{
				"Name": "Timestamp",
				"MappingRule": {
					"PropertyPaths": [
						"Timestamp"
					]
				},
				"IsKey": true,
				"DataType": "DateTime"
			},
            
            ############## Columns ################## e.g.
            
            {
				"Name": "Bottom TIC OUT",
				"MappingRule": {
					"PropertyPaths": [
						"Value"
					],
					"ItemIdentifier": {
						"Resource": "Streams",
						"Field": "Name",
						"Value": "acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV",
						"Operator": "Equals"
					}
				},
				"IsKey": false
			}
	]
	},
	"IndexDataType": "DateTime",
	"IndexConfig": {
		"StartIndex": "2017-03-17T07:00:00Z",
		"EndIndex": "2017-03-18T07:00:00Z",
		"Mode": "Interpolated",
		"Interval": "00:01:00"
	}
}
'''

'\n{\n\t"Id": "CDV7_FV31",\n\t"Name": "CDV7_FV31",\n\t"Description": "Fermenter dataview",\n\t"Queries": [\n\t\t{\n\t\t\t"Id": "Fermentor",\n\t\t\t"Query": {\n\t\t\t\t"Resource": "Streams",\n\t\t\t\t"Field": "Description",\n\t\t\t\t"Value": "FV31",\n\t\t\t\t"Operator": "Contains"\n\t\t\t}\n\t\t}\n\t],\n\t"GroupRules": [],\n\t"Mappings": {\n\t\t"Columns": [\n\t\t\t{\n\t\t\t\t"Name": "Timestamp",\n\t\t\t\t"MappingRule": {\n\t\t\t\t\t"PropertyPaths": [\n\t\t\t\t\t\t"Timestamp"\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t"IsKey": true,\n\t\t\t\t"DataType": "DateTime"\n\t\t\t},\n            \n            ############## Columns ################## e.g.\n            \n            {\n\t\t\t\t"Name": "Bottom TIC OUT",\n\t\t\t\t"MappingRule": {\n\t\t\t\t\t"PropertyPaths": [\n\t\t\t\t\t\t"Value"\n\t\t\t\t\t],\n\t\t\t\t\t"ItemIdentifier": {\n\t\t\t\t\t\t"Resource": "Streams",\n\t\t\t\t\t\t"Field": "Name",\n\t\t\t\t\t\t"Value": "acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV",\n\t\t\t\t\t\t"Operator": "Equals"

In [340]:
true = True
false = False
def generate_time_column():
    return [ {
                "Name": "Timestamp",
                "MappingRule": {
                    "PropertyPaths": [
                        "Timestamp"
                    ]
                },
                "IsKey": true,
                "DataType": "DateTime"
            } ] 

In [341]:
def generate_column_def(column_def):
    name = column_def[1]
    tag = column_def[0]
    return {
        "Name": name,
        "MappingRule": {
            "PropertyPaths": [
                "Value"
            ],
            "ItemIdentifier": {
                "Resource": "Streams",
                "Field": "Name",
                "Value": tag,
                "Operator": "Equals"
            }
        }
    }
generate_column_def(('TAGNAME', 'COLUMN_NAME'))

{'Name': 'COLUMN_NAME',
 'MappingRule': {'PropertyPaths': ['Value'],
  'ItemIdentifier': {'Resource': 'Streams',
   'Field': 'Name',
   'Value': 'TAGNAME',
   'Operator': 'Equals'}}}

In [342]:

def generate_column_mappings(): 
    return { "Columns": 
                generate_time_column() + 
                [generate_column_def(c) for c in dv_names]
           }
generate_column_mappings()

{'Columns': [{'Name': 'Timestamp',
   'MappingRule': {'PropertyPaths': ['Timestamp']},
   'IsKey': True,
   'DataType': 'DateTime'},
  {'Name': 'Volume',
   'MappingRule': {'PropertyPaths': ['Value'],
    'ItemIdentifier': {'Resource': 'Streams',
     'Field': 'Name',
     'Value': 'acsbrew.BREWERY.B2_CL_C2_FV31_LT1360/PV.CV',
     'Operator': 'Equals'}}},
  {'Name': 'Top TIC PV',
   'MappingRule': {'PropertyPaths': ['Value'],
    'ItemIdentifier': {'Resource': 'Streams',
     'Field': 'Name',
     'Value': 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV',
     'Operator': 'Equals'}}},
  {'Name': 'Top TIC OUT',
   'MappingRule': {'PropertyPaths': ['Value'],
    'ItemIdentifier': {'Resource': 'Streams',
     'Field': 'Name',
     'Value': 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/OUT.CV',
     'Operator': 'Equals'}}},
  {'Name': 'Status',
   'MappingRule': {'PropertyPaths': ['Value'],
    'ItemIdentifier': {'Resource': 'Streams',
     'Field': 'Name',
     'Value': 'acsbrew.BREWERY.B2_CL_C2

In [349]:
version = 10
def generate_dataview_def(ferm_id):
    dataview_id = f'TestDV{version}_FV{ferm_id}'
    return dataview_id, {
    "Id": dataview_id, 
    "Name": dataview_id, 
    "Description": f'Fermentor {ferm_id} DV',
    "Queries": [
        {
            "Id": "Fermentor",
            "Query": {
                "Resource": "Streams",
                "Field": "Name",
                "Value": "FV31",
                "Operator": "Contains"
            }
        }
    ],
    "GroupRules": [],
    "Mappings": generate_column_mappings(),
    "IndexDataType": "DateTime",
    "IndexConfig": {
        "StartIndex": "2017-03-17T07:00:00Z",
        "EndIndex": "2017-03-18T07:00:00Z",
        "Mode": "Interpolated",
        "Interval": "00:01:00"
    }
    }       
    

In [350]:
generate_dataview_def(31)   

('TestDV10_FV31',
 {'Id': 'TestDV10_FV31',
  'Name': 'TestDV10_FV31',
  'Description': 'Fermentor 31 DV',
  'Queries': [{'Id': 'Fermentor',
    'Query': {'Resource': 'Streams',
     'Field': 'Name',
     'Value': 'FV31',
     'Operator': 'Contains'}}],
  'GroupRules': [],
  'Mappings': {'Columns': [{'Name': 'Timestamp',
     'MappingRule': {'PropertyPaths': ['Timestamp']},
     'IsKey': True,
     'DataType': 'DateTime'},
    {'Name': 'Volume',
     'MappingRule': {'PropertyPaths': ['Value'],
      'ItemIdentifier': {'Resource': 'Streams',
       'Field': 'Name',
       'Value': 'acsbrew.BREWERY.B2_CL_C2_FV31_LT1360/PV.CV',
       'Operator': 'Equals'}}},
    {'Name': 'Top TIC PV',
     'MappingRule': {'PropertyPaths': ['Value'],
      'ItemIdentifier': {'Resource': 'Streams',
       'Field': 'Name',
       'Value': 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV',
       'Operator': 'Equals'}}},
    {'Name': 'Top TIC OUT',
     'MappingRule': {'PropertyPaths': ['Value'],
      'ItemIden

## NOTE: There are more values for Fermentor 31 (29 instead of 17) because the filter for Bifrost was on tag instead of AF structure which obviously didn't map all available on the Data Archive. But we don't want to filter out the extra data at this point, we can do it later on the dataframe itself. 

## Generate JSON for a Dataview

**TODO**: Documentation for dataview available at: TBD 

#### Function `generate_dataview_def` returns a pair: Dataview Id and Dataview JSON body definition.

#### The Dataview is built to return: 
* all data of Fermentor #`ferm_id` as rows
* each stream of the fermentor becomes a column _without the_ `Fermentor 3x` _prefix_ (important for dataframe concatenation)
* with data starting on UTC time `2017-03-18T00:00:00Z` up to 2017-03-19T00:00:00Z (1 day)
* with data interpolated at 1 minute (00:01:00) interval

## BELOW IS THE NEW DATAFRAME

In [351]:
dv_id, dv_def = generate_dataview_def(31)   
dv_url = namespace_url + f'/Dataviews/{dv_id}'
print('DV URL:', dv_url)
r = requests.post(dv_url, json=dv_def, headers=headers)
print(r.status_code, r.text)
# df = pd.read_csv(io.StringIO(r.text), parse_dates=['Timestamp'])

DV URL: https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/beer_fermenters/Dataviews/TestDV10_FV31
201 {"Id":"TestDV10_FV31","Name":"TestDV10_FV31","Description":"Fermentor 31 DV","Queries":[{"Id":"Fermentor","Query":{"Resource":"Streams","Field":"Name","Value":"FV31","Operator":"Contains"}}],"GroupRules":[],"Mappings":{"Columns":[{"Name":"Timestamp","MappingRule":{"PropertyPaths":["Timestamp"]},"IsKey":true,"DataType":"DateTime"},{"Name":"Volume","MappingRule":{"PropertyPaths":["Value"],"ItemIdentifier":{"Resource":"Streams","Field":"Name","Value":"acsbrew.BREWERY.B2_CL_C2_FV31_LT1360/PV.CV","Operator":"Equals"}}},{"Name":"Top TIC PV","MappingRule":{"PropertyPaths":["Value"],"ItemIdentifier":{"Resource":"Streams","Field":"Name","Value":"acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV","Operator":"Equals"}}},{"Name":"Top TIC OUT","MappingRule":{"PropertyPaths":["Value"],"ItemIdentifier":{"Resource":"Streams","Field":"Name","Value":"acsbrew.BRE

## Creation of the Dataviews, for fermenters 31 up to 36

* Status 201 from POST request indicates success
* Status 401 indicates unauthorized (try refreshing authorization header)
* Status 409 when a Dataview with same Id already exists (go to last cell of this notebook to perform a clean up)

In [352]:
dataviews = []
for ferm_id in range(31, 32):  # ONLY FERMENTER 31 FOR TESTING
    dataview_name, dataview_def = generate_dataview_def(ferm_id)
    print(dataview_def)
    dataviews.append(dataview_name)
    response = requests.post(endpoint + dataview_name, headers=headers, json=dataview_def)
    print('Status:', response.status_code, dataview_name, response.text)

{'Id': 'TestDV10_FV31', 'Name': 'TestDV10_FV31', 'Description': 'Fermentor 31 DV', 'Queries': [{'Id': 'Fermentor', 'Query': {'Resource': 'Streams', 'Field': 'Name', 'Value': 'FV31', 'Operator': 'Contains'}}], 'GroupRules': [], 'Mappings': {'Columns': [{'Name': 'Timestamp', 'MappingRule': {'PropertyPaths': ['Timestamp']}, 'IsKey': True, 'DataType': 'DateTime'}, {'Name': 'Volume', 'MappingRule': {'PropertyPaths': ['Value'], 'ItemIdentifier': {'Resource': 'Streams', 'Field': 'Name', 'Value': 'acsbrew.BREWERY.B2_CL_C2_FV31_LT1360/PV.CV', 'Operator': 'Equals'}}}, {'Name': 'Top TIC PV', 'MappingRule': {'PropertyPaths': ['Value'], 'ItemIdentifier': {'Resource': 'Streams', 'Field': 'Name', 'Value': 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/PV.CV', 'Operator': 'Equals'}}}, {'Name': 'Top TIC OUT', 'MappingRule': {'PropertyPaths': ['Value'], 'ItemIdentifier': {'Resource': 'Streams', 'Field': 'Name', 'Value': 'acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360C/OUT.CV', 'Operator': 'Equals'}}}, {'Name': 'Status'

## List of Dataviews URLs 

In [353]:
# We want 20 days of data worth at 1 minute interval, for fermenter 31 up to 36
dataviews_url = [endpoint + '%s/preview/interpolated?form=csvh&maxcount=2000' % d for d in dataviews]
dataviews_url

['https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/beer_fermenters/dataviews/TestDV10_FV31/preview/interpolated?form=csvh&maxcount=2000']

## From a list of dataviews, gather them in parallel and return a single dataframe

In [356]:
def postproc(reply):
    s = io.StringIO(reply)
    # 255 Bad Input, 307 Bad, 313 Comm Fail, 246 I/O Timeout, 
    # print('Line 1:', s.readline())
    # print('Line 2:', s.readline())
    s.close()
    return reply 

# Request in parallel all the dataviews, return the concatenated dataframe
def get_ocs_dataframe(dataviews, headers, workers=8):
    ti = datetime.datetime.now()
    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=workers))
    rs = [session.get(u, headers=headers, params={'count': '10000'}) for u in dataviews]
    resps = [r.result() for r in rs]
    print('Requests completed in', datetime.datetime.now() - ti) 
    print(resps)
    dfs = [pd.read_csv(io.StringIO(postproc(r.text)), parse_dates=['Timestamp']) for r in resps]
    return(pd.concat(dfs, sort=True))

## Get dataframe and time it (about 20 seconds, be patient)

All responses should HTTP code [200] if everything is ok 

In [357]:
t0 = datetime.datetime.now()
df = get_ocs_dataframe(dataviews_url, headers)
print('Dataframe obtained in', datetime.datetime.now() - t0) 
df.info

Requests completed in 0:00:00.903322
[<Response [200]>]
Line 1: Timestamp,Volume,Top TIC PV,Top TIC OUT,Status,Plato,Middle TIC PV,Middle TIC OUT,FV Full Plato,Fermentation ID,Brand,Bottom TIC PV,Bottom TIC OUT

Line 2: 2017-03-17T07:00:00Z,716.566,29.6131516,0,Maturation,168.919174,29.35638,0,171.893845,Fermentor 31201731179653,Kerberos,29.8845711,10.9353266

Dataframe obtained in 0:00:00.922018


<bound method DataFrame.info of       Bottom TIC OUT  Bottom TIC PV     Brand  FV Full Plato  \
0          10.935327      29.884571  Kerberos     171.893845   
1          12.445787      29.931492  Kerberos     172.065735   
2          13.956246      29.978569  Kerberos     172.237610   
3          18.254084      30.033358  Kerberos     172.409500   
4          25.958717      30.083404  Kerberos     172.581375   
5          33.663350      30.133188  Kerberos     172.753265   
6          41.367980      30.193794  Kerberos     172.925156   
7          45.145805      30.191100  Kerberos     173.097031   
8          48.042100      30.184795  Kerberos     173.268921   
9          50.938390      30.179586  Kerberos     173.440811   
10         53.834682      30.172728  Kerberos     173.612686   
11         56.730976      30.165604  Kerberos     173.784576   
12         55.699925      30.159050  Kerberos     173.956451   
13         50.994907      30.152640  Kerberos     174.128342   
14      

### Note the that resulting dataframe has almost 182K rows

## Save data into CSV file locally in directory FlashcARD

In [273]:
df.to_csv('beer_ocs_v1.csv')

---
## Clicking this [link](./beer_20_days.csv) opens up a CSV browser with the CSV above
---

### List of column names with their type

Note that the `_time` column (a new column on top of the 16 ones of a Fermentor) has the correct datetime panda data type for timestamps

In [15]:
for i, c in enumerate(df.columns, 1):
    print('%2d' % i, c, '((( type:', df[c].dtype, ')))')

 1 DefaultGroupRule ((( type: object )))
 2 DigitalStateName ((( type: float64 )))
 3 Quality ((( type: int64 )))
 4 StateCode ((( type: float64 )))
 5 Timestamp ((( type: datetime64[ns] )))
 6 Value ((( type: int64 )))


In [16]:
for row in df.iterrows():
    print(row)

(0, DefaultGroupRule    acsbrew.BREWERY.B2_CL_C2_FV31.Batch Active Tag
DigitalStateName                                               NaN
Quality                                                          0
StateCode                                                      NaN
Timestamp                                      2017-03-18 00:00:00
Value                                                            0
Name: 0, dtype: object)
(1, DefaultGroupRule    acsbrew.BREWERY.B2_CL_C2_FV31.Batch Active Tag
DigitalStateName                                               NaN
Quality                                                          0
StateCode                                                      NaN
Timestamp                                      2017-03-18 00:01:00
Value                                                            0
Name: 1, dtype: object)
(2, DefaultGroupRule    acsbrew.BREWERY.B2_CL_C2_FV31.Batch Active Tag
DigitalStateName                                               NaN
Qu

In [17]:
for f in df.Fermentation_ID.unique():
    print(f, isinstance(f, str))

AttributeError: 'DataFrame' object has no attribute 'Fermentation_ID'

### Prepare ADF curve plots over time 

In [None]:
import plotly.graph_objs as go

figs = []
data = []
for f in df.Fermentation_ID.unique():
    trace = go.Scattergl(x = df[df.Fermentation_ID == f]['_time'], y = df[df.Fermentation_ID == f]['ADF'], mode='lines+markers', name=str(f))
    figs.append(go.FigureWidget(data=[trace]))
    data.append(trace)

### Add a range slider 

With a few time range selectors: 8 hours, 1 day and everything 

Note: range slider is grey now because of an incompatibility with ScatterGL: https://github.com/plotly/plotly.js/issues/2627

In [None]:
layout = dict(
    title='Brewing ADF with time range slider',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=8,
                     label='8h',
                     step='hour',
                     stepmode='backward'),
                dict(count=1,
                     label='1d',
                     step='day',
                     stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(
            visible = True
        ),
        type='date'
    )
)
        
fig = go.FigureWidget(data=data, layout=layout)
fig

## Clean up: delete Dataviews  

* Code 204 if deletion is successful
* Code 404 if requested Dataview Id doesn't exist or already deleted

In [None]:
for dv in dataviews_url:
    dv_url = dv[:dv.find('/preview')]
    s = requests.delete(dv_url, headers=headers)
    print(s.status_code, dv_url)