# Demo: Dataviews on OSIsoft Cloud Service (OCS) - Uxie-500

Dataviews allows data scientists to create and use table views which can be vary from simple (e.g extracting values from a single sensor) to very elaborate by pulling data from multiple sources   The result of a Dataview is a standard CSV (comma separated value) table ready-to-use  

The dataset used below is from the Deschutes Brewery, with data coming from fermentor vessels. 

This notebook shows the steps involved in creating and using Dataviews. 

## Imports 

In [15]:
# In a CMD window, execute: pip install requests-futures plotly
# 
# For interaction with OCS
from ocs_datascience import OCSClient
# For HTTP requests
import requests
# Pandas dataframe to manipulate table data
import pandas as pd
# Utilities from Python standard library 
import configparser
import datetime as dt
from dateutil import parser 
import json
# For plots 
import plotly.graph_objs as go

![Tenant, namespace concepts](https://apimgmtstelkv30lahnuj362.blob.core.windows.net/content/MediaLibrary/lehigh/ocs/tenant-namespace2.png)

## Content of file `config.ini`

    ; IMPORTANT: those values are provided by OSIsoft, DO NOT CHANGE
    [Configurations]
    Namespace = fermenter_vessels

    [Access]
    Resource = https://dat-b.osisoft.com
    Tenant = d7847614-2e4a-4c1e-812b-e8de5fd06a0f
    ApiVersion = v1-preview

    [Credentials]
    ClientId = ec9d0232-fc61-4ee7-8316-6997954ad40c
    ClientSecret = OmY31XkODlimHSR5gDZqqE5PT8HrUm3liDsIjwNc5VQ=
    
## Read in configuration file and create OCS client object

In [2]:
config = configparser.ConfigParser()
config.read('config.ini')

ocs_client = OCSClient(config.get('Access', 'ApiVersion'),config.get('Access', 'Tenant'), config.get('Access', 'Resource'), 
                     config.get('Credentials', 'ClientId'), config.get('Credentials', 'ClientSecret'))

namespace_id = config.get('Configurations', 'Namespace')

## Get an the autorization header with bearer token for access to OCS API 

In [3]:
headers = ocs_client.authorization_headers()
headers

Authorization: <Response [200]> {"access_token":"eyJhbGciOiJSUzI1NiIsImtpZCI6IjJDQjI4MzFEREJFRDc1NzAyM0NCMTM5OUVBRjRDMjkxQzE3MkQ5RjQiLCJ0eXAiOiJKV1QiLCJ4NXQiOiJMTEtESGR2dGRYQWp5eE9aNnZUQ2tjRnkyZlEifQ.eyJuYmYiOjE1NTM1MTgwMDMsImV4cCI6MTU1MzUyMTYwMywiaXNzIjoiaHR0cHM6Ly9kYXQtYi5vc2lzb2Z0LmNvbS9pZGVudGl0eSIsImF1ZCI6WyJodHRwczovL2RhdC1iLm9zaXNvZnQuY29tL2lkZW50aXR5L3Jlc291cmNlcyIsIm9jc2FwaSJdLCJjbGllbnRfaWQiOiJhNDkxMjM1My00OWEzLTRlYzAtYTdkYS1mMGZmN2U0YjM0YmEiLCJ0aWQiOiJkNzg0NzYxNC0yZTRhLTRjMWUtODEyYi1lOGRlNWZkMDZhMGYiLCJqdGkiOiIxNTM4MGMwMmVjOWRiMzgyYzYxOGQxNWFkODIyZDU5NyIsInNjb3BlIjpbIm9jc2FwaSJdfQ.hU-RWPkYyGHfKqc9VlMMiLIDUEty1u2IZ5rydVSjg6trNZoZR-3ZS9Li81E7hgBr8flvLjyzyfQWZgMpUlT3s4K0Vd4tH2bsiPzSdCG-I_eaINZUX7psKArVHgooVggXdD4_JuowtZAxBPWwyPWS8cgqJTCRZ6mbSUmglynkt7OLD6qj8yeMD1t1wEqtDp0V1WX5eklIrDmSc2HLWdriCNz0n7NUTt7V6X45x5ea51At0k8d59w2NKSJ6E3zf3z0zYEv1wmhUxxYQWGVN1klvc8tWTVv2OP66IPXIgaYEDGgW3so2FEJgct1saRMDADf_YOfTjHTMHSemIgHXONFqg","expires_in":3600,"token_type":"Bearer"}


{'Authorization': 'bearer eyJhbGciOiJSUzI1NiIsImtpZCI6IjJDQjI4MzFEREJFRDc1NzAyM0NCMTM5OUVBRjRDMjkxQzE3MkQ5RjQiLCJ0eXAiOiJKV1QiLCJ4NXQiOiJMTEtESGR2dGRYQWp5eE9aNnZUQ2tjRnkyZlEifQ.eyJuYmYiOjE1NTM1MTgwMDMsImV4cCI6MTU1MzUyMTYwMywiaXNzIjoiaHR0cHM6Ly9kYXQtYi5vc2lzb2Z0LmNvbS9pZGVudGl0eSIsImF1ZCI6WyJodHRwczovL2RhdC1iLm9zaXNvZnQuY29tL2lkZW50aXR5L3Jlc291cmNlcyIsIm9jc2FwaSJdLCJjbGllbnRfaWQiOiJhNDkxMjM1My00OWEzLTRlYzAtYTdkYS1mMGZmN2U0YjM0YmEiLCJ0aWQiOiJkNzg0NzYxNC0yZTRhLTRjMWUtODEyYi1lOGRlNWZkMDZhMGYiLCJqdGkiOiIxNTM4MGMwMmVjOWRiMzgyYzYxOGQxNWFkODIyZDU5NyIsInNjb3BlIjpbIm9jc2FwaSJdfQ.hU-RWPkYyGHfKqc9VlMMiLIDUEty1u2IZ5rydVSjg6trNZoZR-3ZS9Li81E7hgBr8flvLjyzyfQWZgMpUlT3s4K0Vd4tH2bsiPzSdCG-I_eaINZUX7psKArVHgooVggXdD4_JuowtZAxBPWwyPWS8cgqJTCRZ6mbSUmglynkt7OLD6qj8yeMD1t1wEqtDp0V1WX5eklIrDmSc2HLWdriCNz0n7NUTt7V6X45x5ea51At0k8d59w2NKSJ6E3zf3z0zYEv1wmhUxxYQWGVN1klvc8tWTVv2OP66IPXIgaYEDGgW3so2FEJgct1saRMDADf_YOfTjHTMHSemIgHXONFqg',
 'Content-type': 'application/json',
 'Accept': 'text/plain',
 'Request-Timeout

### URL to access `fermenter_vessels` namespace and its dataviews 

In [4]:
# Endpoint for dataview access
namespace_url = ocs_client.namespace_url(namespace_id)  
dataview_url = namespace_url + '/dataviews/'
namespace_url

'https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels'

### Example of typical interaction with OCS: request and display all data streams of Fermentor Vessel 31

#### 1) Build a stream query URL

In [5]:
streams_url = namespace_url + '/Streams?query=name:*FV31*'
print('Stream Query URL:', streams_url)

Stream Query URL: https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Streams?query=name:*FV31*


#### 2) Make an HTTP GET request using URL and authorization header

In [6]:
fv31_streams = requests.get(streams_url, headers=headers)

#### 3) Verify that request status code indicates success (should be 200 for GET) 

In [7]:
fv31_streams.status_code

200

#### 4) Display result in JSON format (pretty-print)

In [8]:
print(json.dumps(fv31_streams.json(), indent=4))

[
    {
        "TypeId": "PIFloat32",
        "Id": "PI_acad-pida-vm0_2592",
        "Name": "acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/OUT.CV",
        "Description": "FV31 Bottom Temperature Control Output",
        "InterpolationMode": 0,
        "ExtrapolationMode": 2
    },
    {
        "TypeId": "PIDigital",
        "Id": "PI_acad-pida-vm0_2968",
        "Name": "acsbrew.BREWERY.B2_CL_C2_FV31/YEAST.CV",
        "Description": "FV31 Yeast",
        "InterpolationMode": 1,
        "ExtrapolationMode": 2
    },
    {
        "TypeId": "PIFloat32",
        "Id": "PI_acad-pida-vm0_2639",
        "Name": "acsbrew.BREWERY.B2_CL_C2_FV31_PIC1362/SP.CV",
        "Description": "FV31 Pressure Control Setpoint",
        "InterpolationMode": 0,
        "ExtrapolationMode": 2
    },
    {
        "TypeId": "PIDigital",
        "Id": "PI_acad-pida-vm0_2598",
        "Name": "acsbrew.BREWERY.B2_CL_C2_FV31/BRAND.CV",
        "Description": "FV31 Brand",
        "InterpolationMode": 1,
        "Ext

#### 5) Let's extract 2 hours of recorded data for one stream

    "Id": "PI_acad-pida-vm0_2593",
    "Name": "acsbrew.BREWERY.B2_CL_C2_FV31_TIC1360A/PV.CV",
    "Description": "FV31 Bottom Temperature Control Value",
    
First we build the URL for the data, starting at 2017-03-17T00:00 for two hours 

In [9]:
start_index = 'startIndex=2017-03-17T00:00'
end_index = 'endIndex=2017-03-17T02:00'
fv31_bottom_temp_url = namespace_url + f'/Streams/PI_acad-pida-vm0_2593/Data?{start_index}&{end_index}'
fv31_bottom_temp_url 

'https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Streams/PI_acad-pida-vm0_2593/Data?startIndex=2017-03-17T00:00&endIndex=2017-03-17T02:00'

#### 6) Perform HTTP GET for stream data and check status 

In [10]:
fv31_bottom_temp = requests.get(fv31_bottom_temp_url, headers=headers)
fv31_bottom_temp.status_code

200

#### 7) Check raw data in JSON format

In [11]:
fv31_bottom_temp.json()

[{'Timestamp': '2017-03-17T00:14:07Z', 'Value': 30.5642},
 {'Timestamp': '2017-03-17T00:22:36Z', 'Value': 30.2},
 {'Timestamp': '2017-03-17T00:53:25Z', 'Value': 30.2},
 {'Timestamp': '2017-03-17T00:57:55Z', 'Value': 30.06531},
 {'Timestamp': '2017-03-17T00:58:48Z', 'Value': 29.7681},
 {'Timestamp': '2017-03-17T00:59:00Z', 'Value': 29.73208},
 {'Timestamp': '2017-03-17T01:00:00Z', 'Value': 29.67508},
 {'Timestamp': '2017-03-17T01:01:01Z', 'Value': 29.54451},
 {'Timestamp': '2017-03-17T01:02:00Z', 'Value': 29.55049},
 {'Timestamp': '2017-03-17T01:03:00Z', 'Value': 29.65275},
 {'Timestamp': '2017-03-17T01:04:00Z', 'Value': 29.755},
 {'Timestamp': '2017-03-17T01:05:00Z', 'Value': 29.72754},
 {'Timestamp': '2017-03-17T01:06:01Z', 'Value': 29.83604},
 {'Timestamp': '2017-03-17T01:07:00Z', 'Value': 29.93986},
 {'Timestamp': '2017-03-17T01:08:00Z', 'Value': 30.04601},
 {'Timestamp': '2017-03-17T01:09:00Z', 'Value': 30.09564},
 {'Timestamp': '2017-03-17T01:10:00Z', 'Value': 30.06812},
 {'Timest

#### 8) See same data as a Panda dataframe

In [12]:
df = pd.DataFrame(fv31_bottom_temp.json())
df

Unnamed: 0,Timestamp,Value
0,2017-03-17T00:14:07Z,30.5642
1,2017-03-17T00:22:36Z,30.2
2,2017-03-17T00:53:25Z,30.2
3,2017-03-17T00:57:55Z,30.06531
4,2017-03-17T00:58:48Z,29.7681
5,2017-03-17T00:59:00Z,29.73208
6,2017-03-17T01:00:00Z,29.67508
7,2017-03-17T01:01:01Z,29.54451
8,2017-03-17T01:02:00Z,29.55049
9,2017-03-17T01:03:00Z,29.65275


#### 9) Plot the time-series

In [13]:
layout = dict(title='Bottom Temperature')
data = [go.Scattergl(x = df['Timestamp'], y = df['Value'], mode='lines+markers')]
fig = go.FigureWidget(data=data, layout=layout)
fig

FigureWidget({
    'data': [{'mode': 'lines+markers',
              'type': 'scattergl',
              'uid': …

## Often for analysis what is needed is a single table with data from:

### 1- Multiple sensors, settings, or calculations, all data organized as row of observations
### 2- Multiple similar assets: consistent data shape
### 3- Data at regular interval: missing data is interpolated from existing values 

## OSIsoft answer for the above is the Dataview 

### Once a Dataview is defined, a CSV table can be obtained from an HTTP GET request with the startIndex/endIndex/interval of interest

Dataviews are somewhat complex to define, this is why they're defined and put in place by data engineer/scientist for other users to consume 

## Creation of the Dataviews, for fermenters 31 up to 36

* Status 201 from POST request indicates success
* Status 401 indicates unauthorized (try refreshing authorization header)
* Status 409 when a Dataview with same Id already exists (go to last cell of this notebook to perform a clean up)
* One Dataview per fermenter vessel 

In [None]:
# Valid Fermenter Vessel IDs are 31 up to 36
for fv_id in range(31, 37):
    dataviews_id = ocs_client.create_fermenter_dataview(fv_id, version='8')

## Get dataframe and time it (up to a minute, or longer on slow link, be patient)

All responses should have HTTP code [200] to indicate that everything is ok 

### Note the that resulting dataframe has about 28801 rows

**This is for 1 fermenter X 20 days X 1440 rows per day (24 hours at 1 minute interval)**

## Save data into CSV file locally 

## The file `beer_ocs_all_20days.csv` can be opened with Excel for inspection 

# ADF Analysis 

## First start by reading all fermenter vessels data, 300 days

### (Previously saved on file, takes a little while to read back)

### Note: the result CSV above has 2.6 million rows 

### List all unique Fermentation Batch IDs, filter out bad ones

### Prepare ADF curve plots over time, one curve per Fermentation Batch ID for the first 20 days

### Add a range slider 

With a few time range selectors: 1 day, 3 days and everything 

## Step 1: Extract only the relevant columns for ADF analysis. We only want to look at data while the fermentor is in the fermentation stage

## Step 2: ADF is an offline measurement, keep only data that corresponds to a new measurement

## Step 3: We want to analyze all fermentations together, so we need to look at elapsed time. Here we find the index that corresponds to the beginning of each fermentation

## Step 4: Create new columns (computed in step 5)

* **Elapsed**: elapsed time since fermentation starts

* **tdif**: time difference with row just before

* **adfdif**: ADF difference with row just before 

## Step 5: Compute values for 3 new columns

### Number of fermentations for brand Realtime Hops 
This brand has the most data to work with for the analysis part

### Only keep data for Realtime Hops and remove inconsistent data

* All elapsed time must be positive
* A fermentation cannot last more than 4 days, remove is elapsed is over

### Compute new columns with Elasped and tdif in seconds
Note: warnings are OK

### Plot current data 

### The last cell of the data preprocessing part
The output file `regression_ocs.csv` is the input of the analysis part which follows

-----
-----
# Test that triggers 500 error code
# Requesting Dataviews in parallel (6 of them) worth 20 days of data, 15 times for 300 days total
-----
-----

In [14]:
interval = '00:00:15'
init_time = parser.parse('2017-03-17T07:00')
num_days = 5
delta_time = dt.timedelta(days=num_days)
for i in range(0, 61):
    start_time =  init_time + i * delta_time 
    end_time = start_time + delta_time
    df = ocs_client.get_all_fermenters_dataviews(start_time.isoformat(), end_time.isoformat(), interval, version='8')
    df.to_csv(f'beer_ocs_dv_test2_{num_days}days_{i:02d}.csv', index=False)    
    print(f'@@ Iteration {i} completed')
print('@@@ Done')

Urls: ['https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Dataviews/DV8_FV31/preview/interpolated?startIndex=2017-03-17T07:00:00&endIndex=2017-03-22T07:00:00&interval=00:00:15&form=csvh&maxcount=200000', 'https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Dataviews/DV8_FV32/preview/interpolated?startIndex=2017-03-17T07:00:00&endIndex=2017-03-22T07:00:00&interval=00:00:15&form=csvh&maxcount=200000', 'https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Dataviews/DV8_FV33/preview/interpolated?startIndex=2017-03-17T07:00:00&endIndex=2017-03-22T07:00:00&interval=00:00:15&form=csvh&maxcount=200000', 'https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Dataviews/DV8_FV34/preview/interpolated?startIndex=2017-03-17T07:00:00&endIndex=2017-03-22T

In [9]:
import multiprocessing
interval = '00:00:15'
init_time = parser.parse('2017-03-17T07:00')
num_days = 5
delta_time = dt.timedelta(days=num_days)

def f(i, start_index, end_index, interval, version='8'):
    df = ocs_client.get_all_fermenters_dataviews(start_time.isoformat(), end_time.isoformat(), interval, version='8')
    df.to_csv(f'beer_ocs_dv_test2_{num_days}days_{i:02d}.csv', index=False)
    print(f'@@ Iteration {i} completed')
    return 

In [10]:
pool = multiprocessing.Pool() #use all available cores, otherwise specify the number you want as an argument
pool

<multiprocessing.pool.Pool at 0x7f5e2a900828>

Urls: ['https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Dataviews/DV8_FV31/preview/interpolated?startIndex=2017-03-22T07:00:00&endIndex=2017-03-27T07:00:00&interval=00:00:15&form=csvh&maxcount=200000', 'https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Dataviews/DV8_FV32/preview/interpolated?startIndex=2017-03-22T07:00:00&endIndex=2017-03-27T07:00:00&interval=00:00:15&form=csvh&maxcount=200000', 'https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Dataviews/DV8_FV33/preview/interpolated?startIndex=2017-03-22T07:00:00&endIndex=2017-03-27T07:00:00&interval=00:00:15&form=csvh&maxcount=200000', 'https://dat-b.osisoft.com/api/v1-preview/Tenants/d7847614-2e4a-4c1e-812b-e8de5fd06a0f/Namespaces/fermenter_vessels/Dataviews/DV8_FV34/preview/interpolated?startIndex=2017-03-22T07:00:00&endIndex=2017-03-27T

In [11]:
for i in range(0, 21):
    start_time =  init_time + i * delta_time 
    end_time = start_time + delta_time
    pool.apply_async(f, args=(i, start_time.isoformat(), end_time.isoformat(), interval, '8', ))
    pool.close()
    pool.join()

ValueError: Pool not running