# HackDavis python library tests

## OCS-HackDavis Package Installation 

In [1]:
!pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ocs-hackdavis==0.28.0

Looking in indexes: https://test.pypi.org/simple/, https://pypi.org/simple/


## Import functions from package `ocs_hackdavis`

### They are all described in this notebook with examples

In [2]:
from ocs_hackdavis import (
    ucdavis_buildings,  # list of campus buildings
    ucdavis_ceeds_of,   # list of CEED element of a building (Electricity, Steam, Chilled Water, etc)
    ucdavis_streams_of, # The list of all OCS data streams for a building and CEED pair  
    ucdavis_building_metadata,  # Metadata for a building: building code, lat/long, usage, etc.  
    ocs_stream_interpolated_data,  # Interpolated data from a stream given a time range + interpolation interval
    ucdavis_outside_temperature,  # Outside temperature at UC Davis for a given a time range + interpolation interval
)

## List of buildings

In [3]:
buildings = ucdavis_buildings()
len(buildings), buildings[:25]  # display first 25

(159,
 ['ARC Pavilion',
  'Academic Surge Building',
  'Activities and Recreation Center',
  'Advanced Materials Research Laboratory',
  'Advanced Transportation Infrastructure Research Center',
  'Aggie Stadium',
  'Agronomy Field Laboratory',
  'Animal Building',
  'Animal Resource Service J1',
  'Animal Resource Service M3',
  'Animal Resource Service N1',
  'Ann E. Pitzer Center',
  'Antique Mechanics Trailer',
  'Aquatic Biology & Environmental Science Bldg',
  'Art Building',
  'Art Building Annex',
  'Art, Music, Wright Halls',
  'Asmundson Annex',
  'Asmundson Hall',
  'Bainer Hall',
  'Bowley Head House',
  'Briggs Hall',
  'California Hall',
  'Campus Data Center',
  'Cellular Biology Laboratory'])

## Function `ucdavis_ceeds_of` returns list of CEEDs for a given building

### Display them for all buildings

In [4]:
for num, building in enumerate(ucdavis_buildings(), 1):
    print(f"[{num}] Building: {building} ==> CEEDS: {ucdavis_ceeds_of(building)}")

[1] Building: ARC Pavilion ==> CEEDS: ['Baseline Electricity', 'ChilledWater', 'Electricity', 'Steam']
[2] Building: Academic Surge Building ==> CEEDS: ['ChilledWater', 'Electricity', 'Steam']
[3] Building: Activities and Recreation Center ==> CEEDS: ['Baseline ChilledWater', 'Baseline Electricity', 'Baseline Steam', 'ChilledWater', 'Electricity', 'Steam']
[4] Building: Advanced Materials Research Laboratory ==> CEEDS: ['Baseline Electricity', 'Electricity']
[5] Building: Advanced Transportation Infrastructure Research Center ==> CEEDS: ['Baseline Electricity', 'Electricity']
[6] Building: Aggie Stadium ==> CEEDS: ['Electricity']
[7] Building: Agronomy Field Laboratory ==> CEEDS: ['Electricity']
[8] Building: Animal Building ==> CEEDS: ['Electricity']
[9] Building: Animal Resource Service J1 ==> CEEDS: ['Electricity', 'Natural Gas']
[10] Building: Animal Resource Service M3 ==> CEEDS: ['Electricity']
[11] Building: Animal Resource Service N1 ==> CEEDS: ['Electricity']
[12] Building: An

## Get the list of available streams of a building

#### Default CEED is Electricity

In [5]:
ucdavis_streams_of("Activities and Recreation Center")

{'AnnualCost': 'PI_uni-pida-vm0_403',
 'AnnualUsage': 'PI_uni-pida-vm0_418',
 'Cumulative Use': 'PI_uni-pida-vm0_415',
 'Demand': 'PI_uni-pida-vm0_532_ds2m',
 'Demand_kBtu': 'PI_uni-pida-vm0_419',
 'EUI': 'PI_uni-pida-vm0_182',
 'Electricity_EUI': 'PI_uni-pida-vm0_238',
 'MonthlyUsage': 'PI_uni-pida-vm0_417',
 'Rollover Check': 'PI_uni-pida-vm0_239',
 'Rollover Count Month': 'PI_uni-pida-vm0_241',
 'Rollover Count Year': 'PI_uni-pida-vm0_242'}

## Get the specific stream of a building

In [6]:
ucdavis_streams_of("Activities and Recreation Center")["Demand"]

'PI_uni-pida-vm0_532_ds2m'

## Import for OCS Sample Library and other necessary modules

In [7]:
from ocs_sample_library_preview import OCSClient
import configparser
import io
import json
import pandas as pd

## Standard configuration file parsing and OCS client object 

#### Reference: https://github.com/osisoft/OSI-Samples-OCS/blob/master/basic_samples/DataViews/Python3/program.py#L150

## IMPORTANT: REPLACE CLIENT ID + SECRET BELOW BEFORE RUNNING

In [8]:
config_text = u"""
; IMPORTANT: replace these values with those provided by OSIsoft
[Configurations]
Namespace = UC__Davis

[Access]
Resource = https://dat-b.osisoft.com
Tenant = 65292b6c-ec16-414a-b583-ce7ae04046d4
ApiVersion = v1-preview

[Credentials]
ClientId = REPLACE_ME
ClientSecret = REPLACE_ME
"""

In [9]:
config = configparser.ConfigParser(allow_no_value=True)
config.read_file(io.StringIO(config_text))

ocs_client = OCSClient(
    config.get("Access", "ApiVersion"),
    config.get("Access", "Tenant"),
    config.get("Access", "Resource"),
    config.get("Credentials", "ClientId"),
    config.get("Credentials", "ClientSecret"),
)

namespace_id = config.get("Configurations", "Namespace")
print(f"namespace_id: '{namespace_id}'")

namespace_id: 'UC__Davis'


## Getting interpolated data from a stream

In [10]:
# Step 1: get the stream Id 
stream_id = ucdavis_streams_of("Activities and Recreation Center")["Demand"]

# Step 2) request interpolated data
# NOTE 1: difference between endIndex and startIndex should be 31 days or less
# NOTE 2: interpolation interval cannot be less than 2 minutes
result = ocs_stream_interpolated_data(
    ocs_client,
    namespace_id,
    stream_id,
    start="2017-02-01", # UTC 
    end="2017-03-01",
    interval=2,  # 2 minutes
)
len(result), result[:100]  # display first 100 rows 

(20161,
 [{'Timestamp': '2017-02-01T00:00:00Z', 'Value': 254.05560302734375},
  {'Timestamp': '2017-02-01T00:02:00Z', 'Value': 262.8854675292969},
  {'Timestamp': '2017-02-01T00:04:00Z', 'Value': 249.9988250732422},
  {'Timestamp': '2017-02-01T00:06:00Z', 'Value': 252.9053192138672},
  {'Timestamp': '2017-02-01T00:08:00Z', 'Value': 252.3594512939453},
  {'Timestamp': '2017-02-01T00:10:00Z', 'Value': 248.40652465820312},
  {'Timestamp': '2017-02-01T00:12:00Z', 'Value': 249.87322998046875},
  {'Timestamp': '2017-02-01T00:14:00Z', 'Value': 245.76869201660156},
  {'Timestamp': '2017-02-01T00:16:00Z', 'Value': 249.33018493652344},
  {'Timestamp': '2017-02-01T00:18:00Z', 'Value': 249.44326782226562},
  {'Timestamp': '2017-02-01T00:20:00Z', 'Value': 256.346923828125},
  {'Timestamp': '2017-02-01T00:22:00Z', 'Value': 243.9831085205078},
  {'Timestamp': '2017-02-01T00:24:00Z', 'Value': 244.44805908203125},
  {'Timestamp': '2017-02-01T00:26:00Z', 'Value': 245.53001403808594},
  {'Timestamp': '20

## Transform result into a Pandas series

In [11]:
demand1 = pd.read_json(json.dumps(result)).set_index('Timestamp')
demand1

Unnamed: 0_level_0,Value
Timestamp,Unnamed: 1_level_1
2017-02-01 00:00:00+00:00,254.055603
2017-02-01 00:02:00+00:00,262.885468
2017-02-01 00:04:00+00:00,249.998825
2017-02-01 00:06:00+00:00,252.905319
2017-02-01 00:08:00+00:00,252.359451
...,...
2017-02-28 23:52:00+00:00,251.217514
2017-02-28 23:54:00+00:00,250.554886
2017-02-28 23:56:00+00:00,251.206818
2017-02-28 23:58:00+00:00,250.461777


## Get data for next month

In [12]:
result = ocs_stream_interpolated_data(
    ocs_client,
    namespace_id,
    stream_id,
    start="2017-03-01", # UTC 
    end="2017-04-01",
    interval=2,  # 2 minutes
)

## Transform into a Pandas time series and append to first month data

In [13]:
demand2 = pd.read_json(json.dumps(result)).set_index('Timestamp')
demand_2m = demand1.append(demand2)
len(demand_2m), demand_2m

(42482,                                 Value
 Timestamp                            
 2017-02-01 00:00:00+00:00  254.055603
 2017-02-01 00:02:00+00:00  262.885468
 2017-02-01 00:04:00+00:00  249.998825
 2017-02-01 00:06:00+00:00  252.905319
 2017-02-01 00:08:00+00:00  252.359451
 ...                               ...
 2017-03-31 23:52:00+00:00  222.544403
 2017-03-31 23:54:00+00:00  223.602707
 2017-03-31 23:56:00+00:00  223.262390
 2017-03-31 23:58:00+00:00  226.604126
 2017-04-01 00:00:00+00:00  224.027908
 
 [42482 rows x 1 columns])

## There is a repeated row at the boundary of the two appended series

More specifically, the bottom row of first series and first row of second series. Line below shows it:

In [14]:
demand_2m.loc[demand_2m.index.duplicated()]

Unnamed: 0_level_0,Value
Timestamp,Unnamed: 1_level_1
2017-03-01 00:00:00+00:00,258.662231


## Remove duplicated row

Difference in lenght (== # of rows) should be 1 

In [15]:
new_demand = demand_2m.loc[~demand_2m.index.duplicated(keep="first")]
len(demand_2m) - len(new_demand)

1

## Getting a full year of data

### Reusing what we've learn so far 

In [16]:
# Create an empty series
demand = pd.Series()
demand

Series([], dtype: float64)

In [17]:
for start_month in range(1, 12):
    start_date = f"2017-{start_month}-01"
    print(f"> processing {start_date}")
    result = ocs_stream_interpolated_data(
        ocs_client,
        namespace_id,
        stream_id,
        start=start_date,  # UTC
        end=f"2017-{start_month+1}-01",
        interval=2,  # 2 minutes
    )
    demand = demand.append(
        pd.read_json(json.dumps(result)).set_index("Timestamp", drop=True)
    )

# Note there is an additional column "0" with NaN (not a number), it will be remove in the next cell
len(demand), demand

> processing 2017-1-01
> processing 2017-2-01


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


> processing 2017-3-01
> processing 2017-4-01
> processing 2017-5-01
> processing 2017-6-01
> processing 2017-7-01
> processing 2017-8-01
> processing 2017-9-01
> processing 2017-10-01
> processing 2017-11-01


(240250,                             0       Value
 2017-01-01 08:02:00+00:00 NaN  203.602783
 2017-01-01 08:04:00+00:00 NaN  196.223251
 2017-01-01 08:06:00+00:00 NaN  195.362305
 2017-01-01 08:08:00+00:00 NaN  195.213913
 2017-01-01 08:10:00+00:00 NaN  197.648666
 ...                        ..         ...
 2017-11-30 23:52:00+00:00 NaN  228.839342
 2017-11-30 23:54:00+00:00 NaN  228.842742
 2017-11-30 23:56:00+00:00 NaN  228.846142
 2017-11-30 23:58:00+00:00 NaN  228.849542
 2017-12-01 00:00:00+00:00 NaN  228.852943
 
 [240250 rows x 2 columns])

In [18]:
# remove duplicate rows and keep only column named "Values"
demand = demand.loc[~demand.index.duplicated(keep="first")]["Value"]
len(demand), demand

(240240, 2017-01-01 08:02:00+00:00    203.602783
 2017-01-01 08:04:00+00:00    196.223251
 2017-01-01 08:06:00+00:00    195.362305
 2017-01-01 08:08:00+00:00    195.213913
 2017-01-01 08:10:00+00:00    197.648666
                                 ...    
 2017-11-30 23:52:00+00:00    228.839342
 2017-11-30 23:54:00+00:00    228.842742
 2017-11-30 23:56:00+00:00    228.846142
 2017-11-30 23:58:00+00:00    228.849542
 2017-12-01 00:00:00+00:00    228.852943
 Name: Value, Length: 240240, dtype: float64)

## Building metadata

In [19]:
ucdavis_building_metadata(ocs_client, namespace_id, "Activities and Recreation Center")

{'Annual Cost': 151156.0,
 'BuildingName': 'ARC',
 'kWh Rate': 0.0687,
 'Prefix': 'Activities_and_Recreation_Center_MSB',
 'Rollover': 10000000.0,
 'CAAN': 4799.0,
 'Construction Date': '04/15/2002',
 'Display Name': 'Activities and Recreation Center',
 'Latitude': 38.5428969596,
 'Longitude': -121.759644393,
 'Maintained Gross Sq. Ft.': 158120.0,
 'Primary Usage (Type)': 'REC - Athletics & Recreation'}

## Get outside temperature at UC Davis

### Data for this stream starts on 2017-04-01, 19:38

In [20]:
# display only first 100 rows
ucdavis_outside_temperature(ocs_client, namespace_id, "2017-04-01", "2017-05-01", 2)[:100]

[{'Timestamp': '2017-04-02T19:38:00Z', 'Value': 73.84828},
 {'Timestamp': '2017-04-02T19:40:00Z', 'Value': 74.04701},
 {'Timestamp': '2017-04-02T19:42:00Z', 'Value': 74.13842},
 {'Timestamp': '2017-04-02T19:44:00Z', 'Value': 74.0657959},
 {'Timestamp': '2017-04-02T19:46:00Z', 'Value': 74.32633},
 {'Timestamp': '2017-04-02T19:48:00Z', 'Value': 74.61136},
 {'Timestamp': '2017-04-02T19:50:00Z', 'Value': 74.52496},
 {'Timestamp': '2017-04-02T19:52:00Z', 'Value': 74.7417145},
 {'Timestamp': '2017-04-02T19:54:00Z', 'Value': 75.06158},
 {'Timestamp': '2017-04-02T19:56:00Z', 'Value': 75.67546},
 {'Timestamp': '2017-04-02T19:58:00Z', 'Value': 75.66438},
 {'Timestamp': '2017-04-02T20:00:00Z', 'Value': 75.3251953},
 {'Timestamp': '2017-04-02T20:02:00Z', 'Value': 75.10666},
 {'Timestamp': '2017-04-02T20:04:00Z', 'Value': 75.3027649},
 {'Timestamp': '2017-04-02T20:06:00Z', 'Value': 75.17714},
 {'Timestamp': '2017-04-02T20:08:00Z', 'Value': 75.3776245},
 {'Timestamp': '2017-04-02T20:10:00Z', 'Value'

## Test error code 

### Bad argument of the right type

In [21]:
# should fail
ucdavis_ceeds_of("Bad Building")

