In [1]:
#NOTE: we assume all time series data in PST time. Only time we are working with non PST data is when 
# we receive data and then convert. See get_occupancy function for how to do that.
# base imports
import time
from collections import defaultdict
%matplotlib inline
import pandas as pd
from copy import copy
from matplotlib import pyplot as plt
import matplotlib as mpl
import numpy as np
import pytz
tz = pytz.timezone("US/Pacific")
import doctest
import matplotlib.lines as plt_line
import matplotlib

In [2]:
# for fetching data
from xbos import get_client
from xbos.services.pundat import DataClient, timestamp, make_dataframe, merge_dfs
from xbos.services.hod import HodClient

In [3]:
# get a bosswave client
c = get_client(entity="/Users/Daniel/CIEE/SetUp/ciee_readonly.ent", agent="127.0.0.1:28589")
# get a HodDB client
hod = HodClient("ciee/hod", c)
# get an archiver client
archiver = DataClient(c,archivers=["ucberkeley"])

Saw [ciee/hod] HodDB 10 seconds 538.066 ms ago
Saw [ucberkeley] archiver 6 seconds 908.394 ms ago


In [11]:
# define a Brick query to get the temperature information
q = """SELECT ?x ?hvaczone ?uuid WHERE { 
?x bf:controls/bf:feeds ?hvaczone .
 ?hvaczone rdf:type brick:HVAC_Zone .
?x rdf:type/rdfs:subClassOf* brick:Thermostat .
  ?x bf:hasPoint ?status .
  ?status rdf:type brick:Thermostat_Status .
  ?status bf:uuid ?uuid .
};
"""
res = hod.do_query(q)['Rows']
zones = defaultdict(lambda: defaultdict(list))
for sensor in res:
    zones[sensor["?hvaczone"]]["Heating"].append(sensor["?uuid"])
print(zones)



defaultdict(<function <lambda> at 0x106ca0f50>, {'SouthZone': defaultdict(<type 'list'>, {'Heating': ['dfb2b403-fd08-3e9b-bf3f-18c699ce40d6']}), 'NorthZone': defaultdict(<type 'list'>, {'Heating': ['5e55e5b1-007b-39fa-98b6-ae01baa6dccd']}), 'CentralZone': defaultdict(<type 'list'>, {'Heating': ['187ed9b8-ee9b-3042-875e-088a08da37ae']}), 'EastZone': defaultdict(<type 'list'>, {'Heating': ['7e543d07-16d1-32bb-94af-95a01f4675f9']})})


In [5]:
# define a Brick query to get the occupancy information
q = """SELECT ?zone ?sensor_uuid WHERE {
   ?zone rdf:type brick:HVAC_Zone .
   ?zone bf:hasPart ?room .
   ?sensor bf:isLocatedIn ?room .
   ?sensor rdf:type/rdfs:subClassOf* brick:Occupancy_Sensor .
   ?sensor bf:uuid ?sensor_uuid .
};
"""
res = hod.do_query(q)['Rows']
for sensor in res:
    zones[sensor['?zone']]["Occupancy"].append(sensor['?sensor_uuid'])

In [6]:
test_zones = {"Test": {"Occupancy": pd.DataFrame([[0, 0, 0], [1,0,0]], columns=["f", "s", "t"], index=[0, 1]), "Heating":pd.DataFrame([[1], [0]], columns=["f"], index=[0,1])}}

In [7]:
# timestamps for data retrieval
start = '"2017-09-01 08:00:00 MST"'
end = '"2017-09-08 08:00:00 MST"'

test = False
if test:
    zones = test_zones

# loop to get data for time spent while (heating and occupied), (heating and not occupied), (not heating and occupied),
# (not heating and not occupied)
print("Loop has started")
zone_ho = {}
for zone, values in zones.items():
    t = time.time()
    # unpack data from the values in dictionary
    heating = values["Heating"]
    occupancy = values["Occupancy"]
    print(occupancy)
    # get data from archiver
    if not test:
        occupancy_data = make_dataframe(archiver.data_uuids(occupancy, start, end))

        heating_data = make_dataframe(archiver.data_uuids(heating, start, end))
        
        # merge the retrieved data into an acceptable dfs
        heating_dfs = merge_dfs(heating_data, resample="30S", do_max=True)
        occupancy_dfs = merge_dfs(occupancy_data, resample="30S", do_max=True)
        # set the occupany dfs to reflect is someone is in (1) or not (0)
        occupancy_dfs = 1*(occupancy_dfs > 0)
    if test:
        occupancy_dfs = occupancy
        heating_dfs = heating
    # change the column names to make them more easy to identify 
    occupancy_dfs.columns = occupancy_dfs.columns.map(lambda x: "occupancy_"+str(x))
    heating_dfs.columns = heating_dfs.columns.map(lambda x: "heating_" + str(x))
    # concatinating the two dfs. Might be not so good because of the timeseries. Might have to resample i suppose or
    # do something about it.
    data = pd.concat([heating_dfs, occupancy_dfs], axis=1)
    # function for a row of the data. Returns the four information sets accordingly
    criteria_heating = data.columns.map(lambda y: "heating" in y).tolist()
    criteria_occupancy = data.columns.map(lambda z: "occupancy" in z).tolist()
    def f(x):
        occupied = any(x[criteria_occupancy])
        heating = any(x[criteria_heating])
        return pd.Series([occupied and heating, not occupied and heating, occupied and not heating, not occupied and not heating])
    heating_occupancy = data.apply(f, axis=1)
    heating_occupancy.columns = pd.Series(["o&h", "no&h", "o&nh", "no&nh"])
    zone_ho[zone] = heating_occupancy
    print("Time for " + str(zone) + " was: " + str(time.time()-t))
print("Done with cleaning up data.")
    

Loop has started
['d919a14e-3ebd-3e5b-9727-0df54f287032', 'cab606d0-6d14-3aff-a0c2-e862ae297f22', 'c27adbad-b1b5-342c-91eb-a6709d365871', 'bdbbbf7c-21c3-3b61-8469-8516a8c666dd', 'afed35be-ef70-341a-8bf4-565d815bfd90', 'aefa14ef-a13a-355b-9b87-51511db7f541', '989ed503-9aa2-310e-abd3-464f6169e175', '97ba99af-869b-3c43-94e3-93040c9bdd33', '611ac5e9-33a5-3273-9fa6-548d3ad59481', '497145c4-2087-3e89-9c8f-f54b87dfb016', '47e37419-5091-3877-a476-bb6f942e93af', '2f5c888d-c0c6-33b4-9c30-d609f1e16fea', '2b3f9905-3bb7-302a-9545-7bfe2eb63547', '242cde13-7d25-306a-bfc8-918ad7bb26d6', '23d2afce-4c86-37c5-b1c2-f258758c3263', '0c51371e-6029-3302-984d-b71d850e899a', '00f45f05-08f9-3def-a977-357493fad61e']


TimeoutException: Query of select data in ("2017-09-01 08:00:00 MST", "2017-09-08 08:00:00 MST") where uuid = "d919a14e-3ebd-3e5b-9727-0df54f287032" or uuid = "cab606d0-6d14-3aff-a0c2-e862ae297f22" or uuid = "c27adbad-b1b5-342c-91eb-a6709d365871" or uuid = "bdbbbf7c-21c3-3b61-8469-8516a8c666dd" or uuid = "afed35be-ef70-341a-8bf4-565d815bfd90" or uuid = "aefa14ef-a13a-355b-9b87-51511db7f541" or uuid = "989ed503-9aa2-310e-abd3-464f6169e175" or uuid = "97ba99af-869b-3c43-94e3-93040c9bdd33" or uuid = "611ac5e9-33a5-3273-9fa6-548d3ad59481" or uuid = "497145c4-2087-3e89-9c8f-f54b87dfb016" or uuid = "47e37419-5091-3877-a476-bb6f942e93af" or uuid = "2f5c888d-c0c6-33b4-9c30-d609f1e16fea" or uuid = "2b3f9905-3bb7-302a-9545-7bfe2eb63547" or uuid = "242cde13-7d25-306a-bfc8-918ad7bb26d6" or uuid = "23d2afce-4c86-37c5-b1c2-f258758c3263" or uuid = "0c51371e-6029-3302-984d-b71d850e899a" or uuid = "00f45f05-08f9-3def-a977-357493fad61e" timed out

In [None]:
for zone, heating_occupancy in zone_ho.items():
    time_spent = heating_occupancy.sum(axis=0) * 0.5 / 60
    print("Times for: " + str(zone) + " in hours:")
    print(time_spent)
#     print("Ratio: "+str(time))
    figure = heating_occupancy.resample("1H").max().plot(legend=True, figsize=(15,8))
    figure.set_title(zone+ " occupancy&heating")
    figure.set_ylabel("True/False")