In [47]:
import json
import os
import numpy as np
import math
from typing import Dict, List, Tuple

from datetime import timedelta
from config import *
from utils import *
from main import *

import plotly.graph_objects as go
from plotly.subplots import make_subplots

data = test_umass(test_project="HomeG/2016")
ctx_info = ContextAccessor({
            TIME_CTX: {
                "range" : (0, 24*60),
                "interval" : 20,
            },
            # "humidity#NUM" : {
            #     "range" : (0., 1.0),
            #     "interval" : 0.1,
            # },
            WEEKDAY_CTX: {
                "range": (0, 6.1),
                "interval": 1,
            },
        })
print(ctx_info.get_all_ctx_ordered())
# on_rate = np.full(ctx_info.get_ctx_space_shape(), -0.1)

# for dis in data["WashingMachine"]:
#     on_rate[dis["coor"]] = dis["distribution"][1] / (dis["distribution"][0] + dis["distribution"][1])

# fig = go.Figure(data=go.Heatmap(z=np.transpose(on_rate)))
# fig.show()

DEBUG:root:The number of device events from processed file: {'LivingLights': 1951, 'HomeOffice': 1175, 'Range': 225, 'Microwave': 2915}
DEBUG:root:The number of context events from processed file: {'apparentTemperature#NUM': 8764, 'temperature#NUM': 8760, 'humidity#NUM': 7653, 'summary#CAT': 2110}
['min_of_day#NUM', 'day_of_week#CAT']


In [32]:
on_rate = np.full(ctx_info.get_ctx_space_shape(), -0.1)
xx = []
for dis in data["Microwave"]:
    on_rate[dis["coor"]] = dis["distribution"][1] / (dis["distribution"][0] + dis["distribution"][1])
    xx.append({"coor": dis["coor"], "on": dis["distribution"][1] / (dis["distribution"][0] + dis["distribution"][1])})
# print(sorted(xx, key=lambda x: -x["on"])[0:20])

fig = go.Figure(data=go.Heatmap(z=np.transpose(on_rate)))
fig.show()



In [35]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

from sklearn import tree
reg_x = []
reg_y = []
weight = []
for dis in data["Microwave"]:
    cnt = sum(dis["distribution"])
    weight.append(cnt)
    reg_x.append(dis["coor"])
    reg_y.append([
        x / cnt
        for x in dis["distribution"][1:]
    ])

X_train, X_test, y_train, y_test = train_test_split(
    reg_x, reg_y, test_size=0.33, random_state=42)

clf = DecisionTreeRegressor()
path = clf.cost_complexity_pruning_path(X_train, y_train)

regressor = DecisionTreeRegressor(ccp_alpha=20e-6)

regressor.fit(reg_x, reg_y, sample_weight=weight)

on_rate_group = np.full(ctx_info.get_ctx_space_shape(), 0.)
for i in range(ctx_info.get_ctx_space_shape()[0]):
    for j in range(ctx_info.get_ctx_space_shape()[1]):
        on_rate_group[i,j] = regressor.predict([[i,j]])

fig = go.Figure(data=go.Heatmap(z=np.transpose(on_rate_group)))
fig.show()

In [55]:

groups = {}
box = []
leaves = regressor.apply(reg_x)
for i,l in enumerate(leaves):
    if l not in groups:
        groups[l] = {"coors": [reg_x[i]], "tot_dis": np.array(reg_y[i]), "cnt": 1}
    else:
        groups[l]["coors"].append(reg_x[i])
        groups[l]["tot_dis"] += reg_y[i]
        groups[l]["cnt"] += 1
for g, points in groups.items():
    box.append({})
    box[-1]["box"] = bounding_box(points["coors"])
    box[-1]["dis"] = points["tot_dis"] / points["cnt"] 


on_rate_group = np.full(ctx_info.get_ctx_space_shape(), -0.1)
for i in range(ctx_info.get_ctx_space_shape()[0]):
    for j in range(ctx_info.get_ctx_space_shape()[1]):
        test = False
        for b in box:
            if check_in_box(b["box"], [i,j]):
                test = True
                on_rate_group[i,j] = b["dis"][0]
                break
        if not test:
            print((i,j))
fig = go.Figure(data=go.Heatmap(z=np.transpose(on_rate_group)))
fig.show()

In [20]:
import matplotlib.pyplot as plt

ccp_alphas, impurities = path.ccp_alphas, path.impurities
fig = go.Figure(data=go.Scatter(x=ccp_alphas[:-1], y=impurities[:-1], mode='lines+markers'))
fig.show()


In [21]:
clfs = []
for ccp_alpha in ccp_alphas:
    clf = DecisionTreeRegressor(ccp_alpha=ccp_alpha)
    clf.fit(X_train, y_train)
    clfs.append(clf)
clfs = clfs[:-1]
ccp_alphas = ccp_alphas[:-1]

node_counts = [clf.get_n_leaves() for clf in clfs]
depth = [clf.tree_.max_depth for clf in clfs]
fig = make_subplots(rows=2, cols=1)
fig.add_trace(
    go.Scatter(x=ccp_alphas, y=node_counts, mode='lines+markers'),
    row=1, col=1
)

fig.add_trace(
    go.Scatter(x=ccp_alphas, y=depth, mode='lines+markers'),
    row=2, col=1
)


In [22]:
train_scores = [clf.score(X_train, y_train) for clf in clfs]
test_scores = [clf.score(X_test, y_test) for clf in clfs]

fig = go.Figure()
fig.add_trace(
    go.Scatter(x=ccp_alphas, y=train_scores, mode='lines+markers', name="train"))
fig.add_trace(
    go.Scatter(x=ccp_alphas, y=test_scores, mode='lines+markers', name="test"))

fig.show()

In [4]:
data = test_umass(test_project="HomeD/2016")
on_rate = np.full(ctx_info.get_ctx_space_shape(), -0.1)
for dis in data["Microwave"]:
    on_rate[dis["coor"]] = dis["distribution"][1] / (dis["distribution"][0] + dis["distribution"][1])


fig = go.Figure(data=go.Heatmap(z=np.transpose(on_rate)))
fig.show()

DEBUG:root:The number of device events from processed file: {}
DEBUG:root:The number of context events from processed file: {'apparentTemperature#NUM': 8764, 'temperature#NUM': 8747, 'humidity#NUM': 7703, 'summary#CAT': 2578}


NameError: name 'ctx_info' is not defined

In [None]:
data = test_umass(test_project="HomeA/2016")
on_rate = np.full(ctx_info.get_ctx_space_shape(), -0.1)
for dis in data["Microwave [kW]"]:
    on_rate[dis["coor"]] = dis["distribution"][1] / (dis["distribution"][0] + dis["distribution"][1])


fig = go.Figure(data=go.Heatmap(z=np.transpose(on_rate)))
fig.show()

In [30]:
from sklearn.neighbors import KDTree, BallTree

def find_set(uf, a):
    if uf[a] != a:
        uf[a] = find_set(uf, uf[a])
    return uf[a]

def union_set(uf, a, b):
    if find_set(uf, a) != find_set(uf, b):
        uf[find_set(uf, a)] = uf[b]

print(len(unit_of_int))
tree = KDTree(unit_of_int, leaf_size=5)   
clss = [x for x in range(len(unit_of_int))]  

dist, ind = tree.query(unit_of_int, k=4, sort_results=True)  
for idx, unit in enumerate(unit_of_int):
    prob_unit = final_space[unit] / sum(final_space[unit])
    d_i = dist[idx]
    ind_i = ind[idx]
    for i in range(4):
        if d_i[i] > 1:
            continue
        cell = unit_of_int[ind_i[i]]
        prob_ngbr = final_space[cell] / sum(final_space[cell])
        if np.linalg.norm(prob_unit - prob_ngbr) < 0.05:
            union_set(clss, idx, ind_i[i])
# print(clss)
core = []
u = np.unique([find_set(clss, i) for i in range(len(clss))])
print(u)
a = np.full(space_shape, u[0]-1)
for i, unit in enumerate(unit_of_int):
    a[unit] = find_set(clss, i)

fig = go.Figure(data=go.Heatmap(z=a))
fig.show()

528
[  1  73 117 134 138 140 168 187 209 211 212 246 259 261 296 303 320 323
 337 341 350 355 356 366 367 387 400 401 402 404 409 414 417 425 426 427
 432 448 449 461 462 463 464 465 466 479 480 481 487 489 497 513 514 519
 521 525 527]


In [29]:

time_delta = 20
d_time_delta = timedelta(minutes=10)
humidity_delta = 0.1
on_state = [[],[]]
off_state = [[],[]]
space_shape = (math.ceil(1.0/humidity_delta), math.ceil(24*60/time_delta))
final_space = np.zeros([space_shape[0], space_shape[1], 2])
data_x = []
data_y = []
min_obs = 10
unit_of_int = []
def process_snapshot(cur_time, ctx_snapshot, d_state):
    cell_idx = (int(ctx_snapshot['humidity#NUM']/humidity_delta), int(ctx_snapshot['min_of_day#NUM']/time_delta))

    if d_state == "on":
        final_space[cell_idx[0], cell_idx[1], 0] += 1
    else:
        final_space[cell_idx[0], cell_idx[1], 1] += 1
    if final_space[cell_idx[0], cell_idx[1], 1] + final_space[cell_idx[0], cell_idx[1], 0] == min_obs:
        unit_of_int.append(cell_idx)
    data_x.append([ctx_snapshot['min_of_day#NUM'], ctx_snapshot['humidity#NUM']])
    data_y.append(0 if d_state=="on" else 1)

def tag_device(ctx_evts, device_evts):
    for d, d_evts in device_evts.items():
        if d != "Microwave [kW]":
            continue
        cur_time = d_evts[0][1]
        end_time = d_evts[-1][1]
        c_evt_idx = {c: 0 for c in ctx_evts}
        ctx_snapshot = {
            c: ctx_evts[c][0][0]
            for c in ctx_evts
        }
        cur_evt_idx = 0
        d_state = d_evts[0][0]
        while cur_time < end_time:
            for c, c_evts in ctx_evts.items():
                while c_evt_idx[c] < len(c_evts) and c_evts[c_evt_idx[c]][1] <= cur_time:
                    ctx_snapshot[c] = c_evts[c_evt_idx[c]][0]
                    c_evt_idx[c] += 1
            # Add additional contextes
            ctx_snapshot[TIME_CTX] = datetime_to_mins(cur_time)
            ctx_snapshot[WEEKDAY_CTX] = cur_time.date().weekday()  
            process_snapshot(cur_time, ctx_snapshot, d_state)  

            if d_evts[cur_evt_idx + 1][1] <= cur_time + d_time_delta:
                cur_time = d_evts[cur_evt_idx + 1][1]
                cur_evt_idx += 1
                d_state = d_evts[cur_evt_idx][0]
            else:
                cur_time += d_time_delta
            

def test_umass_2():
    test_project = "HomeF/2016"
    project_path = os.path.join(DATA_ROOT, UMASS_ROOT, test_project)
    input_file = os.path.join(project_path, PROCESSED_FILENAME)
    with open(input_file) as f:
        json_str = f.read()
        (ctx_evts, device_evts) = json.loads(json_str, object_hook=json_datetime_hook)
    logging.debug("The number of device events from processed file: {}".format(
        {x: len(device_evts[x]) for x in device_evts}))
    logging.debug("The number of context events from processed file: {}".format(
            {x: len(ctx_evts[x]) for x in ctx_evts}))
    
    tag_device(ctx_evts, device_evts)

test_umass_2()


on_rate = np.zeros(space_shape)
total_hit = np.zeros(space_shape)

count = 0
for i in range(space_shape[0]):
    for j in range(space_shape[1]):
        total_hit[i,j] = final_space[i,j][0] +  final_space[i,j][1]
        if final_space[i,j][0] +  final_space[i,j][1] == 0:
            count+=1
            continue
        on_rate[i,j] = final_space[i,j][0] / (final_space[i,j][0] +  final_space[i,j][1])  
print(len(unit_of_int))
print(space_shape[0]*space_shape[1])
fig = go.Figure(data=go.Heatmap(z=on_rate))
# fig = go.Figure(data=go.Heatmap(z=total_hit))
fig.show()

DEBUG:root:The number of device events from processed file: {'Microwave [kW]': 5013, 'Washing_Machine [kW]': 589}
DEBUG:root:The number of context events from processed file: {'apparentTemperature#NUM': 8743, 'humidity#NUM': 7669, 'summary#CAT': 1892}
528
720


In [85]:
# LDA not working
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import sklearn

clss = LDA()
data_x = sklearn.preprocessing.MinMaxScaler().fit_transform(data_x)

x = clss.fit_transform(data_x, data_y)
print(data_x[10])
print(clss.transform(data_x[10:11]))
print(clss.coef_)

[0.13898541 0.58823529]
[[0.49720339]]
[[-0.45413627  0.88417517]]


In [17]:
from scipy.spatial import ConvexHull
import copy

points = np.random.rand(30, 2)   # 30 random points in 2-D
hull = ConvexHull(points, incremental=True)
newp = np.random.rand(10,2)
print(len(hull.points))
hull2 = copy.deepcopy(hull)


30


TypeError: self._qh cannot be converted to a Python object for pickling