In [60]:
import json
import os
import numpy as np
import math
from typing import Dict, List, Tuple

from datetime import timedelta
from config import *
from utils import *

# --- Discretisation parameters ---------------------------------------------
time_delta = 20          # time-of-day bin width, in minutes (see d_time_delta)
humidity_delta = 0.1     # humidity bin width; the grid below spans 1.0 / humidity_delta bins
min_obs = 10             # total observations at which a cell is recorded in unit_of_int
d_time_delta = timedelta(minutes=time_delta)

# --- Observation grid --------------------------------------------------------
# Axis 0: humidity bins, axis 1: time-of-day bins over 24 * 60 minutes.
space_shape = (
    math.ceil(1.0 / humidity_delta),
    math.ceil(24 * 60 / time_delta),
)
# Trailing axis has two counters per cell: index 0 for "on" snapshots,
# index 1 for every other device state.
final_space = np.zeros(space_shape + (2,))

# --- Accumulators filled while snapshots are processed -----------------------
unit_of_int = []         # (humidity_bin, time_bin) tuples of cells that reached min_obs
data_x = []              # per-snapshot [min_of_day, humidity] feature rows
data_y = []              # per-snapshot labels: 0 for "on", 1 otherwise

# Not referenced by the code visible in this notebook -- TODO confirm they are still needed.
on_state = [[], []]
off_state = [[], []]
def process_snapshot(cur_time, ctx_snapshot, d_state):
    """Record one (context, device state) observation in the module-level accumulators.

    Args:
        cur_time: Timestamp of the snapshot. Currently unused; kept so the
            call signature stays stable.
        ctx_snapshot: Mapping that must contain ``'humidity#NUM'`` and
            ``'min_of_day#NUM'``.
        d_state: Device state; ``"on"`` is counted in channel 0 of
            ``final_space``, anything else in channel 1.

    Side effects:
        Increments ``final_space``, appends to ``data_x`` / ``data_y`` and, the
        moment a cell's total count equals ``min_obs``, appends that cell's
        index tuple to ``unit_of_int``.
    """
    humidity = ctx_snapshot['humidity#NUM']
    min_of_day = ctx_snapshot['min_of_day#NUM']

    # Clamp to the last bin: a value sitting exactly on the upper edge of the
    # grid (e.g. humidity == 1.0 -> int(1.0 / 0.1) == 10) would otherwise index
    # one past the end of final_space and raise IndexError.
    humidity_idx = min(int(humidity / humidity_delta), final_space.shape[0] - 1)
    time_idx = min(int(min_of_day / time_delta), final_space.shape[1] - 1)
    cell_idx = (humidity_idx, time_idx)

    # Channel 0 counts "on" snapshots, channel 1 everything else; the same
    # 0/1 value doubles as the classification label.
    state_channel = 0 if d_state == "on" else 1
    final_space[humidity_idx, time_idx, state_channel] += 1

    # Equality (not >=) so that each cell is registered exactly once.
    if final_space[humidity_idx, time_idx, 0] + final_space[humidity_idx, time_idx, 1] == min_obs:
        unit_of_int.append(cell_idx)

    data_x.append([min_of_day, humidity])
    data_y.append(state_channel)

def tag_device(ctx_evts, device_evts):
    """Replay every device's event stream and tag each step with the current context.

    Events are indexed as ``evt[0]`` (value / state) and ``evt[1]``
    (timestamp). For each device the function walks from its first to its last
    event timestamp, advancing by ``d_time_delta`` or jumping to the next
    device event when that comes sooner, and calls ``process_snapshot`` at
    every step with the latest known value of each context.

    Args:
        ctx_evts: Mapping of context name -> list of context events. Each list
            must be non-empty whenever at least one device has events.
        device_evts: Mapping of device name -> list of device events. Devices
            with no events are skipped.
    """
    for d, d_evts in device_evts.items():
        if not d_evts:
            # No events means there is no time span to replay; indexing
            # d_evts[0] below would raise IndexError.
            continue

        cur_time = d_evts[0][1]
        end_time = d_evts[-1][1]
        # Per-context cursor into its event list.
        c_evt_idx = {c: 0 for c in ctx_evts}
        # Seed the snapshot with each context's first value so every key exists
        # even before that context's first event has been reached.
        ctx_snapshot = {
            c: ctx_evts[c][0][0]
            for c in ctx_evts
        }
        cur_evt_idx = 0
        d_state = d_evts[0][0]
        while cur_time < end_time:
            # Consume every context event that happened up to cur_time.
            for c, c_evts in ctx_evts.items():
                while c_evt_idx[c] < len(c_evts) and c_evts[c_evt_idx[c]][1] <= cur_time:
                    ctx_snapshot[c] = c_evts[c_evt_idx[c]][0]
                    c_evt_idx[c] += 1
            # Add derived contexts: minute of day and weekday of the current step.
            ctx_snapshot[TIME_CTX] = datetime_to_mins(cur_time)
            ctx_snapshot[WEEKDAY_CTX] = cur_time.date().weekday()
            process_snapshot(cur_time, ctx_snapshot, d_state)

            # cur_evt_idx + 1 stays in range: once the cursor reaches the last
            # event, cur_time equals end_time and the loop exits.
            if d_evts[cur_evt_idx + 1][1] <= cur_time + d_time_delta:
                # The next device event falls within this step: jump to it and
                # adopt its state.
                cur_time = d_evts[cur_evt_idx + 1][1]
                cur_evt_idx += 1
                d_state = d_evts[cur_evt_idx][0]
            else:
                cur_time += d_time_delta
            

def test_umass(test_project="HomeF/2016"):
    """Load one processed UMass project and feed it through ``tag_device``.

    Args:
        test_project: Project sub-path below ``DATA_ROOT/UMASS_ROOT``. Defaults
            to ``"HomeF/2016"``, the project this notebook was written against.

    The processed file is expected to decode (via ``json_datetime_hook``) into
    a ``(ctx_evts, device_evts)`` pair.
    """
    project_path = os.path.join(DATA_ROOT, UMASS_ROOT, test_project)
    input_file = os.path.join(project_path, PROCESSED_FILENAME)
    with open(input_file) as f:
        ctx_evts, device_evts = json.load(f, object_hook=json_datetime_hook)

    logging.debug("The number of device events from processed file: {}".format(
        {x: len(device_evts[x]) for x in device_evts}))
    logging.debug("The number of context events from processed file: {}".format(
        {x: len(ctx_evts[x]) for x in ctx_evts}))

    tag_device(ctx_evts, device_evts)

# Run the pipeline once: this fills final_space, unit_of_int, data_x and data_y
# (via tag_device -> process_snapshot), which the cells below read.
test_umass()


In [87]:
import plotly.graph_objects as go

print(final_space.shape)

# Per-cell totals over the two state channels (0 = "on", 1 = everything else).
on_hits = final_space[:, :, 0]
total_hit = final_space.sum(axis=2)

# Cells that were never observed keep an on-rate of 0 and are counted.
observed = total_hit != 0
count = int(np.count_nonzero(~observed))

on_rate = np.zeros(space_shape)
on_rate[observed] = on_hits[observed] / total_hit[observed]

print(count)
print(space_shape[0]*space_shape[1])
fig = go.Figure(data=go.Heatmap(z=on_rate))
# Alternative view -- raw observation counts instead of the on-rate:
# fig = go.Figure(data=go.Heatmap(z=total_hit))
fig.show()


(10, 72, 2)
153
720


In [84]:
from sklearn.neighbors import KDTree, BallTree

def find_set(uf, a):
    """Return the representative of the set containing ``a``.

    ``uf`` is a parent array: ``uf[i]`` is the parent of element ``i`` and a
    representative is its own parent. Every element visited on the way up is
    re-pointed directly at the representative (path compression), so ``uf`` is
    modified in place.
    """
    # Pass 1: climb to the representative.
    root = a
    while uf[root] != root:
        root = uf[root]

    # Pass 2: re-point each node on the path straight at the representative.
    node = a
    while uf[node] != root:
        uf[node], node = root, uf[node]

    return uf[root]

def union_set(uf, a, b):
    """Merge the sets containing ``a`` and ``b`` in the parent array ``uf``.

    The representative of ``a``'s set is attached below the representative of
    ``b``'s set. Nothing is re-linked when both already share a
    representative, although the lookups still compress paths.
    """
    root_a = find_set(uf, a)
    root_b = find_set(uf, b)
    if root_a != root_b:
        uf[root_a] = root_b

# Cluster the cells in unit_of_int: two cells end up in the same set when they
# are grid neighbours and their [on, other] probability vectors are close.
print(len(unit_of_int))
tree = KDTree(unit_of_int, leaf_size=5)
# Union-find parent array: every cell starts as its own singleton set.
clss = list(range(len(unit_of_int)))

# Query at most 4 nearest cells per cell (the cell itself comes back first at
# distance 0); never ask for more neighbours than there are points.
n_neighbors = min(4, len(unit_of_int))
dist, ind = tree.query(unit_of_int, k=n_neighbors, sort_results=True)
for idx, unit in enumerate(unit_of_int):
    prob_unit = final_space[unit] / sum(final_space[unit])
    d_i = dist[idx]
    ind_i = ind[idx]
    for i in range(n_neighbors):
        # Cells are integer grid coordinates, so distance <= 1 keeps only the
        # cell itself and its axis-aligned neighbours.
        if d_i[i] > 1:
            continue
        cell = unit_of_int[ind_i[i]]
        prob_ngbr = final_space[cell] / sum(final_space[cell])
        # Merge when the two state distributions differ by less than 0.05
        # in Euclidean norm.
        if np.linalg.norm(prob_unit - prob_ngbr) < 0.05:
            union_set(clss, idx, ind_i[i])
# print(clss)
core = []
u = np.unique([find_set(clss, i) for i in range(len(clss))])
print(u)
# Cells outside unit_of_int get a background value one below the smallest set id.
a = np.full(space_shape, u[0]-1)
for i, unit in enumerate(unit_of_int):
    a[unit] = find_set(clss, i)

fig = go.Figure(data=go.Heatmap(z=a))
fig.show()

528
[  2  21  54  75 154 159 170 208 231 276 304 306 317 335 349 377 385 386
 387 397 399 400 416 417 422 423 424 436 437 438 446 452 454 456 458 460
 462 469 471 473 474 477 479 488 489 490 491 504 505 506 507 508 521 527]


In [85]:
# LDA not working
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import sklearn

# Import the submodule explicitly: a bare `import sklearn` does not guarantee
# that `sklearn.preprocessing` has been loaded as an attribute of the package.
from sklearn.preprocessing import MinMaxScaler

# NOTE: `clss` is rebound here to the LDA estimator; if a cell above stored
# something else under that name, it is replaced from this point on.
clss = LDA()
# NOTE: data_x is replaced by its min-max scaled array form.
data_x = MinMaxScaler().fit_transform(data_x)

# Fit on the scaled [min_of_day, humidity] rows against the 0/1 state labels
# and keep the projected samples.
x = clss.fit_transform(data_x, data_y)
print(data_x[10])
print(clss.transform(data_x[10:11]))
print(clss.coef_)

[0.13898541 0.58823529]
[[0.49720339]]
[[-0.45413627  0.88417517]]
