In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Creating two feature representations



***NOTE***

- The timeslice length used here is 60 seconds, which the paper shows to give good performance.

- $\vec{x_t^i}$ is a binary vector

- According to the paper, $y_t = j$ means that state (i.e. activity) $j$ occupies most of the 60 seconds, even if other states (i.e. activities) also occur during those 60 seconds.

- Use a different house's data by setting `house` to 'A', 'B', or 'C'.

In [2]:
# Length of one timeslice, in seconds.
timeslice = 60

In [3]:
# Pick the house whose recordings should be processed: 'A', 'B', or 'C'.
house = 'A'

# Each house has one CSV with activity annotations and one with sensor events.
act_csv = "house{}_act.csv".format(house)
sensor_csv = "house{}_sensor.csv".format(house)
act_df = pd.read_csv(act_csv)
sensor_df = pd.read_csv(sensor_csv)

In [4]:
# #check duration of activities or sensors
# plt.subplot(1,2,1)
# plt.hist(act_df.diff_sec, bins=5);
# plt.subplot(1,2,2)
# plt.hist(sensor_df.diff_sec, bins=5);

In [5]:
# Earliest start and latest end over both the activity and the sensor logs.
start = min(act_df.start_sec.min(), sensor_df.start_sec.min())
end = max(act_df.end_sec.max(), sensor_df.end_sec.max())

# Round `end` up so that the recording spans a whole number of timeslices.
# `//` makes the floor division explicit, so the result does not depend on
# Python 2's integer `/` semantics.
if (end - start) % timeslice != 0:
    end = (1 + (end - start) // timeslice) * timeslice + start
duration = end - start

In [6]:
# Number of distinct sensor labels and activity labels in the loaded data.
num_sensor = len(set(sensor_df.label))
num_act = len(set(act_df.label))

# Number of timeslices. Floor division plus int() guarantees an integer that
# np.zeros() and range() accept, independent of how `/` behaves.
num_t = int(duration // timeslice)

# Single-argument print(...) produces the same text under Python 2 and 3.
print("# sensors:  {}".format(num_sensor))
print("# states/acts:  {}".format(num_act))
print("# timeframes:  {}".format(num_t))

# sensors:  14
# states/acts:  16
# timeframes:  40006


### Activity array

$y_t$ : activity at timeslice t 

0 means no activity

non-zero means labelled activity

In [7]:
# Sanity check: for every distinct (label, meaning) pair, report how many
# annotated rows carry that label.
temp = list(set(zip(act_df.label, act_df.meaning)))
for lab, desc in temp:
    n_rows = sum(act_df.label == lab)
    print("label:{}, meaning:{}, count:{}".format(lab, desc, n_rows))

label:16.0, meaning:get snack, count:12
label:22.0, meaning:put clothes in washingmachine, count:3
label:6.0, meaning:brush teeth, count:16
label:17.0, meaning:get drink, count:20
label:23.0, meaning:unload washingmachine, count:4
label:4.0, meaning:use toilet, count:114
label:5.0, meaning:take shower, count:23
label:15.0, meaning:prepare Dinner, count:9
label:10.0, meaning:go to bed, count:24
label:19.0, meaning:unload dishwasher, count:4
label:20.0, meaning:store groceries, count:1
label:1.0, meaning:leave house, count:33
label:25.0, meaning:receive guest, count:3
label:18.0, meaning:put items in dishwasher, count:5
label:13.0, meaning:prepare Breakfast, count:20
label:3.0, meaning:Eating, count:1


In [8]:
# One activity label per timeslice; 0 stands for "no labelled activity".
Y = np.zeros(num_t, dtype=float)

In [9]:
# Assign to every timeslice the label of the activity that overlaps it the
# longest (0 when no activity overlaps the slice at all).
#
# The overlap of an activity [s, t] with the slice [c, c_] is
# min(t, c_) - max(s, c). Computing it for every activity also catches
# activities that lie strictly inside a slice; a filter that only keeps
# activities covering one of the slice edges would drop those.

# Pull the columns out once instead of converting them on every iteration.
act_start = act_df.start_sec.values
act_end = act_df.end_sec.values
act_label = act_df.label.values

for j in range(num_t):
    c = j*timeslice + start   # slice start
    c_ = c + timeslice        # slice end

    max_label = 0  # default, unknown act
    if act_start.size > 0:
        # Seconds of each activity that fall inside this slice (<= 0: none).
        cover = np.minimum(act_end, c_) - np.maximum(act_start, c)
        # argmax returns the first maximum, so on ties the activity that
        # appears first in act_df wins.
        best = np.argmax(cover)
        if cover[best] > 0:
            max_label = act_label[best]
    Y[j] = max_label
            

In [None]:
# #For house A, manually add eating
# temp_df = act_df[act_df.label==3]
# s = list(temp_df.start_sec)[0]
# diff = list(temp_df.diff_sec)[0]
# i = (s-start)/timeslice
# while diff > 0:
#     Y[i] = 3
#     i += 1
#     diff -= timeslice

In [None]:
# for y in list(set(act_df.label)):
#     if np.sum(Y==y)==0:
#         print y

### Representation 1: raw data

The raw sensor representation uses the sensor data directly as it was received from the sensors. It gives a 1 when the sensor is firing and a 0 otherwise.

**X_raw** is a num_t-by-num_sensor matrix, where rows are timeslices and columns are features (i.e. sensors). The mapping between sensors and column indices is given by the dictionaries `s2i` (sensor to index) and `i2s` (index to sensor).

In [None]:
#map from sensor to idx and idx to sensor
i2s = dict(zip(range(num_sensor), list(set(sensor_df.label))))
s2i = dict(zip(list(set(sensor_df.label)), range(num_sensor)))
s2i

In [None]:
#each row = (x1, x2, .. xn), n=num_sensor
X_raw = np.zeros([num_t, num_sensor])
for i in range(len(sensor_df)):
    elapsed = sensor_df.start_sec[i] - start
    row = elapsed/timeslice
    label = sensor_df.label[i]
    diff = sensor_df.diff_sec[i]
    while diff > 0:
        X_raw[row, s2i[label]] = 1
        row = row + 1
        diff = diff - timeslice

### Representation 2: changepoint

The change point representation indicates when a sensor event takes place. That is, it indicates when a sensor changes value. More formally, it gives a 1 when a sensor changes state (i.e. goes from zero to one or vice versa) and a 0 otherwise.

**X_change** is a num_t-by-num_sensor matrix, where rows are timeslices and columns are features (i.e. sensors). The mapping between sensors and column indices is given by the dictionaries `s2i` (sensor to index) and `i2s` (index to sensor).

In [None]:
# Change-point representation: X_change[t, k] is 1 when the number of
# start events and the number of (cross-slice) end events of sensor k in
# timeslice t differ by an odd amount, else 0.
start_counts = np.zeros([num_t, num_sensor])  # events starting in the slice
end_counts = np.zeros([num_t, num_sensor])    # events ending in a later slice

for ev_start, ev_end, ev_label in zip(sensor_df.start_sec.values,
                                      sensor_df.end_sec.values,
                                      sensor_df.label.values):
    col = s2i[ev_label]

    # Start point (from 0 to 1); explicit floor division for the slice index.
    row = int((ev_start - start) // timeslice)
    start_counts[row, col] += 1

    # End point (from 1 to 0), counted only when it falls in another slice.
    row2 = int((ev_end - start) // timeslice)
    # An event ending exactly at the end of the recording maps to row num_t,
    # which is outside the matrix; skip it instead of raising IndexError.
    if row2 != row and row2 < num_t:
        end_counts[row2, col] += 1

# Odd difference between the two counts -> flag a change in that slice.
X_change = (np.abs(start_counts - end_counts) % 2 == 1).astype(float)

In [None]:
# Count how many entries are set to 1 in each representation.
# A vectorised comparison replaces the element-by-element Python loops.
a = int((X_raw == 1).sum())
b = int((X_change == 1).sum())

# Single-argument print(...) produces the same text under Python 2 and 3.
print("ones in X_raw:  {}".format(a))
print("ones in X_change:  {}".format(b))

## Save files (only needs to be done once)

In [None]:
# np.save("X_raw_house{}.npy".format(house), X_raw)
# np.save("X_change_house{}.npy".format(house), X_change)
# np.save("Y_house{}.npy".format(house), Y)