In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Creating 3 ways of feature representations



***NOTE***

-The timeslice length used here is 60 sec, which is proved to have good performance by paper

-$\vec{x_t^i}$ is a binary vector

-according to the paper, $y_t = j$ means the state(i.e. act) $j$ takes most of the 60 secs, even if there are other states(i.e. acts) happening during this 60 secs

-use different house data by setting house to A or B or C

In [2]:
timeslice = 60

In [44]:
#####
#select a house, 'A', 'B', or 'C'
#####
house = 'B'
act_df = pd.read_csv("house{}_act.csv".format(house))
sensor_df = pd.read_csv("house{}_sensor.csv".format(house))

In [45]:
# #check duration of activities or sensors
# plt.subplot(1,2,1)
# plt.hist(act_df.diff_sec, bins=5);
# plt.subplot(1,2,2)
# plt.hist(sensor_df.diff_sec, bins=5);

In [46]:
start = min(min(act_df.start_sec), min(sensor_df.start_sec))
end = max(max(act_df.end_sec), max(sensor_df.end_sec))
if (end-start)%timeslice != 0:
    end = (1+(end-start)/timeslice)*timeslice + start
duration = end-start

In [47]:
num_sensor = len(list(set(sensor_df.label)))
num_act = len(list(set(act_df.label)))
num_t = duration/timeslice
print "# sensors: ", num_sensor
print "# states/acts: ", num_act
print "# timeframes: ", num_t

# sensors:  22
# states/acts:  24
# timeframes:  38900


### Activity array

$y_t$ : activity at timeslice t 

0 means no activity

non-zero means labelled activity

In [48]:
#check counts
temp = list(set(zip(act_df.label, act_df.meaning)))
for y in temp:
    print "label:{}, meaning:{}, count:{}".format(y[0], y[1], sum(act_df.label==y[0]))

label:42.0, meaning:Gwenn searches keys, count:1
label:17.0, meaning:Get a drink, count:8
label:5.0, meaning:Take shower, count:11
label:6.0, meaning:Brush teeth, count:13
label:36.0, meaning:On phone, count:1
label:13.0, meaning:Prepare brunch, count:9
label:38.0, meaning:Wash toaster, count:1
label:32.0, meaning:Eat brunch, count:10
label:10.0, meaning:Go to bed, count:14
label:34.0, meaning:Unpacking, count:1
label:29.0, meaning:Answering phone, count:2
label:11.0, meaning:Get dressed, count:14
label:33.0, meaning:Setting up sensors, count:1
label:37.0, meaning:Fasten kitchen camera, count:5
label:15.0, meaning:Prepare dinner, count:6
label:1.0, meaning:Leaving the house, count:24
label:24.0, meaning:Wash dishes, count:6
label:9.0, meaning:Shaving, count:1
label:44.0, meaning:Drop dish (No dishwash), count:3
label:4.0, meaning:Use toilet, count:27
label:43.0, meaning:Prepare for leaving, count:9
label:40.0, meaning:Play piano, count:25
label:35.0, meaning:Install sensor, count:2
lab

In [49]:
Y = np.zeros(num_t)

In [50]:
for j in range(num_t):
    c = j*timeslice + start
    c_ = c + timeslice
    mask = ((act_df.start_sec <= c_) & (act_df.end_sec >= c_)) | ((act_df.start_sec <= c) & (act_df.end_sec >= c))
    temp_df = act_df[mask]
    max_cover = 0
    max_label = 0 #default, unknown act
    for i in range(len(temp_df)):
        s = np.array(temp_df.start_sec)[i]
        t = np.array(temp_df.end_sec)[i]
        l = np.array(temp_df.label)[i]
        
        if s <= c and t >= c_:
            max_cover = timeslice
            max_label = l
            break
        elif s > c and t < c_ and (t-s) > max_cover:
            max_cover = t-s
            max_label = l
        elif s<=c and t>=c and (t-c) > max_cover:
            max_cover = t-c
            max_label = l
        elif s<=c_ and t>=c_ and (c_-s) > max_cover:
            max_cover = c_-s
            max_label = l
    Y[j] = max_label
            

In [51]:
# #For house A, mannualy add eating
# temp_df = act_df[act_df.label==3]
# s = list(temp_df.start_sec)[0]
# diff = list(temp_df.diff_sec)[0]
# i = (s-start)/timeslice
# while diff > 0:
#     Y[i] = 3
#     i += 1
#     diff -= timeslice

In [52]:
# for y in list(set(act_df.label)):
#     if np.sum(Y==y)==0:
#         print y

### Representation 1: raw data

The raw sensor representation uses the sensor data directly as it was received from the sensors. It gives a 1 when the sensor is firing and a 0 otherwise.

**X_raw** is a num_t-by-num_sensor matrix, where rows are times and columns are features(or sensors). The maps of sensors to index can be found by the dictionary. 

In [53]:
#map from sensor to idx and idx to sensor
i2s = dict(zip(range(num_sensor), list(set(sensor_df.label))))
s2i = dict(zip(list(set(sensor_df.label)), range(num_sensor)))
s2i

{1.0: 0,
 3.0: 1,
 5.0: 2,
 6.0: 3,
 7.0: 4,
 9.0: 5,
 10.0: 6,
 12.0: 7,
 13.0: 8,
 14.0: 9,
 15.0: 10,
 16.0: 11,
 18.0: 12,
 19.0: 13,
 20.0: 14,
 21.0: 15,
 22.0: 16,
 24.0: 17,
 25.0: 18,
 26.0: 19,
 27.0: 20,
 28.0: 21}

In [54]:
#each row = (x1, x2, .. xn), n=num_sensor
X_raw = np.zeros([num_t, num_sensor])
for i in range(len(sensor_df)):
    elapsed = sensor_df.start_sec[i] - start
    row = elapsed/timeslice
    label = sensor_df.label[i]
    diff = sensor_df.diff_sec[i]
    while diff > 0:
        X_raw[row, s2i[label]] = 1
        row = row + 1
        diff = diff - timeslice

### Representation 2: changepoint

The change point representation indicates when a sensor event takes place. That is, it indicates when a sensor changes value. More formally, it gives a 1 when a sensor changes state (i.e. goes from zero to one or vice versa) and a 0 otherwise.

**X_change** is a num_t-by-num_sensor matrix, where rows are times and columns are features(or sensors). The maps of sensors to index can be found by the dictionary. 

In [55]:
X_change = np.zeros([num_t, num_sensor])
X_change[0] = X_raw[0]
for i in range(1, num_t):
    curr = X_raw[i]
    prev = X_raw[i-1]
    logic = curr==prev
    X_change[i] = [1-int(x) for x in logic]

In [56]:
print "ones in X_raw: ", sum(sum(X_raw))
print "ones in X_change: ", sum(sum(X_change))

ones in X_raw:  219822.0
ones in X_change:  4381.0


### Representation3: Last Fired Data
The last-fired sensor representation indicates which sensor fired last. The sensor that changed state last continues to give 1 and changes to 0 when another sensor changes state.
**X_last** is a num_t-by-num_sensor matrix, where rows are times and columns are features(or sensors). The maps of sensors to index can be found by the dictionary. 

In [57]:
X_last = np.zeros([num_t, num_sensor])
X_last[0] = X_change[0]
#sensor index
s_ind = list(X_change[0]).index(1.)
for i in range(1, num_t):
    if 1 in X_change[i]: s_ind = list(X_change[i]).index(1.)
    X_last[i][s_ind] = 1.   

## Save files (only need to be done once)

In [58]:
# np.save("X_raw_house{}.npy".format(house), X_raw)
# np.save("X_change_house{}.npy".format(house), X_change)
np.save("X_last_house{}.npy".format(house), X_last)
# np.save("Y_house{}.npy".format(house), Y)