In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
sns.set_style("white")

import time
import timeit

import scipy.stats 
import pandas as pd
import pymc as pm

import re
import numpy as np

import string
import itertools

import time

## Exactly the same code from NB

* define house numbers and x types
* read data
* define functions to evaluate prediction

In [2]:
# House Number and X Types
houses = ['A', 'B', 'C']
xtype_dict = {'r':'raw', 'c':'change','l':'last'}

def read_data(house, xtype):
    assert house in houses
    assert xtype in xtype_dict
    
    act_df = pd.read_csv("data/house{}_act.csv".format(house))
    sensor_df = pd.read_csv("data/house{}_sensor.csv".format(house))
    X = np.load("data/X_{}_house{}.npy".format(xtype_dict[xtype], house))
    Y = np.load("data/Y_house{}.npy".format(house))
    miu = np.load("data/mu{}_{}.npy".format(house, xtype))
    prior = np.load('data/Prior_{}.npy'.format(house))
    return act_df, sensor_df, X, Y, miu, prior

# Prediction Evaluation Functions
def precision(pred_label, Y):
    all_label = list(set(Y))
    N = len(all_label)
    res = 0
    for y in list(set(Y)):
        TP = np.sum(pred_label[Y==y]==y)
        TI = np.sum(pred_label==y)
        if TI != 0:
            res += (float(TP)/TI)
    return float(res)/N
def recall(pred_label, Y):
    all_label = list(set(Y))
    N = len(all_label)
    res = 0
    for y in list(set(Y)):
        TP = np.sum(pred_label[Y==y]==y)
        TT = np.sum(Y==y)
        if TT != 0:
            res += float(TP)/TT
    return float(res)/N
def f_score(pred_label, Y):
    p = precision(pred_label, Y)
    r = recall(pred_label, Y)
    return 2*p*r/(p+r)
def accuracy(pred_label, Y):
    res = 0
    all_label = list(set(Y))
    for y in list(set(Y)):
        TP = np.sum(pred_label[Y==y]==y)
        res += TP
    return float(res)/len(Y)

def evaluation(house,res_label, Y):
    print 'Precision of house {} is {}'.format(house,precision(res_label, Y))
    print 'recall of house {} is {}'.format(house,recall(res_label, Y))
    print 'F score of house {} is {}'.format(house,f_score(res_label, Y))
    print 'Accuracy of house {} is {}'.format(house,accuracy(res_label, Y))

## Experiment with HMM

For debugging purposes. Function written as the next step.

In [20]:
# load data
act_df, sensor_df, X, Y, miu, prior = read_data("A", "c")

In [21]:
print X.shape
print Y.shape
print miu.shape

(40006, 14)
(40006,)
(17, 14)


In [22]:
n_activity = 17

In [23]:
order = 2
A = miu.shape[0]
S = miu.shape[1]
K = n_activity ** order

activity_single = [int(x) for x in list(set(act_df.label))] + [0]
activity_higher = [prod for prod in itertools.product(*np.tile(activity_single, (order,1)))]

index_to_single = dict(zip(range(A), activity_single)) #ind to activity_single dict
single_to_index = dict(zip(activity_single, range(A))) #activity_single to ind dict
index_to_higher = dict(zip(range(K), activity_higher)) #ind to activity_higher dict
higher_to_index = dict(zip(activity_higher, range(K))) #activity_higher to ind dict

In [24]:
X_train = X
Y_train = Y
X_test = X
Y_test = Y
N_train = len(X)
N_test = len(X)

In [25]:
# construct transition and emission matrix
transition_count = np.zeros((K, K)) + 0.00001
emission_count = np.zeros((K, S, 2)) + 0.00001

for i in range(order-1, N_train-1):
    yi = higher_to_index[tuple(Y_train[i-order+1:i+1])]
    ynext = higher_to_index[tuple(Y_train[i-order+2:i+2])]
    transition_count[yi, ynext] += 1
    
    xi = X[i]
    
    for j in range(S): 
        emission_count[yi, j, int(xi[j])] += 1

higher_last = higher_to_index[tuple(Y_train[-order:])]
for j in range(S): 
        emission_count[higher_last, j, int(X[-1][j])] += 1

In [10]:
# transition_count = np.zeros((K,K)) + 0.00001
# emission_count2 = np.zeros((K, S)) + 0.00001

# for i in range(order-1, N_train-1):
#     yi = higher_to_index[tuple(Y_train[i-order+1:i+1])]
#     ynext = higher_to_index[tuple(Y_train[i-order+2:i+2])]
#     transition_count[yi, ynext] += 1
#     
#     
#     emission_count2[yi, :] += X_train[i]
# 
# higher_last = higher_to_index[tuple(Y_train[-order:])]
# emission_count2[higher_last, :] += X_train[-1]

In [26]:
transition = transition_count/np.sum(transition_count, axis=1).reshape(K,1)
log_transition = np.log(np.nan_to_num(transition))

emission = emission_count/np.sum(emission_count, axis=2).reshape(K,S,1)
log_emission = np.log(np.nan_to_num(emission))
log_emission2 = log_emission[:,:,0].reshape(K,S)

In [27]:
# initial probability
initial = np.zeros(K)
initial[higher_to_index[tuple(Y_test[:order])]] = 1
log_initial = np.log(initial)

In [28]:
# initialize T1 and T2
T1 = np.zeros((K,N_test-order+1))
T2 = np.zeros((K,N_test-order+1))

calc_emission = lambda p, x:  np.power(p,(1-x))*np.power((np.log(1-np.exp(p))),x)
T1[:,0] = log_initial + np.sum(calc_emission(log_emission2, X_train[order-1]), axis=1)

In [29]:
%%time
# iterate through time to update T1 and T2
for i in range(1, N_test-order+1):
    obj = T1[:, i-1].reshape(K,1) + log_transition + np.sum(calc_emission(log_emission2, X_train[i+order-1]), axis=1)
    T1[:,i] = np.max(obj, axis=0)
    T2[:,i] = np.argmax(obj, axis=0)

CPU times: user 23.5 s, sys: 440 ms, total: 23.9 s
Wall time: 24.6 s


In [30]:
%%time
# back-fill the MLE state
Z = np.zeros(N_test-order+1)
Z[-1] = np.argmax(T1[:,-1])

for i in range(-1, -(N_test-order), -1):
    Z[i-1] = T2[int(Z[i]), i]

Y_HMM = np.array([index_to_higher[i][0] for i in Z])
Y_HMM[0] = Y_test[:order][0]

CPU times: user 38.2 ms, sys: 5.95 ms, total: 44.2 ms
Wall time: 40 ms


In [31]:
count = 0
for i in range(N_train-1):
    if Y_test[i] == Y_HMM[i]:
        count += 1
print count/float(N_train)

0.850747387892


In [173]:
transition

array([[  9.98326818e-01,   5.07022254e-10,   4.56320536e-04,
          5.07022254e-10,   5.07027324e-05,   5.07022254e-10,
          5.07022254e-10,   1.01404958e-04,   2.02809409e-04,
          5.07022254e-10,   5.07022254e-10,   5.07022254e-10,
          5.07022254e-10,   5.07022254e-10,   5.07022254e-10,
          5.07022254e-10,   8.61938339e-04],
       [  5.88235294e-02,   5.88235294e-02,   5.88235294e-02,
          5.88235294e-02,   5.88235294e-02,   5.88235294e-02,
          5.88235294e-02,   5.88235294e-02,   5.88235294e-02,
          5.88235294e-02,   5.88235294e-02,   5.88235294e-02,
          5.88235294e-02,   5.88235294e-02,   5.88235294e-02,
          5.88235294e-02,   5.88235294e-02],
       [  1.36363986e-02,   4.54545103e-08,   6.36363190e-01,
          2.72727517e-02,   2.27273006e-02,   2.27273006e-02,
          2.27273006e-02,   4.54549649e-03,   4.54545103e-08,
          4.54549649e-03,   4.54545103e-08,   4.54549649e-03,
          4.54545103e-08,   4.54549649e-03

In [174]:
index_to_higher

{0: (1,),
 1: (3,),
 2: (4,),
 3: (5,),
 4: (6,),
 5: (10,),
 6: (13,),
 7: (15,),
 8: (16,),
 9: (17,),
 10: (18,),
 11: (19,),
 12: (20,),
 13: (22,),
 14: (23,),
 15: (25,),
 16: (0,)}

In [175]:
act_df

Unnamed: 0,start_time,end_time,start_sec,end_sec,diff_sec,label,meaning
0,2008-02-24 19:19:32,2008-02-24 19:21:23,1203898772,1203898883,111,6,brush teeth
1,2008-02-24 19:22:46,2008-02-25 04:34:12,1203898966,1203932052,33086,10,go to bed
2,2008-02-25 04:37:16,2008-02-25 04:38:02,1203932236,1203932282,46,4,use toilet
3,2008-02-25 04:49:23,2008-02-25 04:53:28,1203932963,1203933208,245,13,prepare Breakfast
4,2008-02-25 04:57:21,2008-02-25 05:01:30,1203933441,1203933690,249,19,unload dishwasher
5,2008-02-25 05:02:27,2008-02-25 05:12:41,1203933747,1203934361,614,5,take shower
6,2008-02-25 05:19:05,2008-02-25 11:55:37,1203934745,1203958537,23792,1,leave house
7,2008-02-25 11:58:44,2008-02-25 11:59:55,1203958724,1203958795,71,20,store groceries
8,2008-02-25 12:00:30,2008-02-25 12:01:33,1203958830,1203958893,63,4,use toilet
9,2008-02-25 12:19:23,2008-02-25 16:55:26,1203959963,1203976526,16563,25,receive guest


In [178]:
print T1[0]

[            -inf  -2.52437822e+01  -2.77560075e+01 ...,  -1.48251130e+05
  -1.48251340e+05  -1.48252159e+05]


## Nice and Clean function

In [None]:
# define function to fit HMM model
def HMM(X, Y, miu, order):
    