In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [82]:
# House identifiers accepted by read_data.
houses = [
    'A',
    'B',
    'C',
]

# One-letter feature code accepted by read_data -> name used inside the
# "X_<name>_house<id>.npy" file name.
xtype_dict = {
    'r': 'raw',
    'c': 'change',
}

In [90]:
def read_data(house, xtype):
    """Load the activity table, sensor table and X/Y arrays for one house.

    Parameters
    ----------
    house : one of the identifiers listed in ``houses``.
    xtype : a key of ``xtype_dict``; selects which X array file is loaded.

    Returns
    -------
    (act_df, sensor_df, X, Y) : two DataFrames read from CSV followed by two
        arrays read from .npy files. All paths are relative to the current
        working directory.

    Raises
    ------
    AssertionError
        If ``house`` or ``xtype`` is not a known value.
    """
    assert house in houses
    assert xtype in xtype_dict

    # Resolve every file name first, then load in the same order as returned.
    act_path = "house{}_act.csv".format(house)
    sensor_path = "house{}_sensor.csv".format(house)
    x_path = "X_{}_house{}.npy".format(xtype_dict[xtype], house)
    y_path = "Y_house{}.npy".format(house)

    return (
        pd.read_csv(act_path),
        pd.read_csv(sensor_path),
        np.load(x_path),
        np.load(y_path),
    )

In [91]:
# Load house 'A' using the 'r' (raw) X array.
act_df, sensor_df, X, Y = read_data('A', 'r')

In [129]:
# Build four lookup tables: activity label <-> index and sensor label <-> index.

# All activity labels plus the extra label 0. 0 is appended only when the
# activity table does not already contain it; unconditionally appending it
# would list the label twice, so a2i and i2a would no longer be inverses of
# each other and len(all_act) would over-count the states.
all_act = list(set(act_df.label))
if 0 not in all_act:
    all_act.append(0)
a2i = {act: idx for idx, act in enumerate(all_act)}
i2a = {idx: act for idx, act in enumerate(all_act)}

# All sensor labels.
all_sensor = list(set(sensor_df.label))
i2s = {idx: sensor for idx, sensor in enumerate(all_sensor)}
s2i = {sensor: idx for idx, sensor in enumerate(all_sensor)}

In [150]:
# Report the data-set dimensions. print() is called with one pre-formatted
# string so the cell runs on both Python 2 and Python 3; the double space
# reproduces exactly what the former print statement emitted.
print("# sensors:  {}".format(len(all_sensor)))
print("# states/acts:  {}".format(len(all_act)))
print("# timeframes:  {}".format(len(X)))

# sensors:  14
# states/acts:  17
# timeframes:  40006


## Naive Bayes

In [131]:
#each row represents sensors for a specific action
def calc_miu(X, Y, all_sensor=all_sensor, all_act=all_act):
    """Sum the sensor rows of X per activity and normalise each row to sum to 1.

    Parameters
    ----------
    X : ndarray with one row of sensor values per timeframe.
    Y : ndarray of activity labels, same length as X.
    all_sensor : sequence of sensor labels; only its length is used (it fixes
        the number of columns of the result).
    all_act : sequence of activity labels; each label is assigned the row
        given by its position in this sequence.

    Returns
    -------
    miu : float ndarray of shape (len(all_act), len(all_sensor)). Row i holds
        the column-wise sum of the X rows whose label equals all_act[i],
        divided by that row's total. Rows whose total is 0 are left as-is.

    Raises
    ------
    AssertionError
        If X and Y differ in length.
    """
    assert len(X)==len(Y)

    miu = np.zeros([len(all_act), len(all_sensor)])

    # Derive the label -> row mapping from the `all_act` argument itself
    # (built the same way as the module-level a2i) so that a caller-supplied
    # `all_act` is indexed consistently instead of through the global table.
    act_index = dict(zip(all_act, range(len(all_act))))
    for act in all_act:
        # Column-wise sum of every timeframe labelled `act`.
        miu[act_index[act]] += X[Y == act].sum(axis=0)

    # NOTE(review): each row is normalised by its total over all sensors,
    # whereas max_like uses the entries as per-sensor probabilities
    # (log(p) / log(1 - p)) -- confirm this normalisation is what is intended.
    for i, row in enumerate(miu):
        total = np.sum(row)
        if total != 0:
            miu[i] = row / total
    return miu

In [132]:
def max_like(miu, X):
    """Log-likelihood of every timeframe under every activity's parameters.

    miu[j, k] is used as the probability that sensor k reads 1 under
    activity j: a reading equal to 1 contributes log(miu[j, k]), any other
    reading contributes log(1 - miu[j, k]).

    Parameters
    ----------
    miu : 2-D array-like, one row of per-sensor parameters per activity.
    X : 2-D array-like, one row of sensor readings per timeframe. It must not
        have more columns than miu.

    Returns
    -------
    prob : float ndarray of shape (len(X), len(miu)); prob[i, j] is the summed
        log term of timeframe i under activity j. An entry is -inf when a
        parameter of exactly 0 (or 1) meets a reading that contradicts it.
    """
    num_time = len(X)
    num_act = len(miu)

    prob = np.zeros((num_time, num_act))
    if num_time == 0 or num_act == 0:
        return prob

    readings = np.asarray(X)
    is_on = readings == 1
    # Only the parameters that have a matching reading column are used.
    params = np.asarray(miu, dtype=float)[:, :readings.shape[1]]

    # The logs depend only on miu, so compute them once instead of once per
    # timeframe. log(0) = -inf is the intended value here, hence the
    # divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        log_on = np.log(params)
        log_off = np.log(1 - params)

    for j in range(num_act):
        # np.where picks one term per reading, so an unused -inf never gets
        # multiplied by 0 (which would produce NaN).
        prob[:, j] = np.where(is_on, log_on[j], log_off[j]).sum(axis=1)
    return prob

In [133]:
def mle(matrix):
    """Return, for each row of `matrix`, the index of its largest entry.

    Parameters
    ----------
    matrix : iterable of rows (e.g. the output of max_like).

    Returns
    -------
    1-D float ndarray with one index per row; ties resolve to the first
    maximum, as np.argmax does.
    """
    best = [np.argmax(scores) for scores in matrix]
    return np.array(best, dtype=float)

In [134]:
# Estimate the per-activity sensor parameters, score every timeframe against
# every activity, then keep the best-scoring activity per timeframe.
miu = calc_miu(X, Y)
likes = max_like(miu, X)
result = mle(likes)  # indices into all_act (stored as floats), not labels yet

In [143]:
# Map every predicted index back to its activity label through i2a.
res_label = np.array([i2a[idx] for idx in result])

In [149]:
# Number of timeframes whose predicted label is 0.
sum(res_label == 0)

864

In [148]:
# Number of timeframes whose label in Y is 0.
sum(Y == 0)

7235