In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Identifiers of the houses; each one selects a set of data files by name.
houses = ['A', 'B', 'C']

# One-letter code -> full name of the sensor-data representation.
# The full name is used when building the feature-matrix file name.
xtype_dict = {
    'r': 'raw',
    'c': 'change',
    'l': 'last',
}

### Naive Bayes Function

In [29]:
def read_data(house, xtype):
    """Load every input file belonging to one house / representation pair.

    All paths are relative to the current working directory.

    Parameters
    ----------
    house : str
        Must be one of the entries of the module-level ``houses`` list.
    xtype : str
        Must be one of the keys of the module-level ``xtype_dict``.

    Returns
    -------
    tuple
        ``(act_df, sensor_df, X, Y, miu, prior)`` where the first two are
        DataFrames read from ``house<H>_act.csv`` / ``house<H>_sensor.csv``
        and the remaining four are arrays loaded from ``.npy`` files.

    Raises
    ------
    AssertionError
        If ``house`` or ``xtype`` is not a known value.
    """
    assert house in houses
    assert xtype in xtype_dict

    activities = pd.read_csv("house{}_act.csv".format(house))
    sensors = pd.read_csv("house{}_sensor.csv".format(house))

    # The feature matrix is named after the *full* representation name ...
    features = np.load("X_{}_house{}.npy".format(xtype_dict[xtype], house))
    labels = np.load("Y_house{}.npy".format(house))
    # ... whereas the parameter file is named after the one-letter code.
    params = np.load("mu{}_{}.npy".format(house, xtype))
    class_prior = np.load('Prior_{}.npy'.format(house))

    return activities, sensors, features, labels, params, class_prior

In [11]:
def max_posterior(mu, X, prior):
    """Unnormalised log-posterior of every activity at every time step.

    For time step ``i`` and activity ``j`` the returned value is::

        sum_k log(mu[j, k])      if X[i, k] == 1
              log(1 - mu[j, k])  otherwise
        + log(prior[i, j])

    i.e. a Bernoulli naive-Bayes log-likelihood plus the log prior.

    Parameters
    ----------
    mu : array-like, shape (num_act, num_sensor)
        Per-activity probability that each sensor reads 1.
    X : array-like, shape (num_time, num_sensor)
        Sensor readings; only entries equal to ``1.`` count as "on".
    prior : array-like
        Either a 2-D array indexed ``[time, activity]`` or a 1-D array with
        one entry per activity, which is then applied to every time step.

    Returns
    -------
    numpy.ndarray, shape (num_time, num_act)
        Log-posterior scores. Entries are ``-inf`` where a probability of
        exactly 0 is involved.
    """
    mu = np.asarray(mu, dtype=float)
    X = np.asarray(X)
    prior = np.asarray(prior, dtype=float)

    num_time = len(X)
    num_act = len(mu)
    if num_time == 0 or num_act == 0:
        return np.zeros((num_time, num_act))

    # Only the sensors actually present in X contribute to the sum.
    mu = mu[:, :X.shape[1]]

    # A 1-D prior is broadcast over time; a 2-D prior is cut to the used part.
    if prior.ndim == 1:
        prior = prior[np.newaxis, :num_act]
    else:
        prior = prior[:num_time, :num_act]

    # The logs do not depend on the time step, so compute them once.
    # log(0) = -inf is a legitimate score here, hence no divide warning.
    with np.errstate(divide='ignore'):
        log_on = np.log(mu)
        log_off = np.log(1 - mu)
        log_prior = np.log(prior)

    fired = (X == 1.)
    prob = np.empty((num_time, num_act))
    for j in range(num_act):
        # np.where selects instead of multiplying, so 0 * -inf never appears.
        prob[:, j] = np.where(fired, log_on[j], log_off[j]).sum(axis=1)
    return prob + log_prior

In [12]:
def NB(matrix):
    """Pick, for every row of ``matrix``, the column with the highest score.

    Returns a float array with one entry per row holding the winning column
    position (the first one on ties, as ``np.argmax`` does).
    """
    winners = [np.argmax(scores) for scores in matrix]
    return np.array(winners, dtype=float)

### Evaluation Metrics Function
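$$\text{Precision} = \frac{1}{N}\sum_{i=1}^{N} \frac{TP_i}{TI_i} $$
$$\text{Recall} = \frac{1}{N}\sum_{i=1}^{N}\frac{TP_i}{TT_i} $$
$$\text{F-Measure} = \frac{2 \cdot \text{Precision} \cdot \text{Recall}}{\text{Precision}+\text{Recall}} $$
$$\text{Accuracy} = \frac{\sum_{i=1}^{N} TP_i}{\text{Total}}$$

Here $N$ is the number of distinct true labels, $TP_i$ is the number of samples correctly predicted as label $i$, $TI_i$ is the number of samples predicted as label $i$, $TT_i$ is the number of samples whose true label is $i$, and $\text{Total}$ is the total number of samples.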

In [13]:
def precision(pred_label, Y):
    """Macro-averaged precision over the labels that occur in ``Y``.

    For every distinct true label ``y`` the per-class precision is
    ``TP / TI`` (correct predictions of ``y`` over all predictions of ``y``).
    A label that is never predicted contributes 0. The result is the mean
    over the distinct labels of ``Y``.

    Parameters
    ----------
    pred_label : array-like
        Predicted labels, same length as ``Y``.
    Y : array-like
        True labels.

    Returns
    -------
    float
        Macro precision; ``0.0`` when ``Y`` is empty.
    """
    # Coerce to arrays: on plain lists ``Y == y`` would be a scalar False.
    pred_label = np.asarray(pred_label)
    Y = np.asarray(Y)

    labels = np.unique(Y)
    if len(labels) == 0:
        return 0.0

    total = 0.0
    for y in labels:
        TP = np.sum(pred_label[Y == y] == y)
        TI = np.sum(pred_label == y)
        if TI != 0:
            total += float(TP) / TI
    return float(total) / len(labels)

def recall(pred_label, Y):
    """Macro-averaged recall over the labels that occur in ``Y``.

    For every distinct true label ``y`` the per-class recall is ``TP / TT``
    (correct predictions of ``y`` over all samples whose true label is
    ``y``). The result is the mean over the distinct labels of ``Y``.

    Parameters
    ----------
    pred_label : array-like
        Predicted labels, same length as ``Y``.
    Y : array-like
        True labels.

    Returns
    -------
    float
        Macro recall; ``0.0`` when ``Y`` is empty.
    """
    # Coerce to arrays: on plain lists ``Y == y`` would be a scalar False.
    pred_label = np.asarray(pred_label)
    Y = np.asarray(Y)

    labels = np.unique(Y)
    if len(labels) == 0:
        return 0.0

    total = 0.0
    for y in labels:
        is_y = (Y == y)
        TT = np.sum(is_y)
        # TT can still be 0 for a label that never equals itself (NaN).
        if TT != 0:
            TP = np.sum(pred_label[is_y] == y)
            total += float(TP) / TT
    return float(total) / len(labels)

def f_score(pred_label, Y):
    """F-measure: harmonic mean of macro precision and macro recall.

    Returns ``0.0`` when precision and recall are both zero (no label was
    ever predicted correctly) instead of dividing by zero.
    """
    p = precision(pred_label, Y)
    r = recall(pred_label, Y)
    if p + r == 0:
        return 0.0
    return 2*p*r/(p+r)

def accuracy(pred_label, Y):
    """Fraction of samples whose predicted label equals the true label.

    Summing the true positives of every label that occurs in ``Y`` is the
    same as counting the positions where prediction and truth agree, so a
    single element-wise comparison replaces the per-label loop.

    Parameters
    ----------
    pred_label : array-like
        Predicted labels, same shape as ``Y``.
    Y : array-like
        True labels.

    Returns
    -------
    float
        Accuracy in ``[0, 1]``; ``0.0`` when ``Y`` is empty.

    Raises
    ------
    ValueError
        If ``pred_label`` and ``Y`` do not have the same shape.
    """
    pred_label = np.asarray(pred_label)
    Y = np.asarray(Y)
    if pred_label.shape != Y.shape:
        # Guard against silent broadcasting of mismatched inputs.
        raise ValueError("pred_label and Y must have the same shape")
    if len(Y) == 0:
        return 0.0
    return float(np.sum(pred_label == Y)) / len(Y)

### Result Function

In [41]:
def result(miu, X, prior, act_df):
    """Predict an activity label for every row of ``X``.

    Scores every activity with ``max_posterior``, takes the best-scoring
    position per row with ``NB`` and translates positions into labels.

    The label list is the distinct values of ``act_df.label`` followed by
    an extra ``0``.

    Returns a NumPy array holding one predicted label per row of ``X``.
    """
    log_post = max_posterior(miu, X, prior)
    best_pos = NB(log_post)  # positions into the label list, not labels yet

    # NOTE(review): position -> label relies on the iteration order of
    # set(...); presumably it matches the row order of ``miu`` -- confirm.
    labels = list(set(act_df.label)) + [0]
    pos_to_label = dict(enumerate(labels))

    return np.array([pos_to_label[pos] for pos in best_pos])

In [42]:
def evaluation(house,res_label, Y):
    """Print precision, recall, F score and accuracy for one house.

    ``house`` is only used to label the printed lines; ``res_label`` are the
    predicted labels and ``Y`` the true labels. Nothing is returned.
    """
    # Single-argument print(...) prints the same text under Python 2 and is
    # valid syntax under Python 3.
    print('Precision of house {} is {}'.format(house,precision(res_label, Y)))
    print('recall of house {} is {}'.format(house,recall(res_label, Y)))
    print('F score of house {} is {}'.format(house,f_score(res_label, Y)))
    print('Accuracy of house {} is {}'.format(house,accuracy(res_label, Y)))

# Naive Bayes Raw Data Representation

In [45]:
# Evaluate the classifier on the 'raw' representation for every house.
t = 'r'
for house in houses:
    act_df,sensor_df,X,Y,miu,prior = read_data(house, t)
    # Predict a label for every row of X.
    res_label = result(miu, X, prior, act_df)
    # Report the metrics; print(...) works on both Python 2 and Python 3.
    print('House{}:'.format(house))
    evaluation(house,res_label, Y)

HouseA:
Precision of house A is 0.827909600324
recall of house A is 0.576325527602
F score of house A is 0.679580545629
Accuracy of house A is 0.775583662451
HouseB:
Precision of house B is 0.806193713717
recall of house B is 0.71489999497
F score of house B is 0.757807199635
Accuracy of house B is 0.874652956298
HouseC:
Precision of house C is 0.621109582685
recall of house C is 0.469259255971
F score of house C is 0.534610693766
Accuracy of house C is 0.767253095741


# Naive Bayes Change Data Representation

In [46]:
# Evaluate the classifier on the 'change' representation for every house.
t = 'c'
for house in houses:
    act_df,sensor_df,X,Y,miu,prior = read_data(house, t)
    # Predict a label for every row of X.
    res_label = result(miu, X, prior, act_df)
    # Report the metrics; print(...) works on both Python 2 and Python 3.
    print('House{}:'.format(house))
    evaluation(house,res_label, Y)

HouseA:
Precision of house A is 0.770766501191
recall of house A is 0.575909143045
F score of house A is 0.659240370298
Accuracy of house A is 0.710318452232
HouseB:
Precision of house B is 0.775019136988
recall of house B is 0.425651213242
F score of house B is 0.549506091962
Accuracy of house B is 0.542519280206
HouseC:
Precision of house C is 0.716180378285
recall of house C is 0.432146479634
F score of house C is 0.539036123947
Accuracy of house C is 0.735238598611


# Naive Bayes Last Fire Data Representation

In [47]:
# Evaluate the classifier on the 'last' representation for every house.
t = 'l'
for house in houses:
    act_df,sensor_df,X,Y,miu,prior = read_data(house, t)
    # Predict a label for every row of X.
    res_label = result(miu, X, prior, act_df)
    # Report the metrics; print(...) works on both Python 2 and Python 3.
    print('House{}:'.format(house))
    evaluation(house,res_label, Y)

HouseA:
Precision of house A is 0.662168290111
recall of house A is 0.620084068935
F score of house A is 0.640435565987
Accuracy of house A is 0.853147027946
HouseB:
Precision of house B is 0.667133479473
recall of house B is 0.566522040912
F score of house B is 0.612725050238
Accuracy of house B is 0.754550128535
HouseC:
Precision of house C is 0.618966706095
recall of house C is 0.610360858939
F score of house C is 0.614633660112
Accuracy of house C is 0.800437934159
