In [None]:
"""
import packages
"""
import os, glob
import pandas as pd
import numpy as np
import math, random
from datetime import datetime
import pickle
from matplotlib import pyplot as plt
import argparse
import path, sys, re, time
from collections import Counter
from scipy.signal import find_peaks

In [None]:
"""
import custom modules
"""
from module_.readText import read_hh
from module_.featureExtraction import feature_extraction
from module_.changePointDetection import change_point_detection
from module_.info.hh import baseline_activities
from module_.info.config import feature_name, exclude_list

# hh101 (CASAS, 1-resident, serialized)

In [None]:
"""
0. load casas dataset: hh101
"""
# The event array is read from a preprocessed .npy file. The raw-text route
# is kept here for reference only:
#
#     with open('dataset/hh/hh101/ann.txt','rb') as f:
#         rawdata=f.readlines()
#     events=np.array(read_hh(rawdata))
#
# Later cells index each event row as [0] = sensor id, [2] = timestamp and
# [3] = activity label.
annotation_file = "./preprocessed/test/ann.npy"
events = np.load(annotation_file)

# 1. Replication

In [None]:
"""
1. remove all except M and D
"""
data_name = 'hh101'
metric = 'SEP'
output_dir = "./replication/hh101"

# Create the output directory *before* the expensive feature extraction and
# scoring, so a missing folder cannot make np.save fail at the very end.
os.makedirs(output_dir, exist_ok=True)

# EXCLUDE "IDLE" (empty label) and INCLUDE ONLY "MOTION and DOOR".
# Slicing ([:1]) rather than indexing ([0]) tolerates an empty sensor id.
events_md = np.array([
    event for event in events
    if event[3] != "" and event[0][:1] in ('M', 'D')
])

# MAPPING TO PRE-DEFINED ACTIVITY
# A NumPy unicode array has a fixed character width, and assigning a longer
# string silently truncates it. Widen the array first when a mapped activity
# name would not fit, then write the whole label column at once.
mapped_labels = [baseline_activities[label] for label in events_md[:, 3]]
if events_md.dtype.kind == 'U':
    longest_label = max((len(str(label)) for label in mapped_labels), default=0)
    current_width = events_md.dtype.itemsize // np.dtype('U1').itemsize
    if longest_label > current_width:
        events_md = events_md.astype('U{}'.format(longest_label))
events_md[:, 3] = mapped_labels

# TRANSITION POINTS: indices whose activity label differs from the previous
# event's label (index 0 has no predecessor, so it is never a transition).
trs_md = np.array([
    idx for idx in range(1, len(events_md))
    if events_md[idx][3] != events_md[idx - 1][3]
])

# Sensor List
sensor_list = sorted(set(events_md[:, 0]))

print("""
    The number of events: {},
    The number of transitions: {},
    The number of sensors: {}

""".format(len(events_md), len(trs_md), len(sensor_list)))

raw_features = np.array(feature_extraction(events_md, data_name, sensor_list))

# Keep every feature column whose index is not listed in exclude_list['A'].
choose_features = np.concatenate(
    [
        raw_features[:, col].reshape(-1, 1)
        for col in range(raw_features.shape[1])
        if col not in exclude_list['A']
    ],
    axis=1,
)

print(raw_features.shape, choose_features.shape)

scores = np.array(change_point_detection(choose_features, data_name=data_name, metric=metric))

# Persist the results so the evaluation cell can reload them.
np.save("{}/scores.npy".format(output_dir), scores)
np.save("{}/transitions.npy".format(output_dir), trs_md)

In [None]:
# Evaluate the replicated change-point scores against the stored transitions.
# Requires `events_md` from the replication cell to still be in memory.
scores = np.load("./replication/hh101/scores.npy")
transitions = np.load("./replication/hh101/transitions.npy")

threshold = 0.1  # an event is reported as a change point when its score exceeds this
interval = 10    # tolerance around a true transition, in the unit of events_md[:, 2]
                 # (presumably seconds -- TODO confirm)

# Every event scoring above the threshold is a detection. No peak picking or
# minimum spacing between detections is applied.
# Sets make the per-event membership tests below O(1) instead of O(n).
finalists = {i for i in range(len(scores)) if scores[i] > threshold}
transition_set = {int(i) for i in transitions}

# Timestamps are parsed once instead of once per comparison.
timestamps = np.array([float(t) for t in events_md[:, 2]])
groundtruth = np.array([timestamps[int(i)] for i in transitions])

# Evaluation
truePositive = trueNegative = falsePositive = falseNegative = 0
for i in range(len(events_md)):
    is_transition = i in transition_set
    if i in finalists:  # Positive
        # A detection counts as correct when it is a transition itself or
        # lies within `interval` of any true transition.
        if is_transition or np.any(np.abs(groundtruth - timestamps[i]) < interval):
            truePositive += 1
        else:
            falsePositive += 1
    elif is_transition:  # Negative on a true transition: missed
        falseNegative += 1
    else:
        trueNegative += 1

# Report TPR then FPR; print nan instead of raising when a rate is undefined.
actual_positive = truePositive + falseNegative
actual_negative = falsePositive + trueNegative
print(truePositive / actual_positive if actual_positive else float('nan'))
print(falsePositive / actual_negative if actual_negative else float('nan'))



# 2. Analysis

In [None]:
# Build one episode per pair of consecutive activity segments.
#   episodes[k] : events of segment k followed by the events of segment k+1
#   trs[k]      : offset of the transition inside episodes[k]
#   tags[k]     : "<activity before>-<activity after>"
episodes, trs, tags = [], [], []
boundaries = [0] + [int(t) for t in trs_md]
for start, middle, end in zip(boundaries, boundaries[1:], boundaries[2:]):
    episodes.append(np.array(events_md[start:end]))
    trs.append(middle - start)
    tags.append("{}-{}".format(events_md[start][3], events_md[middle][3]))

data_name = 'hh101'
metric = 'SEP'
dataset_folder = './features/{}'.format(data_name)

# One sensor list for the whole dataset, shared by every episode.
sensor_list = sorted(set(events_md[:, 0]))

# makedirs also creates './features' when it does not exist yet.
os.makedirs(dataset_folder, exist_ok=True)

# False (default): draw the score plot of the first episode only and stop,
#                  which is what this cell has always done.
# True           : for every episode, save a figure with the basic features
#                  and the scores to <dataset_folder>/<tag>/<index>/.
save_feature_plots = False

for ei, eps in enumerate(episodes):
    transition_point = trs[ei]
    features = np.array(feature_extraction(eps, data_name, sensor_list))

    scores = np.array(change_point_detection(features, data_name=data_name, metric=metric))
    scores[scores < 0] = 0  # negative scores are clipped to zero for display

    if not save_feature_plots:
        # Title with this episode's own tag (index `ei`), not a stale index
        # left over from an earlier loop.
        plt.title(tags[ei])
        plt.bar(range(len(scores)), scores)
        plt.axvline(x=transition_point, linestyle=':', color='r')
        break

    idx_folder = "{}/{}/{}".format(dataset_folder, tags[ei], ei)
    os.makedirs(idx_folder, exist_ok=True)

    names = list(feature_name.values())
    # The last two entries of feature_name are not plotted as basic features.
    numbasicfeatures = len(names) - 2
    x_ = range(len(eps))

    fig = plt.figure(constrained_layout=True, figsize=(15, 15))
    for fi in range(numbasicfeatures):
        ax_ = fig.add_subplot(numbasicfeatures + 1, 1, fi + 1)
        ax_.plot(x_, features[:, fi], '.-')
        ax_.set_ylabel(names[fi])
        ax_.axvline(transition_point, linestyle=':', color='r')
        ax_.set_ylim(0, 2)

    # Final row: the change-point scores.
    ax_ = fig.add_subplot(numbasicfeatures + 1, 1, numbasicfeatures + 1)
    ax_.bar(range(len(scores)), scores, color='g')
    ax_.set_ylabel(metric)
    ax_.axvline(transition_point, linestyle=':', color='r')
    ax_.set_ylim(0, 1)

    fig.savefig("{}/feature_basic.png".format(idx_folder))
    plt.close(fig)  # release the figure; one is created per episode

In [None]:
"""make combinations
    1. pick one group type
    2. pick an activity stream from the group
    3. pick another group type w/o type 1.
    4. pick an activity stream from the group
"""

# What this cell does: for each episode it extracts features using the
# sensors seen in that episode, computes change-point scores, and plots them
# against the threshold and the true transition.
data_name = 'hh101'
metric = 'SEP'

# True (default): stop after drawing the first episode inline, which is what
#                 this cell has always done.
# False         : process every episode and save each plot as graph.png in
#                 its output folder.
preview_only = True

for ei, eps in enumerate(episodes):

    if ei % 100 == 0:
        print("{}/{}".format(ei, len(episodes)))

    # Per-episode sensor list, kept under its own name so the notebook-wide
    # `sensor_list` is not overwritten.
    episode_sensors = sorted(set(eps[:, 0]))
    features = feature_extraction(eps, data_name, episode_sensors)
    folder_ = "./outputs/{}/{}/{}/{}".format(data_name, metric, tags[ei], ei)

    # makedirs creates the missing parent folders too; os.mkdir would fail
    # on this four-level path when any parent is absent.
    os.makedirs(folder_, exist_ok=True)

    scores = change_point_detection(features, folder_, data_name=data_name, metric=metric, save=False)

    plt.title("{}-{}".format(tags[ei], ei))
    plt.ylabel('score')
    plt.xlabel('event')
    plt.ylim(0, 2)
    plt.bar(range(len(scores)), scores)
    plt.axhline(y=0.1, linestyle=':', color='r', label='threshold')
    plt.axvline(x=trs[ei], linestyle=':', color='g', label='transition')
    plt.legend()

    if preview_only:
        break

    plt.savefig("{}/graph.png".format(folder_))
    plt.clf()

In [None]:
"""
    hh101 Evaluation
    - load scores
"""

# Evaluate the saved per-episode scores found under
# ./outputs/<data_name>/<metric>/<activity pair>/<episode index>/scores.npy.
# Requires `episodes` and `trs` from the analysis cell to be in memory.
data_name = 'hh101'
# NOTE(review): the scoring cell in this notebook uses metric='SEP' and calls
# change_point_detection with save=False -- confirm that scores.npy files
# exist under the 'RuLSIF' folder before trusting these numbers.
metric = 'RuLSIF'

threshold = 0.1  # an event is a detection when its score exceeds this
interval = 10    # tolerance around the true transition, in timestamp units

total_counts = np.zeros(4)  # TP, FP, TN, FN
denom = numer = 0           # episodes evaluated / detections raised
for activity_folder in glob.glob("./outputs/{}/{}/*".format(data_name, metric)):
    # one type of pairs
    activity_pair = os.path.basename(activity_folder)
    print(activity_pair)
    pair_counts = np.zeros(4)  # TP, FP, TN, FN

    for episode_folder in glob.glob("{}/*".format(activity_folder)):
        eps_order = int(os.path.basename(episode_folder))
        denom += 1
        eps, point = episodes[eps_order], trs[eps_order]
        scores = np.load("{}/scores.npy".format(episode_folder))

        # Boolean mask instead of a list: O(1) lookup per event.
        is_positive = scores > threshold
        numer += int(is_positive.sum())
        ttimestamp = float(eps[point][2])

        for i in range(len(scores)):
            if is_positive[i]:
                if i == point:
                    pair_counts[0] += 1
                    continue
                # A detection is still correct when this event or the one
                # before it lies within `interval` of the transition.
                # Event 0 has no predecessor; eps[i-1] would wrap around to
                # the LAST event of the episode, so it is skipped there.
                near = abs(ttimestamp - float(eps[i][2])) < interval
                if i > 0:
                    near = near or abs(ttimestamp - float(eps[i - 1][2])) < interval
                if near:
                    pair_counts[0] += 1
                else:
                    pair_counts[1] += 1
            elif i == point:
                pair_counts[3] += 1
            else:
                pair_counts[2] += 1

    # NOTE(review): pairs with an undefined TPR or FPR are skipped entirely,
    # so their counts are also left out of the totals -- confirm intended.
    if pair_counts[0] + pair_counts[3] == 0 or pair_counts[1] + pair_counts[2] == 0:
        continue
    TPR_ = pair_counts[0] / (pair_counts[0] + pair_counts[3])
    FPR_ = pair_counts[1] / (pair_counts[1] + pair_counts[2])
    print("Avg. TPR and FPR: ({}, {})".format(TPR_, FPR_))

    total_counts += pair_counts

# Report nan instead of dividing by zero when nothing was evaluated.
total_positive = total_counts[0] + total_counts[3]
total_negative = total_counts[1] + total_counts[2]
TPR = total_counts[0] / total_positive if total_positive else float('nan')
FPR = total_counts[1] / total_negative if total_negative else float('nan')
print("Total Avg. TPR and FPR: ({}, {})".format(TPR, FPR))

# Average number of detections per evaluated episode.
print(numer / denom if denom else float('nan'))