## Import Packages and Modules

In [1]:
import os, glob
import pandas as pd
import numpy as np
import math, random
from datetime import datetime
import pickle
from matplotlib import pyplot as plt
import argparse
import path, sys, re, time
from collections import Counter
from scipy.signal import find_peaks

from module_.readText import read_hh
from module_.featureExtraction import feature_extraction
from module_.changePointDetection import change_point_detection
from module_.info.hh import baseline_activities
from module_.info.config import feature_name, exclude_list

In [2]:
# Project-local entry point; imported here rather than with the other imports above.
from module_.replication import replication


# Run the replication pipeline with the argument "SEP" (presumably the
# dissimilarity metric, as "SEP" is used for `metric` later in this notebook --
# TODO confirm). The recorded output below shows progress being logged every
# 10000 events at roughly 97 s per step, so this cell is long-running.
replication("SEP")

Load Raw Data: 322603 events
Use Motion and Door Sensors: 219511 events
Raw Transitions: 4457
Create Raw Activity List.
Construct Dataset based on Distribution: 3745 activities
Preprocessed events: 199820, 
    transitions: 3744,
    the number of sensors: 18
Feature shape: (199820, 48)
10000/199820 (accumulated) time: 97.37157773971558
20000/199820 (accumulated) time: 193.7675120830536
30000/199820 (accumulated) time: 290.3879518508911
40000/199820 (accumulated) time: 389.8780391216278
50000/199820 (accumulated) time: 485.8610622882843
60000/199820 (accumulated) time: 581.5460960865021
70000/199820 (accumulated) time: 680.2721569538116
80000/199820 (accumulated) time: 776.8767416477203
90000/199820 (accumulated) time: 873.1291608810425
100000/199820 (accumulated) time: 969.9090266227722
110000/199820 (accumulated) time: 1065.9255678653717
120000/199820 (accumulated) time: 1161.972050666809
130000/199820 (accumulated) time: 1257.91415309906
140000/199820 (accumulated) time: 1353.374213

In [3]:
# Load the arrays stored under replication/hh101 and report the shape of each
# one right after it is read.
ppevents = np.load("replication/hh101/ppevents.npy")
print(ppevents.shape)

pptransitions = np.load("replication/hh101/pptransitions.npy")
print(pptransitions.shape)

scores = np.load("replication/hh101/scores_SEP.npy")
print(scores.shape)

(199820, 4)
(3744,)
(199820,)


In [5]:
# Display the first 100 entries of the loaded `scores` array as a quick sanity check.
scores[:100]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [4]:
# Evaluate the thresholded scores against the stored transition indices.
#
# An event is "positive" when its score exceeds `threshold`. A positive counts
# as a true positive when it is itself a transition index, or when its
# timestamp (column 2 of ppevents) lies within `lambda_` of any transition
# timestamp. Note that TP can therefore be incremented for events that are not
# transition indices, while FN only counts transition indices.
eventtimes = np.array([float(event[2]) for event in ppevents])
transitiontimes = np.array([float(ppevents[idx,2]) for idx in pptransitions])

# Set membership is O(1); `i in <ndarray>` scans the whole array on every event.
transition_indices = set(np.asarray(pptransitions).tolist())

threshold = 0.1
lambda_ = 10
TP = TN = FP = FN = 0
for i in range(len(ppevents)):
    is_transition = i in transition_indices
    if scores[i]>threshold: # POSITIVE
        # Short-circuit: the time-window test only runs for non-transition events.
        if is_transition or (abs(transitiontimes-eventtimes[i])<lambda_).any():
            TP+=1
        else:
            FP+=1
    elif is_transition: # NEGATIVE, but a real transition
        FN+=1
    else: # NEGATIVE and not a transition
        TN+=1

# True-positive rate and false-positive rate. A rate is reported as NaN when its
# denominator is zero instead of raising ZeroDivisionError.
print(TP/(TP+FN) if (TP+FN) else float("nan"))
print(FP/(FP+TN) if (FP+TN) else float("nan"))

0.0
0.0


## Load Dataset: hh101

In [None]:
# Read the event array from disk, then print its shape and its first five rows.
events = np.load("./preprocessed/test/ann.npy")

raw_summary = """Raw events: {}
First 5 Raw events:
{}"""
print(raw_summary.format(events.shape, events[:5]))

## Data Preprocessing

In [None]:
# Keep only motion and door events: rows whose sensor id (column 0) starts
# with 'M' or 'D'.
# Alternatives that were tried and are currently disabled:
#   - additionally requiring a non-empty label:  event[3] != ""
#   - keeping every event:                       events_md = events
events_md = np.array([ev for ev in events if ev[0][0] in ('M', 'D')])

# Map each label in column 3 to its pre-defined activity. Rows of a 2-D array
# are views, so assigning through `row` updates events_md in place.
for row in events_md:
    row[3] = baseline_activities[row[3]]

# Transition points: indices whose activity differs from the previous event.
# Index 0 is compared with itself and so is never reported.
trs_md = np.array([
    idx
    for idx in range(len(events_md))
    if events_md[idx][3] != events_md[max(idx - 1, 0)][3]
])

# Sorted list of the distinct sensor ids that remain.
sensor_list = sorted({sensor for sensor in events_md[:, 0]})

summary_template = """The number of events: {},
The number of transitions: {},
The number of sensors: {}"""
print(summary_template.format(len(events_md), len(trs_md), len(sensor_list)))

# How often each activity appears as the target of a transition.
print(Counter(events_md[idx, 3] for idx in trs_md))

## Feature Extraction

In [None]:
data_name = "hh101"
choose_type = "A"

# One path is used for both the cache lookup and the save, so the two cannot
# drift apart if `data_name` is changed.
features_path = "./replication/{}/features_{}.npy".format(data_name, choose_type)

if os.path.exists(features_path):
    # Cached features exist: reuse them instead of recomputing.
    choose_features = np.load(features_path)
else:
    raw_features = np.array(feature_extraction(events_md, data_name, sensor_list))

    # Keep every feature column whose index is not excluded for this feature type.
    excluded_columns = set(exclude_list[choose_type])
    kept_columns = [i for i in range(raw_features.shape[1]) if i not in excluded_columns]
    choose_features = raw_features[:, kept_columns]

    # Create the output directory if needed; np.save does not create it.
    os.makedirs(os.path.dirname(features_path), exist_ok=True)
    np.save(features_path, choose_features)

    print("""Raw features: {}
    Excluded features: {}
    Chosen features: {}""".format(raw_features.shape, [feature_name[i] for i in exclude_list[choose_type]], choose_features.shape))

## Dissimilarity Scoring

In [None]:
# Score every event with the chosen dissimilarity metric and persist the result
# together with the ground-truth transition indices.
metric = "SEP"
scores = np.array(change_point_detection(choose_features, data_name=data_name, metric=metric))

# Derive the output directory from `data_name` (previously hard-coded to hh101)
# so it matches where the feature cache is looked up.
output_dir = "./replication/{}".format(data_name)
os.makedirs(output_dir, exist_ok=True)

np.save("{}/scores_{}.npy".format(output_dir, metric), scores)
np.save("{}/transitions.npy".format(output_dir), trs_md)

## Evaluation

In [None]:
# NOTE(review): this reads the RuLSIF scores, while `metric` elsewhere in this
# notebook is "SEP" -- confirm that evaluating the RuLSIF file is intended.
scores = np.load("./replication/hh101/scores_RuLSIF.npy")
# Ground-truth transitions come from `trs_md` in memory; loading them from
# ./replication/hh101/transitions.npy is currently disabled.

threshold = 0.1
interval = 10

# Only used by the disabled peak-picking variant, which accepted a score above
# `threshold` only when its timestamp (events_md column 2) was more than
# 2*interval past the previously accepted one. Zeroing scores below the
# threshold in place is likewise disabled.
prevtimestamp = 0

# Candidate change points: every index whose score exceeds the threshold.
finalists = [idx for idx, score in enumerate(scores) if score > threshold]

# Timestamps (events_md column 2) of the ground-truth transition events.
groundtruth = np.array([float(events_md[idx, 2]) for idx in trs_md])

print(len(groundtruth), len(finalists))

In [None]:
# Evaluation
#
# An event is "positive" when its index is in `finalists`. A positive is a true
# positive when it is a transition index, or when its timestamp (column 2) lies
# within `interval` of any ground-truth transition timestamp.

# Sets give O(1) membership; the list / ndarray `in` tests scanned linearly for
# every event.
finalist_indices = set(finalists)
transition_indices = set(np.asarray(trs_md).tolist())

truePositive = trueNegative = falsePositive = falseNegative = 0
for i in range(len(events_md)):
    is_transition = i in transition_indices
    if i in finalist_indices: # Positive
        # Short-circuit: the time-window test only runs for non-transition events.
        if is_transition or (abs(groundtruth - float(events_md[i, 2]))<interval).any():
            truePositive+=1
        else:
            falsePositive+=1
    elif is_transition: # Negative, but a real transition
        falseNegative+=1
    else: # Negative and not a transition
        trueNegative+=1

# True-positive rate and false-positive rate. A rate is reported as NaN when its
# denominator is zero instead of raising ZeroDivisionError.
tpr_denominator = truePositive+falseNegative
fpr_denominator = falsePositive+trueNegative
print(truePositive/tpr_denominator if tpr_denominator else float("nan"))
print(falsePositive/fpr_denominator if fpr_denominator else float("nan"))

## Analysis

In [None]:
# Build one "episode" per transition: the events of the activity before the
# transition (left) followed by the events of the activity after it (right).
#   episodes[k] -- concatenated left + right events
#   trs[k]      -- offset of the transition inside the episode (= len(left))
#   tags[k]     -- "<left activity>-<right activity>", taken from column 3
episodes, trs, tags = [], [], []
previdx=0

for i in range(len(trs_md)-1):
    left=np.array(events_md[previdx:trs_md[i]])
    right=np.array(events_md[trs_md[i]:trs_md[i+1]])
    episodes.append(np.concatenate((left, right)))
    trs.append(left.shape[0])
    tags.append("{}-{}".format(left[0][3], right[0][3]))
    # The next episode's left segment starts at this transition.
    previdx=trs_md[i]

data_name = 'hh101'
metric = 'SEP'
dataset_folder = './features/{}'.format(data_name)

sensor_list = sorted(set(events_md[:,0]))

# makedirs also creates the parent './features' directory when it is missing;
# os.mkdir would fail in that case.
os.makedirs(dataset_folder, exist_ok=True)

for ei, eps in enumerate(episodes):
    transition_point = trs[ei]
    features = np.array(feature_extraction(eps, data_name, sensor_list))

    # Use this loop's index `ei`; the previous code used `i`, a stale variable
    # left from the loop above, so the tag and folder did not match the episode.
    # `folder_` is not used yet.
    folder_ = "./features/{}/{}/{}".format(data_name, tags[ei], ei)

    # NOTE: this rebinds the notebook-level `scores` variable.
    scores=np.array(change_point_detection(features, data_name=data_name, metric=metric))
    scores[scores<0]=0  # clamp negative scores to zero before plotting

    # Bar chart of per-event scores with the true transition as a dotted red line.
    plt.title(tags[ei])
    plt.bar(range(len(scores)), scores)
    plt.axvline(x=transition_point, linestyle=':', color='r')

    # Only the first episode is processed and plotted.
    break