# Import packages and modules

In [1]:
import os
from glob import glob
import json
import pandas as pd
import numpy as np
import math, random
import datetime as dt
import pickle
from matplotlib import pyplot as plt
import argparse
import path, sys, re, time
from collections import Counter
from scipy.spatial import distance_matrix
from scipy.signal import find_peaks


from module_.info.testbed import activityfiles_new
from module_.info.config import config, feature_name
from module_.readText import create_episodes, time_correction, read_adlmr
from module_.featureExtraction import feature_extraction
from module_.changePointDetection import change_point_detection

from module_.correlation import module_pmc
from module_.featureExtraction import sliding_window


In [None]:
# CORRELATION
# Run the project's `correlation` routine once and keep whatever it returns in
# `result`.
from module_.correlation import correlation

# The first argument is presumably the dataset name ("hh101", "testbed" and
# "adlmr" appear in this cell); the second is None here and "RuLSIF" in the
# disabled variants below — likely a method selector, TODO confirm against
# module_.correlation.
result = correlation("hh101", None)
# Disabled variants for the other datasets:
# result = correlation("testbed","RuLSIF")
# result = correlation("adlmr","RuLSIF")

In [2]:
from module_.dataLoader import dataLoader

# Name of the dataset handed to dataLoader; later cells reuse it in file paths.
dataset = "testbed"

# dataLoader yields three collections that later cells index with the same
# position (one episode, one transition, one label per index).
episodes, transitions, labels = dataLoader(dataset)

# Build the sensor vocabulary: every distinct value found in column 0 of any
# episode, frozen as a sorted list so that list positions are stable.
sensors = set()
for episode in episodes:
    sensors.update(episode[:, 0])
sensors = sorted(sensors)

# Disabled alternative: use the "hh101" dataset, either through dataLoader or
# from arrays cached on disk.
# episodes, transitions, labels = dataLoader("hh101")
# episodes = np.load("./dataload/hh101/episodes.npy", allow_pickle=True)
# transitions = np.load("./dataload/hh101/transitions.npy")
# labels = np.load("./dataload/hh101/labels.npy")

In [None]:
# active state
# For one saved episode folder, draw a horizontal segment for every interval
# returned by check_active_state (one row per key of the returned dict) and
# highlight the neighbourhood of the labelled transition.

from module_.validation import check_active_state

fig = plt.figure(figsize=(20, 5))
ax = plt.axes()

colorstring = "bgrcmk"

# Margin added on both sides of the transition when shading the span
# (same scale as the values in column 2 of the episode).
threshold = 60

# glob() returns paths in arbitrary order, so sort before selecting by
# position; only the folder at index 1 is plotted (nothing happens when fewer
# than two folders exist).
folders = sorted(glob("./correlation/testbed/*"))
for folder in folders[1:2]:
    print(folder)
    # Last path component, cut at the first dot.
    pair = os.path.basename(folder).split(".")[0]
    episode = np.load("{}/episode.npy".format(folder))
    # First entry of label.npy is used as the index of the transition event.
    transition = np.load("{}/label.npy".format(folder))[0]

    # Column 2 holds the timestamps; express them relative to the first event.
    start_time = float(episode[0, 2])
    duration = float(episode[-1, 2]) - start_time
    transition_time_l = float(episode[transition - 1, 2]) - start_time
    transition_time_r = float(episode[transition, 2]) - start_time

    # Mapping key -> iterable of (start, end) fragments, as consumed below.
    # NOTE(review): fragments are assumed to be relative to start_time, like
    # the transition markers — confirm against check_active_state.
    active_dict = check_active_state(episode)

    ax.set_title('Active State of {}'.format(pair))
    ax.set_xlabel("Timestamp")
    ax.set_ylabel("Sensor")
    ax.set_yticks(range(1, len(active_dict) + 1))
    ax.set_yticklabels(list(active_dict.keys()))

    for i, (k, v) in enumerate(active_dict.items()):
        # Cycle through the palette; the "- 1" offset keeps the colour order
        # this plot has always used (row 0 -> last colour, row 1 -> first, ...).
        row_color = colorstring[(i - 1) % len(colorstring)]
        for fragment in v:
            ax.hlines(y=i + 1, xmin=fragment[0], xmax=fragment[1], linewidth=3, color=row_color)

    # Dotted lines at the last event before and the first event after the
    # transition.
    for boundary in (transition_time_l, transition_time_r):
        plt.axvline(x=boundary, color="g", linestyle=":")

    # Shaded band around the transition, clipped to the episode's time range.
    plt.axvspan(
        max(0, transition_time_l - threshold),
        min(transition_time_r + threshold, duration),
        facecolor='g', alpha=0.2, label="transition"
    )

    plt.legend()

In [None]:

# Disabled alternative: load a precomputed relationship array from disk.
# relationships = np.load("./correlation/testbed/npy/GroupStudy-Presentation-Discussion-Chatting_correlation.npy")
# relationships = np.load("./correlation/adlmr/npy/GA-GB-GC-GD-GE-GA-GB-GC-GD-GE_correlation.npy")
# relationships = np.load("./correlation/hh101/npy/Personal_Hygiene-Other_correlation.npy")

# Choose which episode to inspect and pull out its transition and label.
index = 0
episode = episodes[index]
transition = transitions[index]
label = labels[index]

# Relationship data for the chosen episode; later cells index it as
# relationships[event_index][sensor_index].
relationships = module_pmc(episode, 0, sensors)

In [None]:
# For every event, print its index and sensor, followed by each entry of that
# sensor's relationship row that differs from 1 (truncated to three decimals)
# together with the name of the sensor the entry refers to.
for ei, event in enumerate(episode):
    # Only the sensor id (column 0) is needed. Indexing it directly, instead of
    # unpacking a fixed number of fields, works for any row width.
    s = event[0]
    print(ei, s)
    si = sensors.index(s)
    row = relationships[ei][si]
    for ri, value in enumerate(row):
        if value != 1.:
            print(sensors[ri], np.trunc(value*1e3)/1e3, end=" ")
    print()

In [None]:
# Dump each event of the selected episode, prefixed with its position.
for numbered_event in enumerate(episode):
    print(*numbered_event)

In [None]:
# MEAN RELATIONSHIPS
# Bar chart with one bar per event: the mean over that event's relationship
# data. The labelled transition(s) are marked on top.

mean_relationships = list(map(np.mean, relationships))

fig = plt.figure(figsize=(20, 5))
ax = fig.add_subplot(1, 1, 1)

plt.title(label)
bar_positions = range(len(mean_relationships))
plt.bar(bar_positions, mean_relationships)
# Start the y axis at the smallest mean so that small differences are visible.
plt.ylim(min(mean_relationships), 1)
# Disabled alternative: plot weighted_relationships instead.
# plt.bar(range(len(weighted_relationships)), weighted_relationships)
# plt.ylim(min(weighted_relationships), 1)

# A single transition index is wrapped so both cases share the loop below.
if type(transition) is not list:
    transition = [transition]
for trans in transition:
    lb = trans - 1
    ub = trans
    tl = float(episode[lb, 2])
    tr = float(episode[ub, 2])
    # Walk left until the event is at least 10 (column-2 units) before the
    # event preceding the transition, or the episode start is reached.
    while lb != 0 and tl - float(episode[lb, 2]) < 10.:
        lb -= 1
    # Same on the right side, bounded by the last event.
    while ub != len(episode) - 1 and float(episode[ub, 2]) - tr < 10.:
        ub += 1
    plt.axvline(trans, linestyle="dotted", color="g")
    plt.axvspan(lb, ub, alpha=0.2, color='g')

In [None]:
# CHANGES
# For each sliding window over the per-event mean relationship, compute the
# sum of absolute deviations from the window mean ("values"), and the
# non-negative least-squares slope of the most recent values ("slopes").

mean_relationships = [np.mean(item) for item in relationships]

window_size = 30
windows = sliding_window(mean_relationships, window_size)


values = []
slopes = []
for window in windows:
    window = np.array(window)
    values.append(sum(abs(window - np.mean(window))))

    # Fit a line through the last `window_size` values (all of them while
    # fewer are available); x is the position within that tail, scaled by 1/10.
    recent = np.array(values[-window_size:])
    x_axis = np.array([k/10. for k in range(len(recent))])
    design = np.vstack([x_axis, np.ones(len(recent))]).T
    m, _ = np.linalg.lstsq(design, recent, rcond=None)[0]
    # Negative slopes are floored at zero.
    slopes.append(max(0, m))
    # slopes.append(m)

fig = plt.figure(figsize=(20, 5))
ax = fig.add_subplot(1, 1, 1)

plt.title(label)
plt.bar(range(len(values)), values)

# A single transition index is wrapped so both cases share the loop below.
if type(transition) is not list:
    transition = [transition]
for trans in transition:
    lb = trans - 1
    ub = trans
    tl = float(episode[lb, 2])
    tr = float(episode[ub, 2])
    # Extend the shaded span outwards while neighbouring events are less than
    # 10 (column-2 units) away, stopping at the episode boundaries.
    while lb != 0 and tl - float(episode[lb, 2]) < 10.:
        lb -= 1
    while ub != len(episode) - 1 and float(episode[ub, 2]) - tr < 10.:
        ub += 1
    plt.axvline(trans, linestyle="dotted", color="g")
    plt.axvspan(lb, ub, alpha=0.2, color='g')

In [None]:
# SLOPE
# Bar chart of the slope series with negative entries set to zero and the
# labelled transition(s) marked.

slopes = np.array(slopes)
below_zero = slopes < 0
slopes[below_zero] = 0

fig = plt.figure(figsize=(20, 5))
ax = fig.add_subplot(1, 1, 1)

plt.title(label)
plt.bar(range(len(slopes)), slopes)

# A single transition index is wrapped so both cases share the loop below.
if type(transition) is not list:
    transition = [transition]
for trans in transition:
    lb = trans - 1
    ub = trans
    tl = float(episode[lb, 2])
    tr = float(episode[ub, 2])
    # Extend the shaded span outwards while neighbouring events are less than
    # 10 (column-2 units) away, stopping at the episode boundaries.
    while lb != 0 and tl - float(episode[lb, 2]) < 10.:
        lb -= 1
    while ub != len(episode) - 1 and float(episode[ub, 2]) - tr < 10.:
        ub += 1
    plt.axvline(trans, linestyle="dotted", color="g")
    plt.axvspan(lb, ub, alpha=0.2, color='g')

In [None]:
# PEAK
# Scan the slope series for peaks. A run starts at the first positive slope;
# while in a run, zeros are skipped and larger values move the running
# maximum. The first positive value that does not exceed the maximum closes
# the run and records the index of that maximum as a peak.

flow = False
prevflow = previdx = 0
peaks = []
for i, current in enumerate(slopes):
    if not flow:
        if current > 0.:
            # Open a new run at this position.
            flow = True
            prevflow = current
            previdx = i
        continue
    if current == 0.:
        continue
    if current > prevflow:
        # Still increasing: remember the new maximum.
        prevflow = current
        previdx = i
    else:
        peaks.append(previdx)
        flow = False


fig = plt.figure(figsize=(20, 5))
ax = fig.add_subplot(1, 1, 1)

# Keep the slope value at peak positions and zero everywhere else.
peak_lookup = set(peaks)
peakslope = np.array(
    [slopes[i] if i in peak_lookup else 0 for i in range(len(slopes))]
)

# peakslope[peakslope<0.075]=0

plt.title(label)
plt.bar(range(len(peakslope)), peakslope)

# A single transition index is wrapped so both cases share the loop below.
if type(transition) is not list:
    transition = [transition]
for trans in transition:
    lb = trans - 1
    ub = trans
    tl = float(episode[lb, 2])
    tr = float(episode[ub, 2])
    # Extend the shaded span outwards while neighbouring events are less than
    # 10 (column-2 units) away, stopping at the episode boundaries.
    while lb != 0 and tl - float(episode[lb, 2]) < 10.:
        lb -= 1
    while ub != len(episode) - 1 and float(episode[ub, 2]) - tr < 10.:
        ub += 1
    plt.axvline(trans, linestyle="dotted", color="g")
    plt.axvspan(lb, ub, alpha=0.2, color='g')

In [None]:
# EVENT SEQUENCE
# Print the positions whose peak score reaches 0.02, then list the events in a
# window around one hand-picked event number.

from scipy.spatial import distance_matrix
score = peakslope
print(transition)
print([position for position, s in enumerate(score) if s >= 0.02])
enumber = 620
print(score[enumber])

# Window: the 44 events before `enumber`, the event itself and the 2 after it,
# clipped to the bounds of the episode.
lb = max(0, enumber-45+1)
ub = min(enumber+2+1, len(episode))
data = episode[lb:ub]
for i in range(len(data)):
    datum = data[i]
    # print(f"{i+lb} {datum[0]}, {datum[1]}, {datum[2]}, {datum[3]}, {datum[4]}")
    print(f"{i+lb} {datum[0]}, {datum[1]}, {datum[2]}")
# diff_feature

In [14]:
# Compute, per episode, the slope series and the peak indices derived from a
# weighted relationship score, and collect them in tslopes / tpeaks.
window_size = 30

# Per-episode results, appended in episode order.
tslopes = []
tpeaks = []

for epi in range(len(episodes)):

    episode, transition, label = episodes[epi], transitions[epi], labels[epi]

    relationships = module_pmc(episode, 0, sensors)

    # mean_relationships = [np.mean(item) for item in relationships]
    # One relationship matrix is expected per event.
    assert len(episode)==len(relationships)

    # Weighted score per event: every row of the event's matrix except the
    # triggering sensor's own row is scaled by the matching entry of the
    # triggering sensor's row, and the scaled rows are summed up.
    weighted_relationships = []
    for ri, matrix in enumerate(relationships):
        esi = sensors.index(episode[ri][0])
        weight_vector = matrix[esi]
        weighted_sum = np.zeros(len(sensors))
        for si in range(len(matrix)):
            if esi==si:
                continue
            weighted_sum += matrix[si]*weight_vector[si]
        weighted_relationships.append(sum(weighted_sum))

    windows = sliding_window(weighted_relationships, window_size)

    values = []
    slopes = []
    for i in range(len(windows)):
        # Sum of absolute deviations from the window mean.
        meanvalue = np.mean(windows[i])
        diff = sum(abs(np.array(windows[i])-meanvalue))
        values.append(diff)

        # Least-squares line through the last `window_size` values (all of
        # them while fewer exist); x is the window index scaled by 1/10.
        y = np.array(values[-window_size:])
        first = i + 1 - len(y)
        # Disabled alternative for x: event timestamps relative to the first event.
        # A = np.vstack([np.array([(float(episode[k][2])-float(episode[0][2])) for k in range(max(0, i-window_size+1), i+1)]), np.ones(window_size)]).T
        A = np.vstack([np.array([k/10. for k in range(first, i+1)]), np.ones(len(y))]).T
        m, _ = np.linalg.lstsq(A, y, rcond=None)[0]
        # Negative slopes are floored at zero.
        slopes.append(max(0, m))

    # Peak scan: a run starts at the first positive slope; zeros inside a run
    # are skipped; the first positive value that does not exceed the running
    # maximum closes the run and records the maximum's index.
    flow = False
    prevflow = previdx = 0
    peaks = []
    for i in range(len(slopes)):
        if flow:
            if slopes[i]==0.:
                continue
            if slopes[i]>prevflow: # Increasing
                prevflow = slopes[i]
                previdx = i
            else:
                peaks.append(previdx) # peak
                flow=False
        else:
            if slopes[i]>0.:
                flow=True
                prevflow = slopes[i]
                previdx = i

    # Slope value at peak positions, zero elsewhere.
    peak_lookup = set(peaks)
    peakslope = np.array([slopes[i] if i in peak_lookup else 0 for i in range(len(slopes))])

    # Record the results before any early exit so they are not lost.
    tslopes.append(slopes)
    tpeaks.append(peaks)

    plt.plot(range(len(peakslope)), peakslope)
    # Mark the labelled transition index(es), as the single-episode plots do.
    # NOTE(review): a bare plt.axvline() would draw at x=0; confirm that the
    # transition index is what should be marked here.
    marks = transition if type(transition) is list else [transition]
    for trans in marks:
        plt.axvline(trans, linestyle="dotted", color="g")

    # Only the first episode is processed and plotted; remove this `break` to
    # run over the whole dataset.
    break

    # peakslopes.append([p for p in peaks if slopes[p]>threshold])
    # print([i for i in range(len(peakslope)) if peakslope[i]>0.02])
# np.save(f"./correlation/{dataset}/npy/slopes_{window_size}.npy", tslopes)
# np.save(f"./correlation/{dataset}/npy/peaks_{window_size}.npy", tpeaks)
# np.save(f"./correlation/adlmr/npy/peaks_{threshold}.npy", peakslopes)

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  c /= stddev[:, None]
  c /= stddev[None, :]


0. 실험한 결과
문제점 1. 전환점(t) 으로부터 Window size (ws) 만큼 뒤에서 포착 (t+ws)되기 때문에 상당한 Delay가 존재한다
문제점 2. Dataset마다 다른 Window size를 적용해야 한다 (센서의 발현 주기가 다르기 때문에, 너무 작게 설정하면 Peak를 많이 발생시키고 너무 크게 설정하면 Peak를 못 잡기도 한다)
문제점 3. 기울기 Threshold를 고정값으로 설정하기가 어렵다
    - 특정한 두 연속된 행동에 대해서 전환점을 찾으려 할때, 변화하는 센서의 수에 따라서 변화의 기울기가 가파르게 나타나기도 하고 느슨하게 나타나기도 한다
    - 기울기를 변화에 관여한 센서의 수에 비례하게 계산할 수 있는 방법이 필요하다

In [12]:
# Evaluate the saved peaks against the labelled transitions and print the
# per-episode true-positive rate and the mean number of false positives.

# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load files
# produced by this notebook.
slopes = np.load(f"./correlation/{dataset}/npy/slopes_{window_size}.npy", allow_pickle=True)
peaks = np.load(f"./correlation/{dataset}/npy/peaks_{window_size}.npy", allow_pickle=True)

# Minimum slope a peak must exceed to count as a detection.
threshold = 0.015
# A detection is a hit when its index is fewer than this many positions away
# from the transition index.
hit_tolerance = 15

assert len(slopes)==len(peaks)

tp = tn = fp = fn = 0

for epi in range(len(episodes)):
    episode, transition, label = episodes[epi], transitions[epi], labels[epi]
    slope, peak = slopes[epi], peaks[epi]

    peak_ = [p for p in peak if slope[p]>threshold]
    detected = set(peak_)

    # Indices of the events just before and at the transition.
    # NOTE(review): assumes a single scalar transition per episode — a list of
    # transitions would fail on the subtraction.
    lb, ub = transition-1, transition

    etp = etn = efp = 0
    # Only the event index matters here, so event rows are not unpacked; this
    # works for any number of columns per event.
    for ei in range(len(episode)):
        if ei in detected: # POSITIVE
            # A timestamp-based tolerance (|t - transition time| < 10) is a
            # possible alternative to this index-based criterion.
            if abs(ei-ub)<hit_tolerance:
                etp+=1
            else:
                efp+=1
        elif ei!=lb and ei!=ub: # NEGATIVE away from the transition
            etn+=1

    # Episode-level outcome: exactly one true positive if any detection hit
    # the transition, otherwise exactly one false negative.
    if etp!=0:
        etp, efn = 1, 0
    else:
        etp, efn = 0, 1
    tp+=etp; tn+=etn; fp+=efp; fn+=efn


# tn and fn are accumulated above but only the tp and fp rates are reported.
print(tp/len(episodes), fp/len(episodes))





0.6666666666666666 50.416666666666664


In [None]:
# Show the transition next to the peaks whose slope exceeds 0.02, then print
# the 31 events ending at a hand-picked index.
strong_peaks = [i for i in peaks if slopes[i]>0.02]
print(transition, strong_peaks)
idx = 64
window_start, window_stop = idx-30, idx+1
print(episode[window_start:window_stop])