In [17]:
import pandas as pd
import numpy as np
from itertools import groupby
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix, f1_score
%matplotlib qt

In [18]:
# Labeled clustering result to analyse; the first CSV column is used as the index.
filename = 'cluster_results_f50_w5_c6_labeled.csv'
df = pd.read_csv(f'./results/feature50/{filename}', index_col=0)

In [19]:
# Select a feature (for visualization): name of the df column whose values are plotted.
feature = 'tGravityAcc-mean()-X' # User parameter

In [20]:
# --- Settings ---
# Plot colors, ground-truth activity names and cluster ids used throughout.
colors = ['#eb3b5a', '#f7b731', '#20bf6b', '#a55eea', '#3867d6', '#fa8231']
activity_lst = [
    'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS',
    'SITTING', 'STANDING', 'LAYING',
]
cluster_lst = ['a', 'b', 'c', 'd', 'e', 'f']

# Start from all-NaN frames on df's index so rows that do not belong to a
# column stay empty when plotted.
origin_df = pd.DataFrame(index=df.index, columns=activity_lst)  # ground-truth label
pred_df = pd.DataFrame(index=df.index, columns=cluster_lst)  # predicted cluster

# Each column receives the feature values of the rows carrying that label;
# index alignment leaves every other row as NaN.
for act in activity_lst:
    origin_df[act] = df.loc[df.ActivityName == act, feature]
for cluster in cluster_lst:
    pred_df[cluster] = df.loc[df.pred == cluster, feature]

In [21]:
"""
Function Definition
"""
# Plot several label/cluster columns in a single figure
def line_plot_several_into_one(dataframe, title, activity_lst=None, colors=colors):
    """Draw the columns of ``dataframe`` as lines in one shared figure.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are plotted by position (i-th column, i-th label).
    title : str
        Figure title.
    activity_lst : sequence of str, optional
        Legend labels, one per plotted column. When omitted, the frame's own
        column names are used, so a frame of clusters is labelled with cluster
        ids rather than with activity names that merely share the position.
    colors : sequence of str
        Line colors; reused cyclically if there are more columns than colors.
    """
    labels = list(dataframe.columns) if activity_lst is None else list(activity_lst)

    plt.figure(figsize=(20, 10))
    for i, label in enumerate(labels):
        plt.plot(dataframe.iloc[:, i], color=colors[i % len(colors)], label=label)

    # Build the legend once, after all lines exist, instead of once per line.
    if labels:
        plt.legend()

    plt.title(title, size=20)
    plt.show()

# Plot several label/cluster columns, one subplot per column
def line_plot_several_into_several(dataframe, title, activity_lst=None):
    """Draw up to six columns of ``dataframe``, each in its own subplot (3 x 2 grid).

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are plotted by position.
    title : str
        Title placed above the whole grid.
    activity_lst : sequence of str, optional
        Subplot titles / legend labels, one per plotted column. When omitted,
        the frame's own column names are used, so a frame of clusters is
        titled with cluster ids instead of positionally matched activity names.
    """
    labels = list(dataframe.columns) if activity_lst is None else list(activity_lst)

    f, axes = plt.subplots(3, 2)
    f.set_size_inches((20, 15))
    f.suptitle(title, size=20)
    plt.subplots_adjust(wspace=0.2, hspace=0.2)

    # axes.flat walks the grid row by row (same order as i * 2 + j); zip stops
    # at the shorter of grid/labels, so fewer than six columns leaves panels
    # empty instead of raising IndexError.
    for idx, (ax, label) in enumerate(zip(axes.flat, labels)):
        ax.plot(dataframe.iloc[:, idx], label=label, color='#ff3f34')
        ax.set_title(str(label))
        # Legend per subplot; a single plt.legend() only reaches the last axes.
        ax.legend()

    plt.show()

def line_plot_one_to_one(activity, pred_activity):
    """Overlay one ground-truth activity column and one cluster column as lines.

    Reads the notebook-level ``origin_df``, ``pred_df`` and ``colors``.
    The activity line is drawn on top (higher zorder) of the translucent
    cluster line.
    """
    _, ax = plt.subplots(figsize=(20, 10))
    ax.plot(origin_df[activity], color=colors[0], label=activity, zorder=2)
    ax.plot(
        pred_df[pred_activity],
        color=colors[3],
        label=pred_activity,
        alpha=0.4,
        zorder=1,
    )
    ax.legend()
    plt.show()

def scatter_plot_one_to_one(activity, pred_activity):
    """Overlay one ground-truth activity column and one cluster column as points.

    Reads the notebook-level ``origin_df``, ``pred_df`` and ``colors``.

    Parameters
    ----------
    activity : str
        Column of ``origin_df`` to plot (drawn on top).
    pred_activity : str
        Column of ``pred_df`` to plot (translucent, drawn underneath).
    """
    plt.figure(figsize=(20, 10))
    # The columns are passed with their NaN rows untouched: scatter omits
    # non-finite points. The previous ``.replace(np.nan, None)`` was harmful:
    # per the pandas docs, versions before 1.4 ignore the explicit None and
    # forward-fill instead (plotting values on rows that do not belong to the
    # label), while later versions turn the column into object dtype.
    plt.scatter(origin_df.index, origin_df[activity],
                color=colors[0], label=activity, zorder=2)
    plt.scatter(pred_df.index, pred_df[pred_activity],
                color=colors[3], label=pred_activity, alpha=0.4, zorder=1)
    plt.legend()
    plt.show()

# Collapse runs of identical consecutive elements into one entry per segment
def get_segmentation_count(lst):
    """Return the value of each run of equal consecutive items in ``lst``.

    Despite the name, a list is returned; the number of segments is its length.
    """
    segment_keys = []
    for key, _run in groupby(lst):
        segment_keys.append(key)
    return segment_keys


## Evaluation

In [22]:
# Binary indicators: 1 where the row has a value for that activity / cluster, else 0.
origin_act = origin_df.notna().astype('int')
pred_act = pred_df.notna().astype('int')

In [23]:
# --- Evaluation metrics ---
# For every activity, score each cluster's indicator column against the
# activity's indicator column and keep one table (rows = clusters) per activity.
scorers = {
    'Precision': precision_score,
    'Recall': recall_score,
    'Accuracy': accuracy_score,
    'F1_score': f1_score,
}

evl_df_lst = []
for act in origin_act.columns:
    evl_df = pd.DataFrame(index=pred_act.columns)
    for pred in pred_act.columns:
        for score_name, score_fn in scorers.items():
            evl_df.loc[pred, score_name] = score_fn(origin_act[act], pred_act[pred])
    evl_df_lst.append(evl_df)

In [24]:
# Map each activity to its per-cluster score table, then stack the tables into
# one frame indexed by (activity, cluster).
evl_dict = {act_name: scores for act_name, scores in zip(origin_act.columns, evl_df_lst)}
evaluation_df = pd.concat(evl_dict)

In [25]:
# Reference metric used to match clusters to activities
metric = 'F1_score'
evl_base = evaluation_df.loc[:, metric].copy()

In [26]:
# Candidate (activity, cluster) pairs, taken from the score series' index
evl_idx_lst = evl_base.index.tolist()

In [27]:
# Display the candidate (activity, cluster) pairs
evl_idx_lst

[('WALKING', 'a'),
 ('WALKING', 'b'),
 ('WALKING', 'c'),
 ('WALKING', 'd'),
 ('WALKING', 'e'),
 ('WALKING', 'f'),
 ('WALKING_UPSTAIRS', 'a'),
 ('WALKING_UPSTAIRS', 'b'),
 ('WALKING_UPSTAIRS', 'c'),
 ('WALKING_UPSTAIRS', 'd'),
 ('WALKING_UPSTAIRS', 'e'),
 ('WALKING_UPSTAIRS', 'f'),
 ('WALKING_DOWNSTAIRS', 'a'),
 ('WALKING_DOWNSTAIRS', 'b'),
 ('WALKING_DOWNSTAIRS', 'c'),
 ('WALKING_DOWNSTAIRS', 'd'),
 ('WALKING_DOWNSTAIRS', 'e'),
 ('WALKING_DOWNSTAIRS', 'f'),
 ('SITTING', 'a'),
 ('SITTING', 'b'),
 ('SITTING', 'c'),
 ('SITTING', 'd'),
 ('SITTING', 'e'),
 ('SITTING', 'f'),
 ('STANDING', 'a'),
 ('STANDING', 'b'),
 ('STANDING', 'c'),
 ('STANDING', 'd'),
 ('STANDING', 'e'),
 ('STANDING', 'f'),
 ('LAYING', 'a'),
 ('LAYING', 'b'),
 ('LAYING', 'c'),
 ('LAYING', 'd'),
 ('LAYING', 'e'),
 ('LAYING', 'f')]

In [28]:
# Greedy one-to-one matching of activities to clusters: repeatedly take the
# best-scoring (activity, cluster) pair among those still available.
assigned_result = []

# Work on a copy so evl_base / evl_idx_lst are not consumed; the cell can then
# be re-run on its own without first re-running the cells that build them.
remaining = evl_base.copy()

# Stop early if the candidates run out before every activity is assigned,
# rather than calling idxmax() on an empty Series.
while len(assigned_result) < len(activity_lst) and not remaining.empty:
    max_idx = remaining.idxmax()  # (activity, cluster) pair with the highest score
    assigned_result.append(max_idx)

    # Both members of the pair are now taken: keep only candidates that use a
    # different activity AND a different cluster.
    activities = remaining.index.get_level_values(0)
    clusters = remaining.index.get_level_values(1)
    remaining = remaining[(activities != max_idx[0]) & (clusters != max_idx[1])]

## Visualization
- Clustering Result

In [29]:
# One figure per frame: all ground-truth activity columns, then all cluster columns.
line_plot_several_into_one(origin_df, 'Original Label')
line_plot_several_into_one(pred_df, 'Predict Label')

In [None]:
# Same two frames again, this time with one subplot per column.
line_plot_several_into_several(origin_df, 'Original Label')
line_plot_several_into_several(pred_df, 'Predict Label')

## Visualization 
- Evaluation Result

In [30]:
# Position in assigned_result of the matched (activity, cluster) pair to visualize.
num = 0 # User parameter

In [31]:
# Overlay the selected pair: element 0 is the activity, element 1 the cluster.
line_plot_one_to_one(assigned_result[num][0], assigned_result[num][1])
scatter_plot_one_to_one(assigned_result[num][0], assigned_result[num][1])

In [None]:
# Selected pair: ground-truth activity and the cluster matched to it.
act = assigned_result[num][0]
clu = assigned_result[num][1]

truth = origin_act[act]
assigned = pred_act[clu]
pred_pos = assigned[assigned == 1].index  # rows placed in the cluster

# True positives: in the cluster and labelled with the activity.
tp_idx = truth[truth == 1].index.intersection(pred_pos)
# False positives: in the cluster but not labelled with the activity.
fp_idx = truth[truth == 0].index.intersection(pred_pos)

In [None]:
"""
Visualization
"""
# Scatter the selected feature for the true-positive and false-positive rows
# of the chosen (activity, cluster) pair.
plt.figure(figsize=(20,10))
plt.suptitle(f"({act}, {clu}) (TP/FP)", size=20)
vis_df = pd.DataFrame(index=df.index)

# Assigning by index leaves NaN on every row outside tp_idx / fp_idx, and
# scatter omits non-finite points. The former ``.replace(np.nan, None)`` is
# dropped: per the pandas docs, versions before 1.4 ignore the explicit None
# and forward-fill instead, while later versions produce object dtype.
vis_df['tp'] = df.loc[tp_idx, feature]
vis_df['fp'] = df.loc[fp_idx, feature]

plt.scatter(df.index, vis_df['tp'], color=colors[0], label='tp', alpha=0.3, zorder=1)
plt.scatter(df.index, vis_df['fp'], color=colors[1], label='fp', alpha=0.3, zorder=2)
plt.legend()
plt.show()

### Segmentation

In [32]:
# Collapse runs of identical consecutive elements into one entry per segment.
# NOTE(review): this notebook already defines a function with this name in the
# function-definition cell; this later definition shadows it. Consider keeping
# only one.
def get_segmentation_count(lst):
    """Return one entry per run of equal consecutive items in ``lst``.

    The result is a list of run values; take ``len`` of it for the segment count.
    """
    return list(map(lambda run: run[0], groupby(lst)))

In [33]:
# Number of actual segments (runs of identical consecutive Activity values)
len(get_segmentation_count(df.Activity))

400

In [34]:
# Number of predicted segments (runs of identical consecutive pred values)
len(get_segmentation_count(df.pred))

249

In [None]:
# Pick one subject (a single person) and compare their segment sequences
sbj = 1 # User parameter
subject_rows = df.loc[df.subject == sbj]

print(get_segmentation_count(subject_rows.Activity)) # actual segments
print(get_segmentation_count(subject_rows.pred)) # predicted segments