In [110]:
%matplotlib notebook
# %matplotlib ipympl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

#### Generate Dataframe of all keypoints from JSON file

In [111]:
# Columns kept from the JSON records, in the order used by the rest of the notebook.
# An earlier variant of this list also selected a 'Person' column:
# new_columns = ['Gesture', 'Sub folder No.', 'Frame No.', 'Person', 'Joint', 'X', 'Y', 'Probability', 'Depth']
new_columns = [
    'Gesture', 'Sub folder No.', 'Frame No.',
    'Joint', 'X', 'Y', 'Probability', 'Depth',
]

# Read the records and select/reorder the columns in one chained expression.
keypoints_all = (
    pd.read_json('./Keypoints_All.json', orient='records')
    .reindex(columns=new_columns)
)
print('keypoints_all shape', keypoints_all.shape)
keypoints_all.head()

keypoints_all shape (221800, 8)


Unnamed: 0,Gesture,Sub folder No.,Frame No.,Joint,X,Y,Probability,Depth
0,1,1,1,0,646.969116,167.00444,0.882761,0.058824
1,1,1,1,1,649.058167,268.895813,0.866711,0.392157
2,1,1,1,2,576.459595,272.745087,0.785951,0.788235
3,1,1,1,3,504.003571,343.379303,0.762569,0.733333
4,1,1,1,4,519.62439,225.855576,0.807953,0.745098


#### Generate Dataframe of keypoints 1-7

In [112]:
# Boolean mask selecting the rows whose 'Joint' value is one of 1..7.
joint_is_1_to_7 = keypoints_all['Joint'].isin(range(1, 8))
keypoints_1_7 = keypoints_all.loc[joint_is_1_to_7]
print('keypoints_1_7 shape', keypoints_1_7.shape)

keypoints_1_7 shape (62104, 8)


#### Check Probability (from OpenPose output) per folder

In [113]:
fig, axes = plt.subplots(2, 1, sharex=True)

# Both panels share the same pivot layout (one bar group per Gesture, one bar
# per Sub folder No.) and the same bar styling; only the aggregation differs.
pivot_layout = dict(index='Gesture', columns='Sub folder No.', values='Probability')
bar_style = dict(kind='bar', legend=False, alpha=0.5)

probability_mean = pd.pivot_table(keypoints_1_7, aggfunc='mean', **pivot_layout)
probability_var = pd.pivot_table(keypoints_1_7, aggfunc='var', **pivot_layout)

probability_mean.plot(ax=axes[0], title='Average of all keypoints\' Probability per folder', **bar_style)
probability_var.plot(ax=axes[1], title='Variance of all keypoints\' Probability per folder', **bar_style)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x27956e294a8>

#### Observe distribution of Probability

In [114]:
fig = plt.figure()
# Histogram (100 bins) of the 'Probability' column of the joint 1-7 keypoints.
probability_1_7 = keypoints_1_7['Probability']
probability_1_7.plot(kind='hist', bins=100, alpha=0.5, title='Distribution of \"Probability\" (OpenPose output)')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2792cd60438>

#### Get the Probability value at the 0.1th percentile (quantile 0.001) and use it as the probability threshold for later data cleaning

In [115]:
# Value of 'Probability' at quantile 0.001 of the joint 1-7 keypoints; later
# cells treat anything below it as a "low probability keypoint".
prob_threshold = keypoints_1_7.loc[:, 'Probability'].quantile(q=0.001)
print(prob_threshold)

0.5404563260947001


#### Calculate the percentage of "low probability keypoints" (keypoints with probability < probability threshold) per folder

In [116]:
def _low_prob_percent(probabilities):
    """Return the percentage of values in `probabilities` that are below `prob_threshold`."""
    return np.count_nonzero(probabilities < prob_threshold) / len(probabilities) * 100


# Long form: one row per (Gesture, Sub folder No.), used later to pick folders.
keypoints_1_7_folder_prob_threshold_percent = keypoints_1_7.pivot_table(
    index=['Gesture', 'Sub folder No.'],
    values='Probability',
    aggfunc=_low_prob_percent,
)

# Wide form of the same numbers (Gesture rows x Sub folder No. columns) for the bar chart.
low_prob_percent_wide = keypoints_1_7.pivot_table(
    index='Gesture',
    columns='Sub folder No.',
    values='Probability',
    aggfunc=_low_prob_percent,
)
low_prob_percent_wide.plot(kind='bar', title='Before folder filter, Percentage of \"low probability keypoints\" per folder', legend=False, alpha=0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2792ced8160>

#### Filter the folders and keep only the folders that have less than 1 percent of "low probability keypoints"

In [117]:
# Maximum percentage of "low probability keypoints" a folder may contain and
# still be kept; compared against the per-folder percentages held in
# keypoints_1_7_folder_prob_threshold_percent.
low_prob_keypoints_percentage_threshold = 1

# (Gesture, Sub folder No.) pairs of the folders that are below that limit.
folder_is_kept = keypoints_1_7_folder_prob_threshold_percent['Probability'] < low_prob_keypoints_percentage_threshold
keypoints_1_7_folder_filtered_index = keypoints_1_7_folder_prob_threshold_percent[folder_is_kept].index.tolist()

# Build one (Gesture, Sub folder No.) key per keypoint row and test membership
# in a single vectorized call. This replaces a row-wise apply() that read each
# row by integer position (x[0], x[1]) -- positional lookup on a label-indexed
# Series is deprecated in recent pandas -- and scanned the list once per row.
row_folder_keys = pd.MultiIndex.from_arrays(
    [keypoints_1_7['Gesture'], keypoints_1_7['Sub folder No.']]
)
row_in_kept_folder = row_folder_keys.isin(keypoints_1_7_folder_filtered_index)
keypoints_1_7_folder_filtered = keypoints_1_7[row_in_kept_folder]

print('keypoints_1_7_filtered shape', keypoints_1_7_folder_filtered.shape)

keypoints_1_7_filtered shape (59955, 8)


In [118]:
# Same per-folder percentage as before the filter, recomputed on the kept folders only.
low_prob_percent_after_filter = keypoints_1_7_folder_filtered.pivot_table(
    index='Gesture',
    columns='Sub folder No.',
    values='Probability',
    aggfunc=lambda probabilities: np.count_nonzero(probabilities < prob_threshold) / len(probabilities) * 100,
)
low_prob_percent_after_filter.plot(kind='bar', title='After folder filter, Percentage of \"low probability keypoint\" per folder', legend=False, alpha=0.5)
plt.show()

<IPython.core.display.Javascript object>

In [119]:
fig, axes = plt.subplots(2, 1, sharex=True)

# Mean (top panel) and variance (bottom panel) of 'Probability' per folder,
# computed on the folder-filtered keypoints.
filtered_pivot_layout = dict(index='Gesture', columns='Sub folder No.', values='Probability')
filtered_bar_style = dict(kind='bar', legend=False, alpha=0.5)

filtered_probability_mean = pd.pivot_table(keypoints_1_7_folder_filtered, aggfunc='mean', **filtered_pivot_layout)
filtered_probability_var = pd.pivot_table(keypoints_1_7_folder_filtered, aggfunc='var', **filtered_pivot_layout)

filtered_probability_mean.plot(ax=axes[0], title='Average of all keypoints\' Probability per folder', **filtered_bar_style)
filtered_probability_var.plot(ax=axes[1], title='Variance of all keypoints\' Probability per folder', **filtered_bar_style)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x279467e5dd8>

#### Check the X values of Joint 3 in all frames: there are 0 values, which also need to be cleaned

In [48]:
# keypoints_1_7_folder_filtered[keypoints_1_7_folder_filtered['Joint']==3].pivot_table(index = 'Frame No.', columns = ['Gesture', 'Sub folder No.'], values='X').plot.line(alpha=0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2791cf49208>

#### We use Weighted Moving Average Smoothing to clean the 0 values of X and Y.
#### First, prepare a dataframe with the t-1 and t+1 values for each keypoint

In [72]:
# df = pd.concat([keypoints_1_7_folder_filtered, keypoints_1_7_folder_filtered.shift(7)[['X', 'Y', 'Probability']].rename(columns={'X':'X(t-1)', 'Y':'Y(t-1)', 'Probability':'Probability(t-1)'}), keypoints_1_7_folder_filtered.shift(-7)[['X', 'Y', 'Probability']].rename(columns={'X':'X(t+1)', 'Y':'Y(t+1)', 'Probability':'Probability(t+1)'})], axis=1)
# df.head(14)

Unnamed: 0,Gesture,Sub folder No.,Frame No.,Person,Joint,X,Y,Probability,Depth,X(t-1),Y(t-1),Probability(t-1),X(t+1),Y(t+1),Probability(t+1)
4926,1,4,1,0,1,599.539856,117.944977,0.606791,0.341176,,,,538.22644,120.586067,0.831424
4927,1,4,1,0,2,585.228027,110.167336,0.484397,0.501961,,,,497.787415,130.999329,0.744093
4928,1,4,1,0,3,530.401123,174.0849,0.800581,0.733333,,,,474.329376,202.840897,0.887369
4929,1,4,1,0,4,527.75824,129.74855,0.760209,0.705882,,,,433.882629,258.918304,0.834839
4930,1,4,1,0,5,610.026001,121.889938,0.664449,0.333333,,,,577.405212,111.47551,0.703935
4931,1,4,1,0,6,569.547424,213.194748,0.837365,0.32549,,,,611.282471,201.496506,0.811391
4932,1,4,1,0,7,486.079956,192.37912,0.828425,0.415686,,,,547.374878,254.978317,0.840501
4951,1,4,2,0,1,538.22644,120.586067,0.831424,0.454902,599.539856,117.944977,0.606791,587.8302,108.876297,0.653654
4952,1,4,2,0,2,497.787415,130.999329,0.744093,0.635294,585.228027,110.167336,0.484397,568.263733,106.234604,0.543617
4953,1,4,2,0,3,474.329376,202.840897,0.887369,0.627451,530.401123,174.0849,0.800581,538.255554,157.121521,0.425784


#### Use Weighted Moving Average Smoothing to calculate X_cleaned and Y_cleaned

In [73]:
# X_cleaned = df.apply(lambda row: (row['X']*row['Probability'] + row['X(t-1)']*row['Probability(t-1)'] + row['X(t+1)']*row['Probability(t+1)'])/(row['Probability']+row['Probability(t-1)']+row['Probability(t+1)']) if row['Probability']==0 else row['X'], axis=1)
# Y_cleaned = df.apply(lambda row: (row['Y']*row['Probability'] + row['Y(t-1)']*row['Probability(t-1)'] + row['Y(t+1)']*row['Probability(t+1)'])/(row['Probability']+row['Probability(t-1)']+row['Probability(t+1)']) if row['Probability']==0 else row['Y'], axis=1)

#### Add cleaned X and Y to the original dataframe

In [74]:
# keypoints_1_7_frame_cleaned = pd.concat([keypoints_1_7_folder_filtered, X_cleaned.rename('X_cleaned'), Y_cleaned.rename('Y_cleaned')], axis=1)
# keypoints_1_7_frame_cleaned[keypoints_1_7_frame_cleaned['Probability']==0].head()

Unnamed: 0,Gesture,Sub folder No.,Frame No.,Person,Joint,X,Y,Probability,Depth,X_cleaned,Y_cleaned
13504,1,12,34,0,4,0.0,0.0,0.0,0.039216,428.795566,193.354531
16331,1,15,29,0,6,0.0,0.0,0.0,0.917647,617.835623,193.691148
16332,1,15,29,0,7,0.0,0.0,0.0,0.917647,550.622166,175.39164
16581,1,15,39,0,6,0.0,0.0,0.0,0.92549,611.923678,193.682379
16582,1,15,39,0,7,0.0,0.0,0.0,0.92549,544.73273,181.237718


#### Recheck the X values of Joint 3 in all frames, we can see no more 0 values

In [75]:
# keypoints_1_7_frame_cleaned[keypoints_1_7_frame_cleaned['Joint']==3].pivot_table(index = 'Frame No.', columns = ['Gesture', 'Sub folder No.'], values='X_cleaned').plot.line(alpha=0.5)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x149ea521160>

#### After data cleaning of OpenPose data, we also need to clean depth data

#### Check the depth value when probability == 0 (aka. X==0 and Y==0)

In [76]:
# keypoints_1_7_frame_cleaned[keypoints_1_7_frame_cleaned['Probability']==0].pivot_table(index = ['Frame No.', 'Joint'], columns = ['Gesture', 'Sub folder No.'], values='Depth').swapaxes(axis1=0, axis2=1)

Unnamed: 0_level_0,Frame No.,9,11,11,16,18,18,21,21,23,27,...,184,184,241,241,360,552,554,554,554,554
Unnamed: 0_level_1,Joint,4,3,4,4,3,4,3,4,4,4,...,3,4,6,7,4,7,1,2,3,4
Gesture,Sub folder No.,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1,12,,,,,,,,,,,...,,,,,,,,,,
1,15,,,,,,,,,,,...,,,,,,,,,,
1,24,0.976471,0.976471,0.976471,,,,,,,,...,,,,,,,,,,
1,27,,,,,,,,,,,...,0.976471,0.976471,,,,,,,,
1,28,,,,,0.980392,0.980392,0.980392,0.980392,0.976471,0.976471,...,,,,,,,,,,
1,32,,,,0.980392,,,,,0.980392,,...,,,,,,,,,,
2,1,,,,,,,,,,0.031373,...,,,,,,,,,,
2,2,,,,,,,,,,,...,,,,,,,,,,
2,31,,,,,,,,,,,...,,,,,,,,,,
2,32,,,,,,,,,,,...,,,,,,,,,,


#### When Probability == 0, it means that the keypoint is occluded. To estimate the depth value, we use Moving Average Smoothing: get average of depth data at t-1 and t+1

In [77]:
# df = pd.concat([keypoints_1_7_frame_cleaned, keypoints_1_7_frame_cleaned.shift(7)['Depth'].rename('Depth(t-1)'), keypoints_1_7_frame_cleaned.shift(-7)['Depth'].rename('Depth(t+1)')], axis=1)
# Depth_cleaned = df[['Probability', 'Depth', 'Depth(t-1)', 'Depth(t+1)']].apply(lambda x: np.mean((x[2], x[3])) if x[0]==0 else x[1], axis=1)
# keypoints_1_7_frame_cleaned['Depth_cleaned'] = Depth_cleaned
# keypoints_1_7_frame_cleaned[keypoints_1_7_frame_cleaned['Probability']==0].head()

Unnamed: 0,Gesture,Sub folder No.,Frame No.,Person,Joint,X,Y,Probability,Depth,X_cleaned,Y_cleaned,Depth_cleaned
13504,1,12,34,0,4,0.0,0.0,0.0,0.039216,428.795566,193.354531,0.3
16331,1,15,29,0,6,0.0,0.0,0.0,0.917647,617.835623,193.691148,0.02549
16332,1,15,29,0,7,0.0,0.0,0.0,0.917647,550.622166,175.39164,0.066667
16581,1,15,39,0,6,0.0,0.0,0.0,0.92549,611.923678,193.682379,0.031373
16582,1,15,39,0,7,0.0,0.0,0.0,0.92549,544.73273,181.237718,0.068627


#### Now check the raw depth data of Joint 4 (Gesture 1) in all frames; we can see abnormal depth values (noise)

In [126]:
# Raw 'Depth' per frame for Joint 4 of Gesture 1, one line per (Gesture, Sub folder No.).
# The row mask is built from keypoints_1_7_folder_filtered itself. It previously
# came from keypoints_1_7_frame_cleaned, which is only created in a commented-out
# cell, so this cell raised NameError on a fresh kernel.
# (A disabled earlier variant plotted 'Depth_cleaned' for Joint 1 from that frame.)
is_joint_4_gesture_1 = (
    (keypoints_1_7_folder_filtered['Joint'] == 4)
    & (keypoints_1_7_folder_filtered['Gesture'] == 1)
)
keypoints_1_7_folder_filtered[is_joint_4_gesture_1].pivot_table(index = 'Frame No.', columns = ['Gesture', 'Sub folder No.'], values='Depth').plot.line(alpha=0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x27948fdf908>

#### Let's look at the distribution of the difference between each frame's depth and the median depth of the same joint (per gesture and sub folder)

In [128]:
# NOTE: a disabled earlier variant of this cell did the same computation on
# 'Depth_cleaned' from keypoints_1_7_frame_cleaned; the active code below works
# on the raw 'Depth' column of keypoints_1_7_folder_filtered.
folder_joint_keys = ['Gesture', 'Sub folder No.', 'Joint']

# Median depth for every (Gesture, Sub folder No., Joint) combination.
Depth_median = (
    keypoints_1_7_folder_filtered
    .pivot_table(index=folder_joint_keys, values='Depth', aggfunc=lambda depths: np.median(depths))
    .rename(columns={'Depth': 'Depth_median'})
)

# Attach that median to every keypoint row and compute the signed deviation from it.
df = keypoints_1_7_folder_filtered.join(Depth_median, on=folder_joint_keys)
df['Depth - Depth_median'] = df['Depth'].sub(df['Depth_median'])

# One histogram series per (Gesture, Joint) column of the wide table.
depth_deviation_wide = df.pivot_table(
    index=['Sub folder No.', 'Frame No.'],
    columns=['Gesture', 'Joint'],
    values='Depth - Depth_median',
)
depth_deviation_wide.plot(kind='hist', bins=200, alpha=0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x279526da9e8>

In [129]:
# NOTE: a disabled earlier variant computed these bounds from
# 'Depth_cleaned - Depth_median'; the active code uses 'Depth - Depth_median'.
# Lower (2%) and upper (98%) quantiles of the deviation, per (Gesture, Joint).
deviation_by_gesture_joint = dict(index=['Gesture', 'Joint'], values='Depth - Depth_median')

depth_percentile_left = (
    df.pivot_table(aggfunc=lambda deviations: deviations.quantile(0.02), **deviation_by_gesture_joint)
    .rename(columns={'Depth - Depth_median': 'Depth_confidence_interval_left'})
)
depth_percentile_right = (
    df.pivot_table(aggfunc=lambda deviations: deviations.quantile(0.98), **deviation_by_gesture_joint)
    .rename(columns={'Depth - Depth_median': 'Depth_confidence_interval_right'})
)

In [131]:
# Attach both per-(Gesture, Joint) quantile bounds to every keypoint row.
gesture_joint_keys = ['Gesture', 'Joint']
df2 = (
    df
    .join(depth_percentile_left, on=gesture_joint_keys)
    .join(depth_percentile_right, on=gesture_joint_keys)
)
df2.head(28)

Unnamed: 0,Gesture,Sub folder No.,Frame No.,Joint,X,Y,Probability,Depth,Depth_median,Depth - Depth_median,Depth_confidence_interval_left,Depth_confidence_interval_right
1,1,1,1,1,649.058167,268.895813,0.866711,0.392157,0.392157,0.0,-0.029333,0.027451
2,1,1,1,2,576.459595,272.745087,0.785951,0.788235,0.745098,0.043137,-0.737255,0.043137
3,1,1,1,3,504.003571,343.379303,0.762569,0.733333,0.639216,0.094118,-0.745098,0.266667
4,1,1,1,4,519.62439,225.855576,0.807953,0.745098,0.745098,0.0,-0.737255,0.591059
5,1,1,1,5,721.469788,262.992706,0.763511,0.058824,0.739216,-0.680392,-0.694118,0.572549
6,1,1,1,6,756.712524,368.897095,0.769164,0.058824,0.058824,0.0,-0.666667,0.67451
7,1,1,1,7,778.288879,484.381592,0.790371,0.694118,0.709804,-0.015686,-0.615216,0.635294
26,1,1,2,1,650.95343,266.936798,0.852125,0.380392,0.392157,-0.011765,-0.029333,0.027451
27,1,1,2,2,586.245117,270.83078,0.799579,0.745098,0.745098,0.0,-0.737255,0.043137
28,1,1,2,3,509.80011,378.636993,0.826347,0.721569,0.639216,0.082353,-0.745098,0.266667


In [92]:
# Number of rows per Joint whose 'Depth' is exactly 0.
zero_depth_count_per_joint = keypoints_1_7_folder_filtered.pivot_table(
    index=['Joint'],
    values='Depth',
    aggfunc=lambda depths: np.count_nonzero(depths == 0),
)
zero_depth_count_per_joint

Unnamed: 0_level_0,Depth
Joint,Unnamed: 1_level_1
1,518.0
2,539.0
3,1188.0
4,1992.0
5,523.0
6,699.0
7,746.0


In [93]:
# Median 'Depth' per Joint across all kept folders.
median_depth_per_joint = keypoints_1_7_folder_filtered.pivot_table(
    index=['Joint'],
    values='Depth',
    aggfunc=lambda depths: np.median(depths),
)
median_depth_per_joint

Unnamed: 0_level_0,Depth
Joint,Unnamed: 1_level_1
1,0.168627
2,0.180392
3,0.145098
4,0.129412
5,0.184314
6,0.156863
7,0.145098
