## Exploratory analysis of Kinect data from Python session 

(E and K pair debugging an implementation of the hangman game)

In [1]:
# Import software libraries
import numpy as np
from sklearn.cluster import KMeans
import pandas as pd

import seaborn as sns # for the nicer default plot aesthetics 
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
% matplotlib inline 
# % matplotlib notebook

from IPython.display import display
pd.options.display.max_columns = None
import pprint as pp

import helpers # written by us for MMLA pipeline purposes

In [None]:
# Import data from Unity or Python session
# Contact repository owners if you would like an anonymized data sample to run this notebook on your computer
# The file is semicolon-separated; column names come from helpers.col_names().
# Because `names=` is passed without `header=`, pandas treats the first line of the file as data, not as a header.
skeleton_df = pd.read_csv('data/kinectSkeleton.csv', sep=';', names=helpers.col_names())

In [None]:
# Clustering: fit k-means (8 clusters, fixed seed) on every column of skeleton_df
kmeans = KMeans(n_clusters=8, random_state=0)
kmeans.fit(skeleton_df)

# Cluster index assigned to each row, in row order
print(kmeans.labels_)
#kmeans.predict([[unseen data goes here]])

# Centroid coordinates, one row per cluster (displayed as the cell output)
kmeans.cluster_centers_

In [None]:
# Quick overview of how many rows are in each cluster
# Use one unit-wide bin centred on each integer label. The default 10 bins
# spread over 8 integer labels leaves empty gap bins and shifts the bars off
# their cluster numbers.
cluster_bin_edges = np.arange(kmeans.n_clusters + 1) - 0.5
plt.figure()
plt.hist(kmeans.labels_, bins=cluster_bin_edges)
plt.xticks(range(kmeans.n_clusters))
plt.xlabel('Cluster #')
plt.ylabel('Count')
plt.show()

In [None]:
# Set up dataframe of XYZ position columns
# Built from skeleton_df by helpers.create_xyz_df (project helper; presumably it
# keeps only the X/Y/Z position columns -- confirm in helpers.py).
kinect_xyz_df = helpers.create_xyz_df(skeleton_df)

In [None]:
# Add ClusterLabel column to dataframes
# Both frames get the same per-row k-means assignment, so rows stay aligned.
for labelled_df in (kinect_xyz_df, skeleton_df):
    labelled_df['ClusterLabel'] = kmeans.labels_

In [None]:
# Sanity check: preview the first rows and confirm the ClusterLabel column is present
kinect_xyz_df.head()

In [None]:
# Plotting a point cloud for a specific row (Kinect data sample)
plt.figure()
# Row 500 is selected by integer position (presumably an arbitrary sample frame).
row = kinect_xyz_df.iloc[500]
# helpers.xyz_triples reshapes the row for plotting; the later 3D cell reads
# 'X_POS' / 'Y_POS' / 'Z_POS' entries from its result.
row_triples = helpers.xyz_triples(row)
helpers.plot_xyz_triples(row_triples, title='Row 500')
plt.show()

Once we have the header for the Kinect data, we can follow up on what body positions these clusters correspond to and try the following:

(1) Find the average position of each skeleton.joint.cluster
(2) Visualize the joint.clusters
(3) Build a Markov state transition diagram of those body positions
(4) Label all data points according to their cluster, and name those clusters with something like "active", "passive", or "happy".


In [None]:
# Count how often cluster a is immediately followed by cluster b in consecutive rows.
n_clusters = 8  # must match n_clusters passed to KMeans
PairOccurrences = {(a, b): 0 for a in range(n_clusters) for b in range(n_clusters)}

# Pull the label column out once instead of doing two .iloc row lookups per step.
cluster_labels = kinect_xyz_df['ClusterLabel'].astype(int).tolist()

# zip pairs every label with its successor, i.e. all len-1 transitions.
# (A range(0, len-2) bound would silently drop the final transition.)
for current_label, next_label in zip(cluster_labels, cluster_labels[1:]):
    PairOccurrences[current_label, next_label] += 1

# Express each (from, to) count as a percentage of all observed transitions.
num_transitions = sum(PairOccurrences.values())
transitionFrequencies = {}
for k in PairOccurrences.keys():
    # With fewer than two rows there are no transitions; report 0% rather than dividing by zero.
    transitionFrequencies[k] = (PairOccurrences[k] / num_transitions * 100) if num_transitions else 0.0

In [None]:
# Pretty-print the transition percentages, keyed by (from_cluster, to_cluster)
pp.pprint(transitionFrequencies)

### Visualizing skeletons

In [None]:
# 3D scatter of every joint position in one sample row (row 500)
row = kinect_xyz_df.iloc[500]
row_triples = helpers.xyz_triples(row)

fig = plt.figure(1)
# Create the 3D axes through the figure so they are registered with it.
# A bare Axes3D(fig) is no longer attached to the figure automatically in
# recent Matplotlib releases, which leaves the rendered figure empty.
ax = fig.add_subplot(111, projection='3d')
ax.scatter(row_triples['X_POS'], row_triples['Y_POS'], row_triples['Z_POS'])
ax.set_xlabel('X_POS')
ax.set_ylabel('Y_POS')
ax.set_zlabel('Z_POS')
ax.set_title('EQW and KBR Skeletons (row 500)')

# TODO: put into helpers.py eventually
def draw_line(ax, a, b):
    """Draw a straight 3D segment from point ``a`` to point ``b`` on ``ax``.

    Parameters
    ----------
    ax : object with a ``plot(x, y, z)`` method (e.g. a Matplotlib 3D axes)
    a, b : indexable with at least three numeric entries
        Segment end points; entries 0, 1, 2 go to the plot's x, y, z arguments.

    The segment is sampled at 100 evenly spaced points per coordinate.
    """
    n_samples = 100
    xs, ys, zs = (np.linspace(a[axis], b[axis], n_samples) for axis in range(3))
    ax.plot(xs, ys, zs)

In [None]:
# Drop all the lower body positions before clustering/analyzing (skeleton_df_upper)

# TODO: put into helpers.py eventually
UPPER_PARTS = ['SHOULDER', 'HEAD', 'ELBOW', 'WRIST', 'HAND']

# Position (non-confidence) columns whose name mentions an upper-body part.
# Columns are visited in dataframe order, and a column is listed once per
# part name it contains.
UPPER_PARTS_COLS = [
    col
    for col in skeleton_df.columns.values.tolist()
    for part in UPPER_PARTS
    if ('POS' in col) and (part in col) and ('CONF' not in col)
]

skeleton_df_upper = skeleton_df[UPPER_PARTS_COLS]

In [None]:
# Group by cluster for individual cluster analysis
clusters = skeleton_df.groupby('ClusterLabel')
# Rows assigned to cluster 7; get_group raises KeyError if no row carries that label.
cluster7 = clusters.get_group(7)
# Column-wise mean of those rows, used further down as the cluster's "average skeleton".
cluster7centroid = cluster7.mean()

In [None]:
# TODO: put into helpers.py eventually 
# Pairs of joints connected by a line ("bone") when drawing the upper body.
JOINT_PAIRS = [
    ['WRIST_LEFT', 'HAND_LEFT'],
    ['WRIST_LEFT', 'ELBOW_LEFT'],
    ['ELBOW_LEFT', 'SHOULDER_LEFT'],
    ['SHOULDER_LEFT', 'SHOULDER_CENTER'],
    ['SHOULDER_CENTER', 'SHOULDER_RIGHT'],
    ['ELBOW_RIGHT', 'SHOULDER_RIGHT'],
    ['WRIST_RIGHT', 'ELBOW_RIGHT'],
    ['WRIST_RIGHT', 'HAND_RIGHT'],
    ['SHOULDER_CENTER', 'HEAD'],
]


def plot_skeleton(ax, row):
    """Draw the upper-body skeletons of persons P1 and P2 onto ``ax``.

    Parameters
    ----------
    ax : axes object handed through to ``draw_line``
    row : mapping indexable by column name (e.g. a dataframe row or a
        per-column mean); must provide ``<JOINT>_POS_<X|Y|Z>_<P1|P2>`` for
        every joint named in ``JOINT_PAIRS``.

    Returns
    -------
    None

    Note: points are handed to ``draw_line`` in (X, Z, Y) order, so the
    plot's y-axis carries POS_Z and its z-axis carries POS_Y.
    """
    def joint_point(joint, person):
        # Look up the joint's coordinates in the (X, Z, Y) plotting order.
        return [row[joint + '_POS_' + axis + '_' + person] for axis in ('X', 'Z', 'Y')]

    for person in ['P1', 'P2']:
        for first_joint, second_joint in (
            (pair[0], pair[1]) for pair in JOINT_PAIRS
        ):
            start_point = joint_point(first_joint, person)
            end_point = joint_point(second_joint, person)
            draw_line(ax, start_point, end_point)

    return

In [None]:
# Draw the mean skeleton of cluster 7 and save the figure to disk
test_row = cluster7centroid
title = 'Python Kinect Skeleton Cluster 7 Mean'

fig = plt.figure(5)
# Create the 3D axes through the figure so they are registered with it.
# A bare Axes3D(fig) is no longer attached to the figure automatically in
# recent Matplotlib releases, which leaves the rendered/saved figure empty.
ax = fig.add_subplot(111, projection='3d')
plot_skeleton(ax, test_row)
# plot_skeleton passes points as (X, Z, Y): the plot's y-axis shows the
# POS_Z values and its z-axis shows the POS_Y values, so label accordingly.
ax.set_xlabel('x position')
ax.set_ylabel('z position')
ax.set_zlabel('y position')
ax.set_title(title)
# File name is the title with spaces removed; the 'figs/' directory must already exist.
plt.savefig('figs/'+title.replace(' ', '')+'.jpg', dpi=300)
plt.show()

^ Cluster 7 = hands out passive

### Markov Analysis with networkx

In [None]:
# TODO: put into helpers.py eventually
import networkx as nx
import matplotlib.pyplot as plt

def draw_graph(transitionFrequencies, 
               labels=None, 
               graph_layout='shell',
               node_size=7000, 
               node_color='pink', 
               node_alpha=0.3,
               node_text_size=40,
               edge_color='blue', 
               edge_alpha=0.3, 
               edge_tickness=1,
               edge_text_pos=0.3,
               text_font='sans-serif'):
    """Draw the cluster-transition graph with networkx and show it.

    Parameters
    ----------
    transitionFrequencies : dict
        Maps ``(from_node, to_node)`` keys to the value drawn as that edge's
        label. Every key contributes one edge to an undirected graph.
    labels : unused
        Accepted for interface compatibility; not read by this function.
    graph_layout : str
        ``'spring'``, ``'spectral'`` or ``'random'``; any other value
        (including the default ``'shell'``) selects the shell layout.
    node_size, node_color, node_alpha : node drawing options.
    node_text_size, text_font : node label font size and family.
    edge_color, edge_alpha, edge_tickness : edge colour, alpha and line width.
    edge_text_pos : position of each edge label along its edge (``label_pos``).

    Returns
    -------
    None. Draws onto the current Matplotlib figure and calls ``plt.show()``.
    """
    # Build the graph: one edge per (from, to) key.
    graph = nx.Graph()
    graph.add_edges_from((edge[0], edge[1]) for edge in transitionFrequencies.keys())

    # Pick the node-placement algorithm; shell is the fallback and seems to
    # work best for this data.
    layout_functions = {
        'spring': nx.spring_layout,
        'spectral': nx.spectral_layout,
        'random': nx.random_layout,
    }
    compute_layout = layout_functions.get(graph_layout, nx.shell_layout)
    node_positions = compute_layout(graph)

    # Draw nodes, edges, node labels, then the per-edge frequency labels.
    nx.draw_networkx_nodes(
        graph, node_positions,
        node_size=node_size, alpha=node_alpha, node_color=node_color,
    )
    nx.draw_networkx_edges(
        graph, node_positions,
        width=edge_tickness, alpha=edge_alpha, edge_color=edge_color,
    )
    nx.draw_networkx_labels(
        graph, node_positions,
        font_size=node_text_size, font_family=text_font,
    )
    nx.draw_networkx_edge_labels(
        graph, node_positions,
        edge_labels=transitionFrequencies,
        label_pos=edge_text_pos,
        font_size=24,
    )
    plt.show()

In [None]:
# Count how often cluster a is immediately followed by cluster b in consecutive rows,
# then draw the transitions as a graph labelled with their percentage frequencies.
n_clusters = 8  # must match n_clusters passed to KMeans
PairOccurrences = {(a, b): 0 for a in range(n_clusters) for b in range(n_clusters)}

# Pull the label column out once instead of doing two .iloc row lookups per step.
cluster_labels = kinect_xyz_df['ClusterLabel'].astype(int).tolist()

# zip pairs every label with its successor, i.e. all len-1 transitions.
# (A range(0, len-2) bound would silently drop the final transition.)
for current_label, next_label in zip(cluster_labels, cluster_labels[1:]):
    PairOccurrences[current_label, next_label] += 1

num_transitions = sum(PairOccurrences.values())
transitionFrequencies = {}
for k in PairOccurrences.keys():
    # With fewer than two rows there are no transitions; report 0% rather than dividing by zero.
    percentage = (PairOccurrences[k] / num_transitions * 100) if num_transitions else 0.0
    # Stored as a 2-decimal string because the value is used directly as the edge label text.
    transitionFrequencies[k] = format(percentage, '.2f')
plt.figure(figsize=(20,20))
draw_graph(transitionFrequencies)