In [None]:
import numpy as np
import cv2, PIL
from cv2 import aruco
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import os
from pandas import DataFrame
import csv
from scipy.spatial import distance
import math
import re

import warnings
warnings.filterwarnings('ignore')

%matplotlib nbagg

In [None]:
# Load the per-participant metrics table; the 'Participant' column becomes the index.
Participants_metrics_dataset = pd.read_csv(
    "Participants_metrics_dataset.csv",
    index_col='Participant',
)

In [None]:
# Eleven metric columns per participant. Rows with a missing value in any of
# them are dropped before the selection is converted to a NumPy matrix.
features = (
    Participants_metrics_dataset
    .loc[:, [
        'avgT_detection_VP',
        'avgT_pointing_VP',
        'avgT_action_VP',
        'avgT_conf_VP',
        'avg_length_point_VP',
        'avg_length_gazeb_VP',
        'avg_length_gazem_VP',
        'avg_length_gazec_VP',
        'avg_frequency_gazeb_VP',
        'avg_frequency_gazem_VP',
        'avg_frequency_gazec_VP',
    ]]
    .dropna()
    .to_numpy()
)
features.shape

In [None]:
# Eight-column subset of the metrics (name suggests the non-normally
# distributed ones — TODO confirm). Rows with any missing value are dropped.
features_non_normal = (
    Participants_metrics_dataset
    .loc[:, [
        'avgT_pointing_VP',
        'avgT_action_VP',
        'avg_length_point_VP',
        'avg_length_gazeb_VP',
        'avg_length_gazem_VP',
        'avg_frequency_gazeb_VP',
        'avg_frequency_gazem_VP',
        'avg_frequency_gazec_VP',
    ]]
    .dropna()
    .to_numpy()
)
features_non_normal.shape

In [None]:
# Two accuracy columns per participant (column 0: 'Acc_gaze_per_VP',
# column 1: 'Acc_per_VP'); rows missing either value are dropped.
accuracy_features = (
    Participants_metrics_dataset
    .loc[:, ['Acc_gaze_per_VP', 'Acc_per_VP']]
    .dropna()
    .to_numpy()
)
accuracy_features.shape

In [None]:
# Dot plot of a single metric, one row per participant.
#
# `features` was built with dropna() over the eleven columns below, so the
# participant labels must be taken from the same filtered rows. Using the
# unfiltered index makes X and Y differ in length (scatter raises) as soon as
# any participant has a missing metric.
feature_columns = [
    'avgT_detection_VP', 'avgT_pointing_VP', 'avgT_action_VP', 'avgT_conf_VP',
    'avg_length_point_VP',
    'avg_length_gazeb_VP', 'avg_length_gazem_VP', 'avg_length_gazec_VP',
    'avg_frequency_gazeb_VP', 'avg_frequency_gazem_VP', 'avg_frequency_gazec_VP',
]
labels = (
    Participants_metrics_dataset[feature_columns]
    .dropna()
    .index.values.astype(str).tolist()
)
Y = labels
X = features[:,10]  # column 10 of `features` is 'avg_frequency_gazec_VP'

plt.figure(figsize=(20, 16))
plt.scatter(X, Y, s=250)
plt.xlabel(feature_columns[10])
plt.ylabel('Participant')
plt.show()

In [None]:
# Scatter of the two accuracy columns, one dot per participant.
X = accuracy_features[:,0]
Y = accuracy_features[:,1]
# Take the labels from the same NaN-filtered rows that produced
# `accuracy_features`, so that labels[i] always describes row i.
labels = (
    Participants_metrics_dataset[['Acc_gaze_per_VP', 'Acc_per_VP']]
    .dropna()
    .index.values.astype(str).tolist()
)

# The font size has to be set BEFORE the figure and its text are created;
# updating rcParams afterwards only affects figures drawn by later cells.
plt.rcParams.update({'font.size': 48})

plt.figure(figsize=(20, 16))
plt.subplots_adjust(bottom=0.1)
plt.scatter(X, Y, label='True Position', s=550)

# Optional: annotate every dot with its participant ID.
# for label, x, y in zip(labels, X, Y):
#     plt.annotate(
#         label,
#         xy=(x, y), xytext=(-3, 3),
#         textcoords='offset points', ha='right', va='bottom')

plt.xlabel('Gaze Accuracy')
plt.ylabel('Pointing Accuracy')
plt.xlim(0, 100)
plt.ylim(0, 100)

plt.tight_layout()
# plt.savefig('distribution_accuracies_2.png')
plt.show()


In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage

# Ward-linkage hierarchical clustering of the two accuracy columns.
linked = linkage(accuracy_features, 'ward')

# Build the leaf labels here, from the same NaN-filtered rows that produced
# `accuracy_features`. dendrogram() needs exactly one label per observation,
# and the global `labels` is rebound by other cells (including to cluster ids),
# so relying on it makes this cell depend on execution order.
labelList = (
    Participants_metrics_dataset[['Acc_gaze_per_VP', 'Acc_per_VP']]
    .dropna()
    .index.values.astype(str).tolist()
)

# Set the font size before the figure is created so it applies to this figure.
plt.rcParams.update({'font.size': 48})

plt.figure(figsize=(20, 16))
dendrogram(
    linked,
    orientation='top',
    labels=labelList,
    distance_sort='descending',
    show_leaf_counts=True,
    leaf_font_size=23,
    no_labels=False,
)
plt.xlabel('Participants')
plt.ylabel('Score')
plt.tight_layout()
#plt.savefig('dendogram_accuracies.pdf')
plt.show()

In [None]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.decomposition import PCA

# Agglomerative (Ward) clustering of the two accuracy columns into 4 groups.
# The `affinity` keyword was deprecated in scikit-learn 1.2 and removed in 1.4
# (renamed to `metric`). Ward linkage only supports Euclidean distance, which
# is also the default, so omitting the argument works on every version.
cluster = AgglomerativeClustering(n_clusters=4, linkage='ward')
cluster.fit(accuracy_features)

print(cluster.labels_)

# 2-D PCA projection; only needed for the alternative (commented-out) axes below.
pca = PCA(n_components=2).fit_transform(accuracy_features)

# Set the font size before the figure is created so it applies to this figure.
plt.rcParams.update({'font.size': 48})

fig, ax = plt.subplots(figsize=(20, 16))
scatter = ax.scatter(
    accuracy_features[:, 0],
    accuracy_features[:, 1],
    c=cluster.labels_,
    s=750,
    cmap='Paired',
)
ax.set_xlabel('Gaze Accuracy')
ax.set_ylabel('Pointing Accuracy')
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)

#ax.set_xlabel('First Principal Component')
#ax.set_ylabel('Second Principal Component')

# produce a legend with the unique colors from the scatter
#legend1 = ax.legend(*scatter.legend_elements(),
#                    loc="upper right", title="Clusters")
#ax.add_artist(legend1)
plt.tight_layout()
# plt.savefig('heirarch_cluster_accuracies_3.pdf')
plt.show()

In [None]:
# Scatter of the first two columns of `features`, with one legend entry per
# participant.
feature_columns = [
    'avgT_detection_VP', 'avgT_pointing_VP', 'avgT_action_VP', 'avgT_conf_VP',
    'avg_length_point_VP',
    'avg_length_gazeb_VP', 'avg_length_gazem_VP', 'avg_length_gazec_VP',
    'avg_frequency_gazeb_VP', 'avg_frequency_gazem_VP', 'avg_frequency_gazec_VP',
]
x = features[:,0]  # 'avgT_detection_VP'
y = features[:,1]  # 'avgT_pointing_VP'

# `features` had dropna() applied over the columns above, so the legend labels
# must come from the same filtered rows. Taking them from the unfiltered index
# silently attaches the wrong participant ID to every point after a dropped row.
labels = (
    Participants_metrics_dataset[feature_columns]
    .dropna()
    .index.values.astype(str).tolist()
)
if len(labels) != len(x):
    # Guards against `feature_columns` drifting from the list used to build `features`.
    raise ValueError(
        "participant labels (%d) do not match rows of `features` (%d)"
        % (len(labels), len(x))
    )

# Set the font size before the figure is created so it applies to this figure.
plt.rcParams.update({'font.size': 18})

plt.figure(figsize=(20, 16))

for label, x_val, y_val in zip(labels, x, y):
    plt.scatter(x_val, y_val, label=label, s=250)

#plt.title('Participants\' average Pointing time vs ')
# The axes show the avgT_* columns, not accuracies (the old labels were copied
# from the accuracy plot). Presumably average times — TODO confirm units.
plt.xlabel('Average Detection Time')
plt.ylabel('Average Pointing Time')
plt.legend(loc=1)
plt.tight_layout()
# plt.savefig('timedv.png')
plt.show()

In [None]:
from sklearn.cluster import KMeans
import numpy as np
from sklearn.decomposition import PCA

# Elbow curve: fit KMeans for an increasing number of clusters and plot the
# score of each fit (KMeans.score is the negative of the k-means objective).
# KMeans cannot fit more clusters than there are samples, so the upper bound
# (39, as before) is capped at the number of rows in `accuracy_features`.
max_clusters = min(39, len(accuracy_features))
Nc = range(1, max_clusters + 1)
kmeans = [KMeans(n_clusters=k, random_state=420) for k in Nc]
score = [model.fit(accuracy_features).score(accuracy_features) for model in kmeans]
print(score)

# Set the font size before the figure is created so it applies to this figure.
plt.rcParams.update({'font.size': 18})

plt.figure(figsize=(20, 16))
plt.plot(Nc, score, linewidth=5)
plt.xlabel('Number of Clusters')
plt.ylabel('Score')
plt.tight_layout()
# plt.savefig('elbowcurve_timedv.pdf')
plt.show()

In [None]:


# KMeans clustering of the two accuracy columns into N groups.
N = 3

# 2-D PCA projection; only plotted by the alternative (commented-out) scatter below.
pca = PCA(n_components=2).fit_transform(accuracy_features)
print(pca.shape)
print("==========================")


kmeans = KMeans(n_clusters=N, random_state=420).fit(accuracy_features)
#kmeans.labels_.shape

# Report the size and the row positions (within accuracy_features) of each cluster.
for i in range(N):
    member_rows = np.where(kmeans.labels_ == i)[0]
    print("cluster ",i)
    print("length ",member_rows.shape[0])
    print(member_rows)
    print("-----------------------------------")


# rcParams must be set before the figure is created to apply to this figure;
# fonttype 42 embeds TrueType fonts in saved PDF/PS output.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.rcParams.update({'font.size': 15})

fig, ax = plt.subplots(figsize=(20, 16))
scatter = ax.scatter(
    accuracy_features[:, 0],
    accuracy_features[:, 1],
    c=kmeans.labels_,
    s=750,
    cmap='Paired',
)
ax.set_xlabel('Gaze Accuracy')
ax.set_ylabel('Pointing Accuracy')
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)

# scatter = ax.scatter(pca[:, 0], pca[:, 1], c=kmeans.labels_, s=750, cmap='Paired')
# ax.set_xlabel('First Principal Component')
# ax.set_ylabel('Second Principal Component')

# produce a legend with the unique colors from the scatter
#legend1 = ax.legend(*scatter.legend_elements(),
#                    loc="upper right", title="Clusters")
#ax.add_artist(legend1)
plt.tight_layout()
# plt.savefig('kmeans_cluster_timedv.pdf')
plt.show()

In [None]:
from sklearn.mixture import GaussianMixture

# Gaussian-mixture clustering of the two accuracy columns into N components.
N=3

gmm = GaussianMixture(n_components=N, random_state=420, init_params= 'random', covariance_type='full').fit(accuracy_features)
# NOTE: this rebinds the global `labels` (participant names in earlier cells)
# to an array of cluster ids.
labels = gmm.predict(accuracy_features)
print(labels)

# Participant IDs must come from the same NaN-filtered rows as
# `accuracy_features`. Indexing the unfiltered index with row positions of the
# filtered matrix reports the wrong participants once any row was dropped.
accuracy_rows = Participants_metrics_dataset[['Acc_gaze_per_VP', 'Acc_per_VP']].dropna()
labels__ = accuracy_rows.index.values.astype(str).tolist()
labels__int = accuracy_rows.index.values

# Per cluster: size, row positions within accuracy_features, participant IDs.
for i in range(N):
    member_rows = np.where(labels == i)[0]
    print("cluster ",i)
    print("length ",member_rows.shape[0])
    print(member_rows)
    print("-----------------------------------")
    print(labels__int[member_rows])
    print("-----------------------------------")


# rcParams must be set before the figure is created to apply to this figure;
# fonttype 42 embeds TrueType fonts in saved PDF/PS output.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.rcParams.update({'font.size': 48})

fig, ax = plt.subplots(figsize=(20, 16))
scatter = ax.scatter(
    accuracy_features[:, 0],
    accuracy_features[:, 1],
    c=labels,
    s=750,
    cmap='Paired',
)

# Optional: annotate every dot with its participant ID.
# for label, x, y in zip(labels__, accuracy_features[:, 0], accuracy_features[:, 1]):
#     ax.annotate(
#         label,
#         xy=(x, y), xytext=(-3, 3),
#         textcoords='offset points', ha='right', va='bottom')

ax.set_xlabel('Gaze Accuracy')
ax.set_ylabel('Pointing Accuracy')
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)

# scatter = ax.scatter(pca[:, 0], pca[:, 1], c=kmeans.labels_, s=750, cmap='Paired')
# ax.set_xlabel('First Principal Component')
# ax.set_ylabel('Second Principal Component')

# produce a legend with the unique colors from the scatter
#legend1 = ax.legend(*scatter.legend_elements(),
#                    loc="upper right", title="Clusters")
#ax.add_artist(legend1)
plt.tight_layout()
# ax.patch.set_visible(False)
# ax.axis('off')
# plt.ylim(0, 1)
# plt.savefig('GMM_Example.png')
# plt.savefig('GaussianMixture_full.png')
plt.show()

In [None]:


# KMeans clustering of the two accuracy columns into N groups.
N = 11

#pca = PCA(n_components=2).fit_transform(accuracy_features)
#print(pca.shape)
#print("==========================")


kmeans = KMeans(n_clusters=N, random_state=420).fit(accuracy_features)
#kmeans.labels_.shape

# Report the size and the row positions (within accuracy_features) of each cluster.
for i in range(N):
    member_rows = np.where(kmeans.labels_ == i)[0]
    print("cluster ",i)
    print("length ",member_rows.shape[0])
    print(member_rows)
    print("-----------------------------------")


# rcParams must be set before the figure is created to apply to this figure;
# fonttype 42 embeds TrueType fonts in saved PDF/PS output.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.rcParams.update({'font.size': 58})

fig, ax = plt.subplots(figsize=(20, 16))
scatter = ax.scatter(
    accuracy_features[:, 0],
    accuracy_features[:, 1],
    c=kmeans.labels_,
    s=750,
    cmap='Paired',
)
ax.set_xlabel('Gaze Accuracy')
ax.set_ylabel('Pointing Accuracy')
#ax.set_xlabel('First Principal Component')
#ax.set_ylabel('Second Principal Component')

# produce a legend with the unique colors from the scatter
#legend1 = ax.legend(*scatter.legend_elements(),
#                    loc="upper right", title="Clusters")
#ax.add_artist(legend1)
plt.tight_layout()
# plt.savefig('kmeans_cluster_accuracies.pdf')
plt.show()