# Import

In [14]:
import pandas as pd
import numpy as np
import pygaze
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import scipy
import glob
from tqdm import tqdm
from sklearn.cluster import DBSCAN
import detectors
import gazeplotter
from collections import defaultdict

# function

In [2]:
def get_center(clustering, data):
    center = []
    for i in range(len(set(clustering.labels_)) - 1):
        xi = data[np.where(clustering.labels_ == i)]
        cx = sum(xi.T[0])/len(xi)
        cy = sum(xi.T[1])/len(xi)
        center.append((cx,cy))
    return center

# cluster_center = get_center(clustering,X)

In [3]:
def transition_matrix(transitions):
    n = 1+ max(transitions) #number of states

    M = [[0]*n for _ in range(n)]

    for (i,j) in zip(transitions,transitions[1:]):
        M[i][j] += 1

    #now convert to probabilities:
    for row in M:
        s = sum(row)
        if s > 0:
            row[:] = [f/s for f in row]
    return M


In [4]:
def distance(x,y):
    return ((x[0]-y[0])**2 + (x[1]-y[1])**2)**0.5

def dbscan_predict(cluster_center, X_new, min_dist = 50, metric=distance):
    # Result is noise by default
    y_new = np.ones(shape=len(X_new), dtype=int)*-1 

    # Iterate all input samples for a label
    for j, x_new in enumerate(X_new):
        # Find a core sample closer than EPS
        for i, x_core in enumerate(cluster_center): 
            if metric(x_new, x_core) < min_dist:
                # Assign label of x_core to x_new
                y_new[j] = i
                break

    return y_new

In [53]:
def run_all(csv):
    df_data = pd.read_csv(csv)
    df_data.fillna(0.0, inplace=True)

    X = np.array(df_data['X Pos'].tolist())
    Y = np.array(df_data['Y Pos'].tolist())
    time = np.array(df_data['Start Time (secs)'].tolist())*1000
    
    # detect blink, fixation and saccade
    Sblk, Eblk = detectors.blink_detection(X,Y,time,minlen=6)
    Sfix, Efix = detectors.fixation_detection(X,Y,time,maxdist=10,mindur=50)
    Ssac, Esac = detectors.saccade_detection(X,Y,time,minlen=5,maxvel=40,maxacc=340)
    
    # clustering
    X = np.array(Efix).T[3:].T
    clustering = DBSCAN(eps=20, min_samples=3).fit(X)
    cluster_center = get_center(clustering,X)
    pred = dbscan_predict(cluster_center, np.array(Efix).T[3:].T)
    transitions = pred[np.where(pred!=-1)]
    
    # transition matrix and GTE, SGE
    trans_matrix = transition_matrix(transitions)
    pA = [len(np.where(np.array(transitions)==i)[0])/len(transitions) for i in range(len(set(transitions)))]
    Ht = 0
    Hs = 0
    for i in range(len(pA)):
        Hs += -1 * np.nan_to_num(pA[i]*np.log2(pA[i]))
        t = np.nan_to_num(trans_matrix[i]*np.log2(trans_matrix[i]))
        Ht += -sum(pA[i]*(t))
    
    total_time = time[-1] - time[0]
        
    return Eblk, Efix, Esac, trans_matrix, Hs, Ht, total_time
    

# import data

In [6]:
csv_files = glob.glob("data/*.csv")

In [7]:
csv_files_one = [v for v in csv_files if "One Gaze-Left" in v]
csv_files_two = [v for v in csv_files if "Two Gaze-Left" in v]
csv_files_three = [v for v in csv_files if "Three Go-Around Gaze-Left" in v]

In [11]:
df_par = pd.read_csv("participant.csv")
group = [df_par[df_par['Group'].str.contains("1")]['ID'].tolist(), df_par[df_par['Group'].str.contains("2")]["ID"].tolist()]
group = [[i[-3:] for i in v] for v in group]
group

[['032', '027', '031', '028', '004', '008', '010', '029', '003', '007', '023'],
 ['021',
  '006',
  '019',
  '022',
  '015',
  '016',
  '014',
  '005',
  '025',
  '002',
  '001',
  '020',
  '011',
  '017']]

In [54]:
feature_groups = []
for g in tqdm(group):
    trials = []
    for csv_files in [csv_files_one, csv_files_two, csv_files_three]:
        ret = defaultdict(list)
        for csv in csv_files:
            par_id = csv[14:17]
            if par_id not in g:
                continue
            Eblk, Efix, Esac, trans_matrix, Hs, Ht, total_time = run_all(csv)
            ret["Eblk"].append(Eblk)
            ret["Efix"].append(Efix)
            ret["Esac"].append(Esac)
            ret["trans_matrix"].append(trans_matrix)
            ret["Hs"].append(Hs)
            ret["Ht"].append(Ht)
            ret["total_time"].append(total_time)
        trials.append(ret)
    feature_groups.append(trials)





100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:06<00:00,  3.01s/it]


# hypothesis test

In [19]:
# standardized effect size - cohen's d 
def effect_size(a, b):
    es = np.abs(np.mean(a) - np.mean(b))
    sd_pooled = np.sqrt((((len(a)-1)*(np.std(a)**2) + (len(b)-1)*(np.std(b)**2)) / (len(a) + len(b) - 2)))
    d = es/sd_pooled
    
    return d

In [25]:
def statistic(g):
    print("GROUP 1")
    print("mean trial 1:", np.mean(g[0][0]))
    print("mean trial 2:", np.mean(g[0][1]))
    print("mean trial 3:", np.mean(g[0][2]))

    print("\nstd trial 1:", np.std(g[0][0]))
    print(  "std trial 2:", np.std(g[0][1]))
    print(  "std trial 3:", np.std(g[0][2]))

    print("--------------------------")
    print("GROUP 2")
    print("mean trial 1:", np.mean(g[1][0]))
    print("mean trial 2:", np.mean(g[1][1]))
    print("mean trial 3:", np.mean(g[1][2]))

    print("\nstd trial 1:", np.std(g[1][0]))
    print(  "std trial 2:", np.std(g[1][1]))
    print(  "std trial 3:", np.std(g[1][2]))

## fixation count

In [75]:
g = []
for v in feature_groups:
    x = np.array([len(v) for v in v[0]['Efix']])/np.array(v[0]['total_time'])*1000
    y = np.array([len(v) for v in v[1]['Efix']])/np.array(v[1]['total_time'])*1000
    z = np.array([len(v) for v in v[2]['Efix']])/np.array(v[2]['total_time'])*1000

    g.append([x,y,z])

In [76]:
statistic(g)

GROUP 1
mean trial 1: 2.5341624659854247
mean trial 2: 2.787380311075475
mean trial 3: 2.632076457334475

std trial 1: 0.648067211716558
std trial 2: 0.2579959704868572
std trial 3: 0.33681306005581
--------------------------
GROUP 2
mean trial 1: 2.819582132646579
mean trial 2: 2.8548615129938297
mean trial 3: 2.8306069973099026

std trial 1: 0.3144658869610738
std trial 2: 0.24493859619843417
std trial 3: 0.25892061357044466


In [77]:
scipy.stats.ttest_ind(g[0][2], g[1][2], equal_var = False)

Ttest_indResult(statistic=-1.4896529681039306, pvalue=0.1557453116041762)

In [78]:
effect_size(g[0][2],g[1][2])

0.6768930439711685

## fixation duration

In [79]:
g = []
for v in feature_groups:
    x = np.array([])
    for i, p in enumerate(v[0]['Efix']):
        x = np.append(x,np.array(p).T[2])

    y = []
    for i, p in enumerate(v[1]['Efix']):
        y = np.append(y,np.array(p).T[2])

    z = []
    for i, p in enumerate(v[2]['Efix']):
        z = np.append(z,np.array(p).T[2])
        
    g.append([x,y,z])
    

In [80]:
statistic(g)

GROUP 1
mean trial 1: 204.40369556977026
mean trial 2: 204.2687406858598
mean trial 3: 205.45100911161882

std trial 1: 199.9427968851456
std trial 2: 191.52683624893587
std trial 3: 182.2572849996305
--------------------------
GROUP 2
mean trial 1: 203.9062511033328
mean trial 2: 216.55536761778345
mean trial 3: 198.14541759112814

std trial 1: 190.51646988803185
std trial 2: 202.51649652261278
std trial 3: 175.01682092024961


In [85]:
scipy.stats.ttest_ind(g[0][2], g[1][2], equal_var = False)

Ttest_indResult(statistic=0.9265604330095188, pvalue=0.35427651172364394)

In [86]:
effect_size(g[0][2],g[1][2])

0.04103747092721051

## SGE

In [87]:
g = []
for v in feature_groups:
    x = np.array(v[0]['Hs'])
    y = np.array(v[1]['Hs'])
    z = np.array(v[2]['Hs'])

    g.append([x,y,z])

In [88]:
statistic(g)

GROUP 1
mean trial 1: 1.5103813694222759
mean trial 2: 1.479482047175015
mean trial 3: 2.0081675255562677

std trial 1: 0.25151702414687976
std trial 2: 0.2668253793957232
std trial 3: 0.34328910777172555
--------------------------
GROUP 2
mean trial 1: 1.6450024252579145
mean trial 2: 1.535543248954841
mean trial 3: 1.8706108051834711

std trial 1: 0.34501697574742524
std trial 2: 0.3241505774938866
std trial 3: 0.6010248360862738


In [93]:
scipy.stats.ttest_ind(g[0][1], g[1][1], equal_var = False)

Ttest_indResult(statistic=-0.4546840456434708, pvalue=0.6536183806385925)

In [94]:
effect_size(g[0][1],g[1][1])

0.1865144462445202

## GTE

In [95]:
g = []
for v in feature_groups:
    x = np.array(v[0]['Ht'])
    y = np.array(v[1]['Ht'])
    z = np.array(v[2]['Ht'])

    g.append([x,y,z])

In [96]:
statistic(g)

GROUP 1
mean trial 1: 1.0807025587639763
mean trial 2: 1.0610754681499375
mean trial 3: 1.3984240110388713

std trial 1: 0.16860950410030703
std trial 2: 0.23836251794453747
std trial 3: 0.20814554351550482
--------------------------
GROUP 2
mean trial 1: 1.1909855713734065
mean trial 2: 1.132499987256989
mean trial 3: 1.34530484547176

std trial 1: 0.2614571330355321
std trial 2: 0.2705067583257731
std trial 3: 0.4641994181425891


In [106]:
scipy.stats.ttest_ind(g[0][2], g[1][2], equal_var = False)

Ttest_indResult(statistic=0.3632058680936936, pvalue=0.720399526349748)

In [107]:
effect_size(g[0][2],g[1][2])

0.13947207002765363