# Import

In [14]:
import pandas as pd
import numpy as np
import pygaze
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import scipy
import glob
from tqdm import tqdm
from sklearn.cluster import DBSCAN
import detectors
import gazeplotter
from collections import defaultdict

# function

In [2]:
def get_center(clustering, data):
    """Return the centroid of every cluster found by a fitted clusterer.

    Parameters
    ----------
    clustering : object exposing ``labels_`` (one integer label per row of
        ``data``; negative labels mark noise and are ignored).
    data : 2-D array-like whose first two columns are the x and y coordinates.

    Returns
    -------
    list of (cx, cy) tuples ordered by ascending cluster label. With
    contiguous labels ``0..k-1`` the list index equals the cluster label.
    """
    labels = np.asarray(clustering.labels_)
    points = np.asarray(data)

    # Enumerate the real cluster ids instead of assuming that exactly one of
    # the distinct labels is the noise label: when no point is noise, the old
    # ``len(set(labels)) - 1`` count silently dropped the last cluster.
    cluster_ids = sorted(set(labels[labels >= 0].tolist()))

    center = []
    for cluster_id in cluster_ids:
        members = points[labels == cluster_id]
        center.append((members[:, 0].mean(), members[:, 1].mean()))
    return center

# cluster_center = get_center(clustering,X)

In [3]:
def transition_matrix(transitions):
    """Build a row-stochastic first-order transition matrix.

    Parameters
    ----------
    transitions : sequence of non-negative integer state ids, in visiting order.

    Returns
    -------
    list of lists ``M`` with ``n = max(transitions) + 1`` rows and columns,
    where ``M[i][j]`` is the fraction of moves out of state ``i`` that went to
    state ``j``. Rows for states that are never left stay all zero. An empty
    input yields ``[]``.
    """
    # ``max`` raises on an empty sequence; no observations means no states.
    if len(transitions) == 0:
        return []

    n = 1 + int(max(transitions))  # number of states

    M = [[0] * n for _ in range(n)]

    # Count every consecutive (from, to) pair.
    for (i, j) in zip(transitions, transitions[1:]):
        M[i][j] += 1

    # Convert counts to probabilities, row by row.
    for row in M:
        s = sum(row)
        if s > 0:
            row[:] = [f / s for f in row]
    return M


In [4]:
def distance(x, y):
    """Return the Euclidean distance between two 2-D points ``x`` and ``y``."""
    return ((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2) ** 0.5


def dbscan_predict(cluster_center, X_new, min_dist=50, metric=distance):
    """Label each sample with the nearest cluster centre within ``min_dist``.

    Parameters
    ----------
    cluster_center : sequence of (x, y) centres; the index is used as label.
    X_new : sequence of (x, y) samples to label.
    min_dist : a sample is only assigned to a centre strictly closer than this.
    metric : callable ``metric(sample, centre) -> distance``.

    Returns
    -------
    Integer numpy array with one label per sample; ``-1`` marks samples that
    are not within ``min_dist`` of any centre.
    """
    # Result is noise by default.
    y_new = np.full(len(X_new), -1, dtype=int)

    for j, x_new in enumerate(X_new):
        # Keep the closest centre seen so far. Stopping at the first centre
        # inside the radius made the label depend on cluster ordering whenever
        # two centres were both within min_dist of the sample.
        best = min_dist
        for i, x_center in enumerate(cluster_center):
            d = metric(x_new, x_center)
            if d < best:
                best = d
                y_new[j] = i

    return y_new

In [9]:
def _plog2p_sum(probs):
    """Return sum(p * log2(p)) over the strictly positive entries of ``probs``."""
    probs = np.asarray(probs, dtype=float)
    positive = probs[probs > 0]  # 0 * log2(0) is taken as 0, so zeros are skipped
    return float(np.sum(positive * np.log2(positive)))


def run_all(csv):
    """Extract gaze events and transition entropies from one recording.

    Parameters
    ----------
    csv : path of a CSV file with 'X Pos', 'Y Pos' and 'Start Time (secs)'
        columns. Missing values are replaced by 0.0 before processing.

    Returns
    -------
    (Eblk, Efix, Esac, trans_matrix, Hs, Ht, total_time) where
      * Eblk, Efix, Esac are the second return values of the blink, fixation
        and saccade detectors,
      * trans_matrix is the transition matrix between fixation clusters,
      * Hs is the stationary entropy  -sum_i p_i log2 p_i,
      * Ht is the transition entropy  -sum_i p_i sum_j p_ij log2 p_ij,
      * total_time is last minus first timestamp, in milliseconds.
    When no fixation falls into any cluster, trans_matrix is [] and both
    entropies are 0.0.
    """
    df_data = pd.read_csv(csv).fillna(0.0)

    X = np.array(df_data['X Pos'])
    Y = np.array(df_data['Y Pos'])
    time = np.array(df_data['Start Time (secs)']) * 1000  # seconds -> ms

    # detect blink, fixation and saccade (only the second return value of
    # each detector is used)
    _, Eblk = detectors.blink_detection(X, Y, time, minlen=6)
    _, Efix = detectors.fixation_detection(X, Y, time, maxdist=10, mindur=50)
    _, Esac = detectors.saccade_detection(X, Y, time, minlen=5, maxvel=40, maxacc=340)

    # clustering on columns 3 onward of each fixation record
    # NOTE(review): presumably these are the fixation x/y position - confirm
    # against the detectors module.
    fix_xy = np.array(Efix).T[3:].T
    clustering = DBSCAN(eps=20, min_samples=3).fit(fix_xy)
    cluster_center = get_center(clustering, fix_xy)
    pred = dbscan_predict(cluster_center, fix_xy)
    transitions = pred[pred != -1]  # drop fixations not assigned to a cluster

    # transition matrix and GTE, SGE
    if len(transitions) == 0:
        trans_matrix, Hs, Ht = [], 0.0, 0.0
    else:
        trans_matrix = transition_matrix(transitions)
        n_states = len(trans_matrix)
        # Size pA like the matrix (max label + 1) so pA[i] always lines up
        # with row i, even when some label never occurs in `transitions`.
        pA = np.bincount(transitions, minlength=n_states) / len(transitions)
        # Both entropies carry the leading minus sign; Hs previously did not.
        Hs = -_plog2p_sum(pA)
        Ht = float(-sum(pA[i] * _plog2p_sum(trans_matrix[i]) for i in range(n_states)))

    total_time = time[-1] - time[0]

    return Eblk, Efix, Esac, trans_matrix, Hs, Ht, total_time
    

# import data

In [6]:
# Collect every recording under data/. glob makes no promise about ordering,
# so sort the paths to keep the processing order reproducible between runs.
csv_files = sorted(glob.glob("data/*.csv"))

In [7]:
# Split the recordings by trial, using the trial tag contained in each path.
csv_files_one = list(filter(lambda path: "One Gaze-Left" in path, csv_files))
csv_files_two = list(filter(lambda path: "Two Gaze-Left" in path, csv_files))
csv_files_three = list(filter(lambda path: "Three Go-Around Gaze-Left" in path, csv_files))

In [11]:
# Participant table -> two lists of ID suffixes (last three characters of
# 'ID'), one list per group. A row belongs to a group when its 'Group' string
# contains that group's digit.
df_par = pd.read_csv("participant.csv")
group = [
    [pid[-3:] for pid in df_par.loc[df_par['Group'].str.contains(tag), 'ID']]
    for tag in ("1", "2")
]
group

[['032', '027', '031', '028', '004', '008', '010', '029', '003', '007', '023'],
 ['021',
  '006',
  '019',
  '022',
  '015',
  '016',
  '014',
  '005',
  '025',
  '002',
  '001',
  '020',
  '011',
  '017']]

In [18]:
# feature_groups[group_index][trial_index] is a dict mapping each result name
# to a list with one entry per participant of that group.
RESULT_KEYS = ("Eblk", "Efix", "Esac", "trans_matrix", "Hs", "Ht", "total_time")

feature_groups = []
for g in tqdm(group):
    trials = []
    # The loop variable must not be called `csv_files`: that would overwrite
    # the global list of all recordings and break re-running earlier cells.
    for trial_files in [csv_files_one, csv_files_two, csv_files_three]:
        ret = defaultdict(list)
        for csv_path in trial_files:
            # NOTE(review): fixed-position slice; assumes the participant id
            # sits at characters 14-16 of the path - confirm against the
            # actual file naming.
            par_id = csv_path[14:17]
            if par_id not in g:
                continue
            # run_all returns its values in the same order as RESULT_KEYS.
            for key, value in zip(RESULT_KEYS, run_all(csv_path)):
                ret[key].append(value)
        trials.append(ret)
    feature_groups.append(trials)





100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:08<00:00,  4.15s/it]


# hypothesis test

In [19]:
# standardized effect size - cohen's d
def effect_size(a, b):
    """Return the absolute Cohen's d between samples ``a`` and ``b``.

    d = |mean(a) - mean(b)| / s_pooled, with
    s_pooled = sqrt((SS_a + SS_b) / (n_a + n_b - 2)),
    where SS is the sum of squared deviations from the sample mean, i.e.
    (n - 1) times the unbiased sample variance.

    The result is inf or nan when the pooled standard deviation is zero.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)

    es = np.abs(np.mean(a) - np.mean(b))

    # Summing squared deviations directly gives (n - 1) * sample variance.
    # The earlier form multiplied (n - 1) by the population variance
    # (np.std defaults to ddof=0), which underestimates the pooled SD.
    ss_a = np.sum((a - np.mean(a)) ** 2)
    ss_b = np.sum((b - np.mean(b)) ** 2)
    sd_pooled = np.sqrt((ss_a + ss_b) / (len(a) + len(b) - 2))
    d = es / sd_pooled

    return d

In [20]:
def statistic(g):
    """Print mean and standard deviation of trials 1-3 for groups 1 and 2.

    ``g[group][trial]`` must be a collection of numbers for group 0-1 and
    trial 0-2. Output goes to stdout; nothing is returned.
    """
    for group_idx in range(2):
        if group_idx > 0:
            # separator line between the two groups only
            print("--------------------------")
        print(f"GROUP {group_idx + 1}")
        trials = g[group_idx]

        for trial_idx in range(3):
            print(f"mean trial {trial_idx + 1}:", np.mean(trials[trial_idx]))

        for trial_idx in range(3):
            # a blank line separates the std lines from the mean lines
            lead = "\n" if trial_idx == 0 else ""
            print(f"{lead}std trial {trial_idx + 1}:", np.std(trials[trial_idx]))

## fixation duration

In [None]:
def _fixation_durations(trial):
    """Return (all durations, per-participant normalised mean) for one trial.

    ``trial`` is one entry of ``feature_groups[group]``: a dict holding the
    per-participant 'Efix' lists and 'total_time' values. Column 2 of each
    fixation record is used as the duration.
    NOTE(review): column meaning inferred from the variable names - confirm
    against the detectors module.
    """
    columns = [np.array(p).T[2] for p in trial['Efix']]
    # Leading empty float array keeps the result a float array even when the
    # trial has no participants.
    durations = np.concatenate([np.empty(0)] + columns)
    means = [np.mean(col) / trial['total_time'][i] for i, col in enumerate(columns)]
    return durations, means


# NOTE(review): `x` is not used in this cell; kept in case a later cell reads it.
x = []
# dur_fix[group] = [durations trial 1, durations trial 2, durations trial 3]
dur_fix = []
for g in feature_groups:  # was `groups`, which is never defined
    (dur_fix1, mean_fix1), (dur_fix2, mean_fix2), (dur_fix3, mean_fix3) = (
        _fixation_durations(g[trial_idx]) for trial_idx in range(3)
    )

    dur_fix.append([dur_fix1, dur_fix2, dur_fix3])
    