In [1]:
import pandas as pd
import numpy as np
import pygaze
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import scipy
import glob
from tqdm import tqdm
from sklearn.cluster import DBSCAN
import detectors
import gazeplotter
from collections import defaultdict
# import local lib
import eye_metrics_utils
import data_utils
import gaze_entropy

In [2]:
import warnings
# warnings.filterwarnings(action='once')
warnings.filterwarnings('ignore')

In [3]:
def run_all(df_data):
    df_x = df_data.copy()
    if (data_utils.check_percentage_null(df_x) < 0.5): # if missing value > 50%, remove
        return None
    
    time = np.array(df_data['Start Time (secs)'].tolist())

    Efix = eye_metrics_utils.detect_fixations(df_x)
#     print(Efix)
    X = np.array(Efix).T[3:].T
    Hs, Ht = gaze_entropy.entropy(X)
    total_time = time[-1] - time[0]
    
    return Efix, Hs, Ht, total_time
    

In [5]:
csv_files = glob.glob("data/*.csv")

In [6]:
csv_files_one = [v for v in csv_files if "One Gaze-Vergence" in v]
csv_files_two = [v for v in csv_files if "Two Gaze-Vergence" in v]
csv_files_three = [v for v in csv_files if "Three Go-Around Gaze-Vergence" in v]

In [7]:
df_par = pd.read_csv("participant.csv")
group = [df_par[df_par['Group'].str.contains("1")]['ID'].tolist(), df_par[df_par['Group'].str.contains("2")]["ID"].tolist()]
group = [[i[-3:] for i in v] for v in group]
group

[['032', '027', '031', '028', '004', '008', '010', '029', '003', '007', '023'],
 ['021',
  '006',
  '019',
  '022',
  '015',
  '016',
  '014',
  '005',
  '025',
  '002',
  '001',
  '020',
  '011',
  '017']]

In [9]:
feature_groups = []
for g in group:
    trials = []
    for csv_files in [csv_files_one, csv_files_two, csv_files_three]:
        ret = defaultdict(list)
        for csv in csv_files:
            par_id = csv[14:17]
            if par_id not in g:
                continue
                
            print(csv)
            df_data = pd.read_csv(csv)
            print(len(df_data))
            for v in data_utils.data_slicing(df_data):
                r = run_all(v)
                if r != None:
                    Efix, Hs, Ht, total_time = r
#                     ret["Eblk"].append(Eblk)
                    ret["Efix"].append(Efix)
#                     ret["Esac"].append(Esac)
#                     ret["trans_matrix"].append(trans_matrix)
                    ret["Hs"].append(Hs)
                    ret["Ht"].append(Ht)
                    ret["total_time"].append(total_time)
        trials.append(ret)
    feature_groups.append(trials)

data\PISSS_ID_003_Approach One Gaze-Vergence.csv
9122
data\PISSS_ID_004_Approach One Gaze-Vergence.csv
9307
data\PISSS_ID_007_Approach One Gaze-Vergence.csv
9492
data\PISSS_ID_008_Approach One Gaze-Vergence.csv
9736
data\PISSS_ID_010_Approach One Gaze-Vergence.csv
9554
data\PISSS_ID_023_Approach One Gaze-Vergence.csv
9369
data\PISSS_ID_027_Approach One Gaze-Vergence.csv
9060
data\PISSS_ID_028_Approach One Gaze-Vergence.csv
8999
data\PISSS_ID_029_Approach One Gaze-Vergence.csv
9862
data\PISSS_ID_031_Approach One Gaze-Vergence.csv
9677
data\PISSS_ID_032_Approach One Gaze-Vergence.csv
8629
data\PISSS_ID_003_Approach Two Gaze-Vergence.csv
9368
data\PISSS_ID_004_Approach Two Gaze-Vergence.csv
9862
data\PISSS_ID_007_Approach Two Gaze-Vergence.csv
9677
data\PISSS_ID_008_Approach Two Gaze-Vergence.csv
9923
data\PISSS_ID_010_Approach Two Gaze-Vergence.csv
9923
data\PISSS_ID_023_Approach Two Gaze-Vergence.csv
9677
data\PISSS_ID_027_Approach Two Gaze-Vergence.csv
8568
data\PISSS_ID_028_Approach T

In [18]:
df_x = pd.DataFrame()
for j, g in enumerate(feature_groups):
    fix_dur = []
    for i, p in enumerate(g[1]['Efix']):
        fix_dur = np.append(fix_dur,np.array(p).T[2])
    Hs = g[1]['Hs']
    Ht = g[1]['Ht']
    group = j*np.ones_like(Hs)
    df = pd.DataFrame(zip(fix_dur, Hs, Ht, group), columns=["fix_dur", "Hs", "Ht", "group"]).astype({"group":"int"})
    df_x = pd.concat([df_x, df])
    
df_x.head(10)


Unnamed: 0,fix_dur,Hs,Ht,group
0,0.487013,1.890805,1.541628,0
1,0.185009,1.515655,1.316056,0
2,0.163963,0.954434,0.911104,0
3,0.51801,0.995727,0.942766,0
4,0.116004,0.949452,0.896885,0
5,0.372018,0.899744,0.870875,0
6,0.324005,0.998001,0.956601,0
7,0.080001,0.0,0.0,0
8,0.226009,1.571542,1.022576,0
9,0.132028,0.881291,0.697009,0


In [19]:
df_x.dtypes

fix_dur    float64
Hs         float64
Ht         float64
group        int32
dtype: object

In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [21]:
X = df_x[['fix_dur', 'Hs', 'Ht']].values
y = df_x[['group']].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = LogisticRegression(random_state=0).fit(X_train, y_train)
# clf.predict(X_test)
clf.score(X_test, y_test)

0.6396396396396397

In [22]:
print(clf.classes_)
print(clf.coef_)

[0 1]
[[-1.03691331 -0.10537734  0.38811859]]


In [23]:
y_pred = clf.predict(X_test)
target_names = ['group 1', 'group 2']
print(classification_report(y_test, y_pred, target_names=target_names))

              precision    recall  f1-score   support

     group 1       0.76      0.31      0.44       102
     group 2       0.61      0.92      0.73       120

    accuracy                           0.64       222
   macro avg       0.69      0.62      0.59       222
weighted avg       0.68      0.64      0.60       222

