In [23]:
import numpy as np
import pandas as pd
import os
import time

In [2]:
project_path = os.path.abspath(os.getcwd())

### Clean dataset and create smaller subset

In [35]:
ctime = time.time()
df = pd.read_csv(project_path + "/data/final_A116.csv", index_col=0, low_memory=False)
dfs = df[["real_time", "object", "left.pupil_diameter_mm", "right.pupil_diameter_mm"]]
dfs = dfs.loc[(dfs["left.pupil_diameter_mm"]!=-1) & (dfs["right.pupil_diameter_mm"]!=-1)]
dfs = dfs.dropna()
dfs.columns = ['time', 'gaze_target', 'left.pupil_diameter_mm', 'right.pupil_diameter_mm']
dfs.to_csv(project_path + "/data/smaller_subset_Pyt.csv", index=False)
print(time.time() - ctime)

0.5666000843048096


### I. Aggregating raw gaze-target information into gaze transition datasets

In [41]:
ctime = time.time()

df = pd.read_csv(project_path + "/data/smaller_subset_Pyt.csv", low_memory=False)

ID = "001"

ID_lst_new = list()
source_lst = list()
target_lst = list()
time_lst = list()
trans_time_lst = list()

source = df['gaze_target'].iloc[0]

for i in range(1, len(df)):
    if source != df['gaze_target'].iloc[i]:
        ID_lst_new.append(ID)
        time_lst.append(df['time'].iloc[i - 1])
        trans_time_lst.append(df['time'].iloc[i] - df['time'].iloc[i - 1])
        source_lst.append(source)
        target_lst.append(df['gaze_target'].iloc[i])

        source = df['gaze_target'].iloc[i]

df_trans = pd.DataFrame({'participant': ID_lst_new, 'time_point': time_lst, 'trans_dur': trans_time_lst,
                         'Source': source_lst, 'Target': target_lst})
df_trans.to_csv(project_path + "/data/transition_Pyt.csv", index=False)

print(time.time() - ctime)

28.8058979511261


In [80]:
import networkx as nx

In [81]:
ctime = time.time()
df = pd.read_csv(project_path + "/data/transition_Pyt.csv")

#Group to weighted adjecency dataframe
df_mat = pd.DataFrame(df.groupby(['Source', 'Target'], as_index=False).size())
df_mat.columns = ['Source', 'Target', 'Weight']

# Normalize
#w_sum = np.sum(df_mat['Weight'].values)
#df_mat['Weight'] = df_mat['Weight'] / w_sum

# Create graph    
G = nx.from_pandas_edgelist(df_mat, source='Source', target='Target', edge_attr=['Weight'],
                            create_using=nx.MultiDiGraph())
print(time.time() - ctime)

0.011000394821166992


### Calculate graph features

In [58]:
ctime = time.time()
# Weighted degree centrality for specific node
node = "CartoonTeacher"
edges = G.out_edges(node, data=True)
sumw = G.out_degree(node, weight='Weight')

weight_lst = list()

for e in edges:
    source, target, weight = e
    weight_lst.append(weight['Weight'])

# Necessary to compare different graphs with different outgoing nodes.
weight_lst.sort()

DC = len(edges)
FCi = np.zeros(DC - 1)

for i in range(DC - 1):  # 0,1,2,3
    fj = 0
    for j in range(i + 1):
        fj += weight_lst[j] / sumw
    FCi[i] = fj

WDC = 1 + 2 * (np.sum(FCi))
print(time.time() - ctime)

0.0009913444519042969


In [75]:
ctime = time.time()
stud = ['S11_C', 'S12_C', 'S13_C', 'S14_C', 'S15_C', 'S16_C', 
          'S17_C', 'S22_C', 'S23_C', 'S24_C', 'S27_C', 'S28_C',
          'S32_C', 'S33_C', 'S34_C', 'S35_C', 'S36_C', 'S37_C',
          'S38_C', 'S42_C', 'S43_C', 'S44_C', 'S47_C', 'S48_C']

SG = nx.induced_subgraph(G,stud) 
UG = SG.to_undirected()
for node in SG:
    for ngbr in nx.neighbors(SG, node):
        if node in nx.neighbors(SG, ngbr):
            UG.edges[node, ngbr, 0]['Weight'] = (SG.edges[node, ngbr, 0]['Weight'] + SG.edges[ngbr, node, 0][
                'Weight']) / 2

# Find maximal cliques
cl = nx.find_cliques(UG)

# count number of cliques larger than two nodes with only peers
cl_count = 0

for c in cl:
    if len(c) > 1:
        cl_count +=1
print(time.time() - ctime)

0.0010001659393310547


In [76]:
cl_count

10

In [82]:
node = "CartoonTeacher"
edges = G.out_edges(node, data=True)
edges

OutMultiEdgeDataView([('CartoonTeacher', 'FloorFront', {'Weight': 3}), ('CartoonTeacher', 'FrontWall', {'Weight': 116}), ('CartoonTeacher', 'S16_C', {'Weight': 7}), ('CartoonTeacher', 'SM_Blackboard_Schiene_low_99', {'Weight': 46}), ('CartoonTeacher', 'SM_Blackboard_Tafel_unten_low_101', {'Weight': 114}), ('CartoonTeacher', 'Screen_95', {'Weight': 199}), ('CartoonTeacher', 'frontcabin', {'Weight': 2}), ('CartoonTeacher', 'leftwall_5', {'Weight': 1}), ('CartoonTeacher', 'table12', {'Weight': 1}), ('CartoonTeacher', 'window1', {'Weight': 1})])

In [83]:
len(edges)

10

In [68]:
weight_lst = list()

for e in edges:
    source, target, weight = e
    weight_lst.append(weight['Weight'])

In [69]:
weight_lst

[0.0008733624454148472,
 0.033770014556040756,
 0.0020378457059679767,
 0.01339155749636099,
 0.03318777292576419,
 0.057933042212518195,
 0.0005822416302765648,
 0.0002911208151382824,
 0.0002911208151382824,
 0.0002911208151382824]

In [70]:
weight_lst.sort()

In [71]:
weight_lst

[0.0002911208151382824,
 0.0002911208151382824,
 0.0002911208151382824,
 0.0005822416302765648,
 0.0008733624454148472,
 0.0020378457059679767,
 0.01339155749636099,
 0.03318777292576419,
 0.033770014556040756,
 0.057933042212518195]

In [72]:
G.out_degree(node, weight='Weight')

0.14264919941775842

In [84]:
df_mat

Unnamed: 0,Source,Target,Weight
0,CartoonTeacher,FloorFront,3
1,CartoonTeacher,FrontWall,116
2,CartoonTeacher,S16_C,7
3,CartoonTeacher,SM_Blackboard_Schiene_low_99,46
4,CartoonTeacher,SM_Blackboard_Tafel_unten_low_101,114
...,...,...,...
505,window1,leftwall_5,2
506,window2,S34_C,1
507,window2,S42_C,2
508,window2,leftwall,3


In [85]:
WDC

3.2897959183673464