In [1]:
import pandas as pd
from matplotlib.pyplot import figure
from scipy.stats import entropy
import utils
import numpy as np

In [2]:
#----------------------------------------------------------------------------------------
#
#  1. Transition Matrix and Markov Model
#  (Explanation in Slides 42-43)
#
#----------------------------------------------------------------------------------------

In [3]:
#----------------------------------------------------------------------------------------
#Assumption: the AOIs are not overlapping
#----------------------------------------------------------------------------------------

In [4]:
# AOIs to consider --- **TO UPDATE IF YOU UPDATE THE AOIs**
# NOTE(review): the transition matrices displayed further down still contain
# 'Text_Area' and 'Figure', which are not in the active list below — confirm
# whether the dwell data is meant to be filtered by this list.

# Coarse-grained level
#AoisToConsider = ['Text_Area','Figure', 'Formula']

# Fine-grained level
AoisToConsider = [
    'Paragraph1',
    'Paragraph2',
    'Paragraph3',
    'Paragraph4',
    'Paragraph5',
    'Paragraph6',
    'SubFigure1',
    'SubFigure2',
    'SubFigure3',
    'Formula',
]

In [30]:
# Lift the column display limit (None = unlimited) so that head() shows every column
pd.options.display.max_columns = None

# Load the dwell records from CSV into a pandas DataFrame
data = pd.read_csv("data/dwells.csv")

In [31]:
# Preview the first five dwell rows
data.head(n=5)

Unnamed: 0,Respondent,VisitedAOI,Dwell Start,Dwell End,Number of Fixations in Dwell,Number of Saccades in Dwell,Dwell Time
0,Anonymous 14-11-22 09h35m,Paragraph4,133.1086,433.0785,1.0,1.0,299.9699
1,Anonymous 14-11-22 09h35m,Text_Area,133.1086,783.036,2.0,4.0,649.9274
2,Anonymous 14-11-22 09h35m,Paragraph1,566.4076,716.4244,1.0,0.0,150.0168
3,Anonymous 14-11-22 09h35m,Paragraph1,899.6993,6957.3621,23.0,22.0,6057.6628
4,Anonymous 14-11-22 09h35m,Text_Area,899.6993,6957.3621,23.0,22.0,6057.6628


In [32]:
#----------------------------------------------------------------------------------------
#
# 1.1 Identify transitions (including self-transitions e.g., transitions from AOI1 to AOI1)
#
#----------------------------------------------------------------------------------------

In [33]:
# Ask utils.identifyTransition for one transition per dwell row. With axis=1 each row
# arrives as a Series whose .name is the row's index label in `data`.
# Judging by the preview, row i yields the transition from dwell i to the dwell after it.
transitions = pd.DataFrame()
transitions[['Respondent','TransitionSource','TransitionTarget']] = data.apply(
    lambda dwellRow: utils.identifyTransition(data, dwellRow.name),
    axis=1,
)

In [34]:
# Preview the first five rows of the transitions dataframe
transitions.head(n=5)

Unnamed: 0,Respondent,TransitionSource,TransitionTarget
0,Anonymous 14-11-22 09h35m,Paragraph4,Text_Area
1,Anonymous 14-11-22 09h35m,Text_Area,Paragraph1
2,Anonymous 14-11-22 09h35m,Paragraph1,Paragraph1
3,Anonymous 14-11-22 09h35m,Paragraph1,Text_Area
4,Anonymous 14-11-22 09h35m,Text_Area,Formula


In [35]:
# Generate the transition matrix (source AOI x target AOI) from the identified
# transitions; self-transitions are part of `transitions`, so they are included here
transitionMatrix = utils.generateTransitionMatrix(
    transitions,
    'TransitionSource',
    'TransitionTarget',
)

In [36]:
# Display transitionMatrix: one row per TransitionSource AOI, one column per TransitionTarget AOI
display(transitionMatrix)

TransitionTarget,Paragraph4,Text_Area,Paragraph1,Formula,Paragraph3,Paragraph2,Figure,SubFigure2,SubFigure1,SubFigure3,Paragraph5,Paragraph6
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Paragraph4,7,7,0,4,4,5,2,2,3,0,1,1
Text_Area,6,2,11,3,11,10,5,2,8,0,15,8
Paragraph1,0,10,6,0,4,7,1,0,0,0,2,0
Formula,1,3,1,4,0,4,6,1,9,2,1,1
Paragraph3,3,0,4,1,7,2,0,3,3,0,7,1
Paragraph2,6,14,5,1,1,3,2,1,0,0,0,0
Figure,2,15,0,10,0,0,3,26,17,7,0,7
SubFigure2,0,6,1,1,0,2,23,16,24,23,3,2
SubFigure1,1,7,0,6,0,0,31,20,12,3,1,0
SubFigure3,1,2,0,2,0,0,7,21,1,5,0,2


In [37]:
# Write transitionMatrix to a CSV file in the data folder
transitionMatrix.to_csv(path_or_buf="data/transitionMatrix.csv")

In [38]:
#----------------------------------------------------------------------------------------
# Markov Model
#----------------------------------------------------------------------------------------

In [39]:
# Same transition matrix, but row-normalized: normalize='index' divides every value by
# the sum of its row, so each row reads as the outgoing transition probabilities of one AOI
transitionMatrixNormalized = utils.generateTransitionMatrix(
    transitions,
    'TransitionSource',
    'TransitionTarget',
    normalize='index',
)

In [40]:
# Display transitionMatrixNormalized (row-normalized values; each displayed row sums to 1)
display(transitionMatrixNormalized)

TransitionTarget,Paragraph4,Text_Area,Paragraph1,Formula,Paragraph3,Paragraph2,Figure,SubFigure2,SubFigure1,SubFigure3,Paragraph5,Paragraph6
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Paragraph4,0.194444,0.194444,0.0,0.111111,0.111111,0.138889,0.055556,0.055556,0.083333,0.0,0.027778,0.027778
Text_Area,0.074074,0.024691,0.135802,0.037037,0.135802,0.123457,0.061728,0.024691,0.098765,0.0,0.185185,0.098765
Paragraph1,0.0,0.333333,0.2,0.0,0.133333,0.233333,0.033333,0.0,0.0,0.0,0.066667,0.0
Formula,0.030303,0.090909,0.030303,0.121212,0.0,0.121212,0.181818,0.030303,0.272727,0.060606,0.030303,0.030303
Paragraph3,0.096774,0.0,0.129032,0.032258,0.225806,0.064516,0.0,0.096774,0.096774,0.0,0.225806,0.032258
Paragraph2,0.181818,0.424242,0.151515,0.030303,0.030303,0.090909,0.060606,0.030303,0.0,0.0,0.0,0.0
Figure,0.022989,0.172414,0.0,0.114943,0.0,0.0,0.034483,0.298851,0.195402,0.08046,0.0,0.08046
SubFigure2,0.0,0.059406,0.009901,0.009901,0.0,0.019802,0.227723,0.158416,0.237624,0.227723,0.029703,0.019802
SubFigure1,0.012346,0.08642,0.0,0.074074,0.0,0.0,0.382716,0.246914,0.148148,0.037037,0.012346,0.0
SubFigure3,0.02439,0.04878,0.0,0.04878,0.0,0.0,0.170732,0.512195,0.02439,0.121951,0.0,0.04878


In [41]:
# Export transitionMatrixNormalized (the Markov model) as CSV
transitionMatrixNormalized.to_csv(path_or_buf="data/transitionMatrixNormalized-MarkovModel.csv")

In [42]:
#----------------------------------------------------------------------------------------
#
# 1.2 Identify transitions (with no self-transitions)
#
#----------------------------------------------------------------------------------------

In [43]:
# Drop consecutive duplicate VisitedAOIs: keep only the first dwell of each run of
# repeated visits to the same AOI, so consecutive rows of one respondent never share an AOI.
# The comparison uses Respondent together with VisitedAOI. Comparing VisitedAOI alone would
# also drop the first dwell of a respondent whenever the previous respondent's last dwell
# happened to be on the same AOI.
# NOTE(review): assumes the rows of each respondent are contiguous in `data` and that
# utils.identifyTransition does not pair rows across respondents — confirm.
visitKeys = data[['Respondent', 'VisitedAOI']]
# A row is kept when it differs from the previous row in at least one key column;
# the first row is always kept because shift() leaves NaN in it.
dataNoConsDupVisits = data[visitKeys.ne(visitKeys.shift()).any(axis=1)]

In [44]:
# Identify transitions on the de-duplicated dwells. Each row is passed to
# utils.identifyTransition via its index label (.name), which keeps the labels of
# `data`, so gaps are left where rows were dropped.
noSelfTransitions = pd.DataFrame()
noSelfTransitions[['Respondent','TransitionSource','TransitionTarget']] = dataNoConsDupVisits.apply(
    lambda dwellRow: utils.identifyTransition(dataNoConsDupVisits, dwellRow.name),
    axis=1,
)

In [45]:
# Preview the first five rows of the noSelfTransitions dataframe
noSelfTransitions.head(n=5)

Unnamed: 0,Respondent,TransitionSource,TransitionTarget
0,Anonymous 14-11-22 09h35m,Paragraph4,Text_Area
1,Anonymous 14-11-22 09h35m,Text_Area,Paragraph1
2,Anonymous 14-11-22 09h35m,Paragraph1,Text_Area
4,Anonymous 14-11-22 09h35m,Formula,Paragraph1
5,Anonymous 14-11-22 09h35m,Paragraph1,Text_Area


In [46]:
# Generate the transition matrix (source AOI x target AOI) without self-transitions
noSelfTransitionsMatrix = utils.generateTransitionMatrix(
    noSelfTransitions,
    'TransitionSource',
    'TransitionTarget',
)

In [47]:
# Display noSelfTransitionsMatrix: one row per TransitionSource AOI, one column per TransitionTarget AOI
display(noSelfTransitionsMatrix)

TransitionTarget,Paragraph4,Text_Area,Paragraph1,Formula,Paragraph3,Paragraph2,Figure,SubFigure2,SubFigure1,SubFigure3,Paragraph5,Paragraph6
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Paragraph4,0,5,0,4,3,5,2,1,3,0,1,0
Text_Area,4,0,10,2,9,10,5,2,7,0,14,7
Paragraph1,0,9,0,0,4,7,0,0,0,0,2,0
Formula,1,1,1,0,0,4,6,1,8,2,1,0
Paragraph3,2,0,4,1,0,2,0,1,3,0,6,1
Paragraph2,5,11,4,1,1,0,2,0,0,0,0,0
Figure,1,13,0,8,0,0,0,20,13,6,0,7
SubFigure2,0,5,1,1,0,2,20,0,21,20,3,2
SubFigure1,1,7,0,4,0,0,25,18,0,3,1,0
SubFigure3,1,1,0,2,0,0,5,20,1,0,0,1


In [48]:
# Export noSelfTransitionsMatrix as CSV
noSelfTransitionsMatrix.to_csv(path_or_buf="data/noSelfTransitions.csv")

In [49]:
#----------------------------------------------------------------------------------------
# Markov Model
#----------------------------------------------------------------------------------------

In [50]:
# Row-normalized variant of the no-self-transition matrix: normalize='index' divides
# every value by the sum of its row
noSelfTransitionsMatrixNormalized = utils.generateTransitionMatrix(
    noSelfTransitions,
    'TransitionSource',
    'TransitionTarget',
    normalize='index',
)

In [51]:
# Display noSelfTransitionsMatrixNormalized (row-normalized values, without self-transitions)
display(noSelfTransitionsMatrixNormalized)

TransitionTarget,Paragraph4,Text_Area,Paragraph1,Formula,Paragraph3,Paragraph2,Figure,SubFigure2,SubFigure1,SubFigure3,Paragraph5,Paragraph6
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Paragraph4,0.0,0.208333,0.0,0.166667,0.125,0.208333,0.083333,0.041667,0.125,0.0,0.041667,0.0
Text_Area,0.057143,0.0,0.142857,0.028571,0.128571,0.142857,0.071429,0.028571,0.1,0.0,0.2,0.1
Paragraph1,0.0,0.409091,0.0,0.0,0.181818,0.318182,0.0,0.0,0.0,0.0,0.090909,0.0
Formula,0.04,0.04,0.04,0.0,0.0,0.16,0.24,0.04,0.32,0.08,0.04,0.0
Paragraph3,0.1,0.0,0.2,0.05,0.0,0.1,0.0,0.05,0.15,0.0,0.3,0.05
Paragraph2,0.208333,0.458333,0.166667,0.041667,0.041667,0.0,0.083333,0.0,0.0,0.0,0.0,0.0
Figure,0.014706,0.191176,0.0,0.117647,0.0,0.0,0.0,0.294118,0.191176,0.088235,0.0,0.102941
SubFigure2,0.0,0.066667,0.013333,0.013333,0.0,0.026667,0.266667,0.0,0.28,0.266667,0.04,0.026667
SubFigure1,0.016949,0.118644,0.0,0.067797,0.0,0.0,0.423729,0.305085,0.0,0.050847,0.016949,0.0
SubFigure3,0.032258,0.032258,0.0,0.064516,0.0,0.0,0.16129,0.645161,0.032258,0.0,0.0,0.032258


In [52]:
# Write noSelfTransitionsMatrixNormalized (Markov model without self-transitions) to CSV
noSelfTransitionsMatrixNormalized.to_csv(path_or_buf="data/noSelfTransitionsNormalized--MarkovModel.csv")