In [1]:
import pandas as pd
from matplotlib.pyplot import figure
from scipy.stats import entropy
import utils
import numpy as np

In [2]:
#----------------------------------------------------------------------------------------
#
#  1. Transition Matrix and Markov Model
#  (Explanation in Slides 42-43)
#
#----------------------------------------------------------------------------------------

In [3]:
#----------------------------------------------------------------------------------------
#Assumption: the AOIs are not overlapping
#----------------------------------------------------------------------------------------

In [4]:
#AOIs to consider --- **TO UPDATE IF YOU UPDATE THE AOIs**

#Coarse-grained level
#AoisToConsider = ['Text_Area','Figure', 'Formula']

#Fine-grained level
AoisToConsider = ['Paragraph1', 'Paragraph2', 'Paragraph3', 'Paragraph4',
       'Paragraph5', 'Paragraph6', 'SubFigure1',
       'SubFigure2', 'SubFigure3', 'Formula']

In [5]:
# Read dwell data using pandas library
data = pd.read_csv("data/dwells.csv")

# set display.max_columns to none, to show all the columns when using head()
pd.set_option('display.max_columns', None)

In [6]:
# Preview dwells data
data.head()

Unnamed: 0,Respondent,VisitedAOI,Dwell Start,Dwell End,Number of Fixations in Dwell,Number of Saccades in Dwell,Dwell Time
0,P01,Paragraph4,133.1086,433.0785,1,1,299.9699
1,P01,Text_Area,133.1086,783.036,2,4,649.9274
2,P01,Paragraph1,566.4076,716.4244,1,0,150.0168
3,P01,Paragraph1,899.6993,6957.3621,23,22,6057.6628
4,P01,Text_Area,899.6993,6957.3621,23,22,6057.6628


In [7]:
#Keep only aois within AoisToConsider
data = data[data['VisitedAOI'].isin(AoisToConsider)].copy(deep=True)

In [8]:
#----------------------------------------------------------------------------------------
#
# 1.1 Identify transitions (including self-transitions e.g., transitions from AOI1 to AOI1)
#
#----------------------------------------------------------------------------------------

In [9]:
transitions = pd.DataFrame()
transitions[['Respondent','TransitionSource','TransitionTarget']] = data.apply(lambda x: utils.identifyTransition(data,x.name), axis=1)

In [10]:
# Preview transitions dataframe
transitions.head()

Unnamed: 0,Respondent,TransitionSource,TransitionTarget
0,P01,Paragraph4,Paragraph1
2,P01,Paragraph1,Formula
3,P01,Formula,Formula
5,P01,Paragraph1,Paragraph3
6,P01,Paragraph3,Paragraph1


In [11]:
# Generate transition matrix
transitionMatrix = utils.generateTransitionMatrix(transitions,'TransitionSource','TransitionTarget')

In [12]:
# Display transitionMatrix
display(transitionMatrix)

TransitionTarget,Paragraph4,Paragraph1,Formula,Paragraph3,Paragraph2,SubFigure1,SubFigure2,SubFigure3,Paragraph5,Paragraph6
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Paragraph4,7,1,3,5,4,2,4,0,0,0
Paragraph1,0,7,1,5,8,0,0,0,2,0
Formula,1,0,7,1,2,10,0,1,2,1
Paragraph3,3,4,1,3,2,1,3,0,5,1
Paragraph2,5,6,2,2,5,0,3,0,1,0
SubFigure1,3,0,6,3,0,14,21,0,1,8
SubFigure2,1,1,1,0,1,22,16,20,10,1
SubFigure3,0,0,4,0,0,2,18,3,0,5
Paragraph5,5,1,1,3,0,0,11,2,11,6
Paragraph6,1,0,0,0,0,8,1,1,7,24


In [13]:
# Export transitionMatrix as CSV
transitionMatrix.to_csv("data/transitionMatrix.csv")

In [14]:
#----------------------------------------------------------------------------------------
# Markov Model
#----------------------------------------------------------------------------------------

In [15]:
# Generate a transition matrix with values normalized over each row (by dividing all values by the sum of values in row)
transitionMatrixNormalized = utils.generateTransitionMatrix(transitions,'TransitionSource','TransitionTarget',normalize='index')

In [16]:
# Display transitionMatrixNormalized
display(transitionMatrixNormalized)

TransitionTarget,Paragraph4,Paragraph1,Formula,Paragraph3,Paragraph2,SubFigure1,SubFigure2,SubFigure3,Paragraph5,Paragraph6
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Paragraph4,0.269231,0.038462,0.115385,0.192308,0.153846,0.076923,0.153846,0.0,0.0,0.0
Paragraph1,0.0,0.304348,0.043478,0.217391,0.347826,0.0,0.0,0.0,0.086957,0.0
Formula,0.04,0.0,0.28,0.04,0.08,0.4,0.0,0.04,0.08,0.04
Paragraph3,0.130435,0.173913,0.043478,0.130435,0.086957,0.043478,0.130435,0.0,0.217391,0.043478
Paragraph2,0.208333,0.25,0.083333,0.083333,0.208333,0.0,0.125,0.0,0.041667,0.0
SubFigure1,0.053571,0.0,0.107143,0.053571,0.0,0.25,0.375,0.0,0.017857,0.142857
SubFigure2,0.013699,0.013699,0.013699,0.0,0.013699,0.30137,0.219178,0.273973,0.136986,0.013699
SubFigure3,0.0,0.0,0.125,0.0,0.0,0.0625,0.5625,0.09375,0.0,0.15625
Paragraph5,0.125,0.025,0.025,0.075,0.0,0.0,0.275,0.05,0.275,0.15
Paragraph6,0.02381,0.0,0.0,0.0,0.0,0.190476,0.02381,0.02381,0.166667,0.571429


In [17]:
# Export transitionMatrix as CSV
transitionMatrixNormalized.to_csv("data/transitionMatrixNormalized-MarkovModel.csv")

In [18]:
#----------------------------------------------------------------------------------------
#
# 1.2 Identify transitions (with no self-transitions)
#
#----------------------------------------------------------------------------------------

In [19]:
# drop consecutive duplicate VisitedAOIs
dataNoConsDupVisits = data[['VisitedAOI']]     
dataNoConsDupVisits = data[(dataNoConsDupVisits.ne(dataNoConsDupVisits.shift())).any(axis=1)]

In [20]:
#identify transitions
noSelfTransitions = pd.DataFrame()
noSelfTransitions[['Respondent','TransitionSource','TransitionTarget']] = dataNoConsDupVisits.apply(lambda x: utils.identifyTransition(dataNoConsDupVisits,x.name), axis=1)

In [21]:
# Preview noSelfTransitions dataframe
noSelfTransitions.head()

Unnamed: 0,Respondent,TransitionSource,TransitionTarget
0,P01,Paragraph4,Paragraph1
2,P01,Formula,Paragraph1
5,P01,Paragraph1,Paragraph2
7,P01,Paragraph1,Paragraph3
9,P01,Paragraph1,Paragraph2


In [22]:
# Generate transition matrix
noSelfTransitionsMatrix = utils.generateTransitionMatrix(noSelfTransitions,'TransitionSource','TransitionTarget')

In [23]:
# Display noSelfTransitionsMatrix
display(noSelfTransitionsMatrix)

TransitionTarget,Paragraph4,Formula,Paragraph1,Paragraph2,SubFigure1,SubFigure2,Paragraph3,Paragraph5,SubFigure3,Paragraph6
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Paragraph4,0,2,1,5,2,0,2,0,0,1
Formula,1,0,1,3,7,1,1,1,1,1
Paragraph1,0,0,0,8,0,0,2,1,0,0
Paragraph2,4,1,8,0,0,1,1,0,0,0
SubFigure1,2,4,0,1,0,17,3,1,1,4
SubFigure2,2,0,1,0,15,0,2,3,21,0
Paragraph3,1,0,1,2,3,2,0,4,0,1
Paragraph5,1,0,0,0,0,4,1,0,2,4
SubFigure3,0,3,0,0,0,11,0,0,0,3
Paragraph6,0,0,1,0,5,3,0,7,1,0


In [24]:
# Export noSelfTransitions as CSV
noSelfTransitionsMatrix.to_csv("data/noSelfTransitions.csv")

In [25]:
#----------------------------------------------------------------------------------------
# Markov Model
#----------------------------------------------------------------------------------------

In [26]:
# Generate a transition matrix with values normalized over each row (by dividing all values by the sum of values in row)
noSelfTransitionsMatrixNormalized = utils.generateTransitionMatrix(noSelfTransitions,'TransitionSource','TransitionTarget',normalize='index')

In [27]:
# Display noSelfTransitionsMatrixNormalized
display(noSelfTransitionsMatrixNormalized)

TransitionTarget,Paragraph4,Formula,Paragraph1,Paragraph2,SubFigure1,SubFigure2,Paragraph3,Paragraph5,SubFigure3,Paragraph6
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Paragraph4,0.0,0.153846,0.076923,0.384615,0.153846,0.0,0.153846,0.0,0.0,0.076923
Formula,0.058824,0.0,0.058824,0.176471,0.411765,0.058824,0.058824,0.058824,0.058824,0.058824
Paragraph1,0.0,0.0,0.0,0.727273,0.0,0.0,0.181818,0.090909,0.0,0.0
Paragraph2,0.266667,0.066667,0.533333,0.0,0.0,0.066667,0.066667,0.0,0.0,0.0
SubFigure1,0.060606,0.121212,0.0,0.030303,0.0,0.515152,0.090909,0.030303,0.030303,0.121212
SubFigure2,0.045455,0.0,0.022727,0.0,0.340909,0.0,0.045455,0.068182,0.477273,0.0
Paragraph3,0.071429,0.0,0.071429,0.142857,0.214286,0.142857,0.0,0.285714,0.0,0.071429
Paragraph5,0.083333,0.0,0.0,0.0,0.0,0.333333,0.083333,0.0,0.166667,0.333333
SubFigure3,0.0,0.176471,0.0,0.0,0.0,0.647059,0.0,0.0,0.0,0.176471
Paragraph6,0.0,0.0,0.058824,0.0,0.294118,0.176471,0.0,0.411765,0.058824,0.0


In [28]:
# Export noSelfTransitionsMatrixNormalized as CSV
noSelfTransitionsMatrixNormalized.to_csv("data/noSelfTransitionsNormalized--MarkovModel.csv")