In [63]:
import pandas as pd
from matplotlib.pyplot import figure
from scipy.stats import entropy
import utils
import numpy as np

In [64]:
#---------------------------------------------------
#
# Config *** TO UPDATE ***
#
# Paths of the two CSV inputs read by this notebook.
# Both are relative paths, so they resolve against the
# kernel's working directory.
#
#---------------------------------------------------

# *** TO UPDATE: replace the team number in this path with your own team number
# AOI definitions; the frame displayed below has the columns AOI, p1x, p1y, p2x, p2y
AOIS_DEFINITION_FILE = "raw-data/team2/aoisDefinition.csv"

# Dwell records (one row per dwell of a respondent on an AOI).
# This file should be automatically generated from "5. AOI Visits"
DWELLS_FILE = "data/dwells.csv"

In [65]:
#----------------------------------------------------------------------------------------
#
#  1. Transition Matrix and Markov Model
#  (Explanation in Slides 42-43)
#
#----------------------------------------------------------------------------------------

In [66]:
#----------------------------------------------------------------------------------------
#Assumption: the AOIs are not overlapping
#----------------------------------------------------------------------------------------

In [67]:
# Load the AOI definitions from the CSV configured in AOIS_DEFINITION_FILE
aois_df = pd.read_csv(filepath_or_buffer=AOIS_DEFINITION_FILE)

In [68]:
# Display the AOI definitions (AOI name plus the p1x/p1y/p2x/p2y coordinate columns)
display(aois_df)

Unnamed: 0,AOI,p1x,p1y,p2x,p2y
0,Chat,1548.984881,676.457883,1683.110151,737.105832
1,CTA_1,287.041037,443.196544,506.306695,508.509719
2,Date_Place_1,192.570194,846.738661,488.812095,957.537797
3,Date_Place_2,757.062635,87.473002,1045.140389,169.114471
4,Description_1,191.403888,968.034557,584.449244,1042.678186
5,Description_2,757.062635,177.278618,1167.602592,319.568035
6,Fav,1365.87473,675.291577,1524.492441,733.606911
7,Food,155.24838,88.639309,636.933045,435.032397
8,Gen_Info,193.736501,523.671706,495.809935,600.647948
9,Ingredients,757.062635,337.062635,989.157667,669.460043


In [69]:
# Collect the names in the "AOI" column as a plain Python list;
# only dwells on these AOIs are kept in the analysis below
AoisToConsider = aois_df.loc[:, "AOI"].to_list()
print(f'AOIs: {AoisToConsider}')

AOIs: ['Chat', 'CTA_1', 'Date_Place_1', 'Date_Place_2', 'Description_1', 'Description_2', 'Fav', 'Food', 'Gen_Info', 'Ingredients', 'Instructions', 'Online', 'Participants_Overview', 'Participant_Bio', 'Participant_Profile_Picture', 'Place', 'Responsabilities', 'Status', 'Verified']


In [70]:
# Lift the column display limit so that head() shows every column
pd.options.display.max_columns = None

# Load the dwell records from the CSV configured in DWELLS_FILE
data = pd.read_csv(filepath_or_buffer=DWELLS_FILE)

In [71]:
# Preview the first rows of the dwell data (Respondent, VisitedAOI, dwell timing and fixation/saccade counts)
data.head()

Unnamed: 0,Respondent,VisitedAOI,Dwell Start,Dwell End,Number of Fixations in Dwell,Number of Saccades in Dwell,Dwell Time
0,P01,Responsabilities,117.4537,375.7792,1,0,258.3255
1,P01,CTA_1,742.4064,1100.6684,2,4,358.262
2,P01,Food,1200.7011,1567.3256,1,1,366.6245
3,P01,Food,1783.9405,2333.8938,2,4,549.9533
4,P01,Food,3658.7234,3867.0633,1,1,208.3399


In [72]:
# Restrict the dwells to the AOIs listed in AoisToConsider.
# copy() is deep by default, so the result is independent of the unfiltered frame.
data = data.loc[data['VisitedAOI'].isin(AoisToConsider)].copy()

In [73]:
#----------------------------------------------------------------------------------------
#
# 1.1 Identify transitions (including self-transitions e.g., transitions from AOI1 to AOI1)
#
#----------------------------------------------------------------------------------------

In [74]:
# Build one transition record per dwell row. utils.identifyTransition receives the
# full dwell frame plus the index label of the current row (row.name).
transitions = pd.DataFrame()
transitions[['Respondent','TransitionSource','TransitionTarget']] = data.apply(
    lambda row: utils.identifyTransition(data, row.name),
    axis=1,
)

In [75]:
# Preview transitions dataframe (self-transitions such as Food -> Food are still included here)
transitions.head()

Unnamed: 0,Respondent,TransitionSource,TransitionTarget
0,P01,Responsabilities,CTA_1
1,P01,CTA_1,Food
2,P01,Food,Food
3,P01,Food,Food
4,P01,Food,Food


In [76]:
# Generate transition matrix
# The result is indexed by TransitionSource (rows) and TransitionTarget (columns).
# utils.generateTransitionMatrix is defined in the local utils module, not in this notebook;
# the cells presumably hold transition counts -- verify against utils.
transitionMatrix = utils.generateTransitionMatrix(transitions,'TransitionSource','TransitionTarget')

In [77]:
# Display transitionMatrix (rows: TransitionSource, columns: TransitionTarget)
display(transitionMatrix)

TransitionTarget,Responsabilities,CTA_1,Food,Gen_Info,Participants_Overview,Date_Place_1,Description_1,Description_2,Ingredients,Instructions,Participant_Profile_Picture,Fav,Chat,Participant_Bio,Online,Verified,Place,Status,Date_Place_2
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Responsabilities,0,1,0,0,0,1,0,1,18,1,0,0,0,0,0,0,0,0,1
CTA_1,0,0,7,7,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
Food,0,6,7,3,5,1,0,0,2,0,0,0,0,0,0,0,0,0,3
Gen_Info,0,6,5,11,13,1,0,2,1,0,1,1,0,0,0,0,0,0,0
Participants_Overview,0,2,1,18,6,15,1,0,2,1,1,0,0,0,0,0,0,0,0
Date_Place_1,1,0,1,0,11,10,14,1,1,2,0,0,0,0,0,0,0,0,0
Description_1,0,0,0,0,3,9,7,2,0,2,1,0,0,0,0,0,0,0,0
Description_2,0,0,1,0,0,1,0,3,5,2,1,0,0,1,0,0,0,0,7
Ingredients,21,1,1,1,1,0,0,7,5,9,0,0,0,0,0,0,0,0,1
Instructions,0,0,0,0,1,0,1,1,9,19,6,1,0,1,1,0,0,0,0


In [78]:
# Export transitionMatrix as CSV (overwrites data/transitionMatrix.csv if it already exists)
transitionMatrix.to_csv("data/transitionMatrix.csv")

In [79]:
#----------------------------------------------------------------------------------------
# Markov Model
#----------------------------------------------------------------------------------------

In [80]:
# Generate a transition matrix with values normalized over each row (by dividing all values by the sum of values in row)
# With normalize='index' every row sums to 1, i.e. a row gives the share of transitions
# leaving that source AOI towards each target AOI.
transitionMatrixNormalized = utils.generateTransitionMatrix(transitions,'TransitionSource','TransitionTarget',normalize='index')

In [81]:
# Display transitionMatrixNormalized (row-normalized transition matrix, self-transitions included)
display(transitionMatrixNormalized)

TransitionTarget,Responsabilities,CTA_1,Food,Gen_Info,Participants_Overview,Date_Place_1,Description_1,Description_2,Ingredients,Instructions,Participant_Profile_Picture,Fav,Chat,Participant_Bio,Online,Verified,Place,Status,Date_Place_2
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Responsabilities,0.0,0.043478,0.0,0.0,0.0,0.043478,0.0,0.043478,0.782609,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478
CTA_1,0.0,0.0,0.4375,0.4375,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Food,0.0,0.222222,0.259259,0.111111,0.185185,0.037037,0.0,0.0,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111
Gen_Info,0.0,0.146341,0.121951,0.268293,0.317073,0.02439,0.0,0.04878,0.02439,0.0,0.02439,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Participants_Overview,0.0,0.042553,0.021277,0.382979,0.12766,0.319149,0.021277,0.0,0.042553,0.021277,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Date_Place_1,0.02439,0.0,0.02439,0.0,0.268293,0.243902,0.341463,0.02439,0.02439,0.04878,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Description_1,0.0,0.0,0.0,0.0,0.125,0.375,0.291667,0.083333,0.0,0.083333,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Description_2,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.142857,0.238095,0.095238,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.333333
Ingredients,0.446809,0.021277,0.021277,0.021277,0.021277,0.0,0.0,0.148936,0.106383,0.191489,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277
Instructions,0.0,0.0,0.0,0.0,0.025,0.0,0.025,0.025,0.225,0.475,0.15,0.025,0.0,0.025,0.025,0.0,0.0,0.0,0.0


In [82]:
# Export transitionMatrixNormalized (the row-normalized matrix, not the raw transitionMatrix) as CSV
transitionMatrixNormalized.to_csv("data/transitionMatrixNormalized-MarkovModel.csv")

In [83]:
#----------------------------------------------------------------------------------------
#
# 1.2 Identify transitions (with no self-transitions)
#
#----------------------------------------------------------------------------------------

In [84]:
# Drop consecutive duplicate VisitedAOIs so that no self-transition remains.
# A dwell is kept when its AOI OR its Respondent differs from the previous row.
# Comparing the Respondent as well keeps the first dwell of every respondent, even
# when it lands on the same AOI as the last dwell of the previous respondent
# (comparing VisitedAOI alone would silently drop that row).
# Assumes the rows of `data` are grouped by respondent and ordered in time -- TODO confirm.
# NOTE(review): utils.identifyTransition is expected not to pair rows that belong to
# different respondents -- verify in utils.
respondentAndAoi = data[['Respondent', 'VisitedAOI']]
isNewVisit = respondentAndAoi.ne(respondentAndAoi.shift()).any(axis=1)
dataNoConsDupVisits = data[isNewVisit]

In [85]:
# Identify transitions on the de-duplicated visits. utils.identifyTransition receives
# the de-duplicated frame plus the index label of the current row (row.name).
noSelfTransitions = pd.DataFrame()
noSelfTransitions[['Respondent','TransitionSource','TransitionTarget']] = dataNoConsDupVisits.apply(
    lambda row: utils.identifyTransition(dataNoConsDupVisits, row.name),
    axis=1,
)

In [86]:
# Preview noSelfTransitions dataframe (index labels have gaps where consecutive duplicate visits were dropped)
noSelfTransitions.head()

Unnamed: 0,Respondent,TransitionSource,TransitionTarget
0,P01,Responsabilities,CTA_1
1,P01,CTA_1,Food
2,P01,Food,CTA_1
7,P01,Description_1,Description_2
8,P01,Description_2,Ingredients


In [87]:
# Generate transition matrix from the transitions without self-transitions
# The result is indexed by TransitionSource (rows) and TransitionTarget (columns).
# utils.generateTransitionMatrix is defined in the local utils module, not in this notebook;
# the cells presumably hold transition counts -- verify against utils.
noSelfTransitionsMatrix = utils.generateTransitionMatrix(noSelfTransitions,'TransitionSource','TransitionTarget')

In [88]:
# Display noSelfTransitionsMatrix (rows: TransitionSource, columns: TransitionTarget)
display(noSelfTransitionsMatrix)

TransitionTarget,Responsabilities,CTA_1,Food,Description_1,Description_2,Ingredients,Instructions,Participant_Profile_Picture,Fav,Chat,Participant_Bio,Online,Verified,Place,Status,Participants_Overview,Date_Place_1,Gen_Info,Date_Place_2
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Responsabilities,0,1,0,0,1,13,1,0,0,0,0,0,0,0,0,0,1,0,1
CTA_1,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,1,0,5,0
Food,0,4,0,0,0,1,0,0,0,0,0,0,0,0,0,5,1,3,2
Description_1,0,0,0,0,2,0,2,1,0,0,0,0,0,0,0,3,8,0,0
Description_2,0,0,1,0,0,5,2,0,0,0,1,0,0,0,0,0,1,0,6
Ingredients,15,1,0,0,6,0,8,0,0,0,0,0,0,0,0,1,0,1,1
Instructions,0,0,0,1,1,8,0,5,1,0,1,1,0,0,0,1,0,0,0
Participant_Profile_Picture,0,0,3,0,0,1,0,0,4,0,4,0,0,0,0,0,0,1,1
Fav,0,0,0,0,0,0,1,0,0,6,3,1,0,1,0,0,0,0,0
Chat,0,0,0,0,0,1,0,0,1,0,5,0,0,0,0,1,0,0,0


In [89]:
# Export noSelfTransitionsMatrix (the matrix, not the noSelfTransitions record list) as CSV
noSelfTransitionsMatrix.to_csv("data/noSelfTransitions.csv")

In [90]:
#----------------------------------------------------------------------------------------
# Markov Model
#----------------------------------------------------------------------------------------

In [91]:
# Generate a transition matrix with values normalized over each row (by dividing all values by the sum of values in row)
# With normalize='index' every row sums to 1; self-transitions were removed beforehand,
# so a row gives the share of transitions leaving that source AOI towards each other AOI.
noSelfTransitionsMatrixNormalized = utils.generateTransitionMatrix(noSelfTransitions,'TransitionSource','TransitionTarget',normalize='index')

In [92]:
# Display noSelfTransitionsMatrixNormalized (row-normalized transition matrix without self-transitions)
display(noSelfTransitionsMatrixNormalized)

TransitionTarget,Responsabilities,CTA_1,Food,Description_1,Description_2,Ingredients,Instructions,Participant_Profile_Picture,Fav,Chat,Participant_Bio,Online,Verified,Place,Status,Participants_Overview,Date_Place_1,Gen_Info,Date_Place_2
TransitionSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Responsabilities,0.0,0.055556,0.0,0.0,0.055556,0.722222,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.055556
CTA_1,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.416667,0.0
Food,0.0,0.25,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3125,0.0625,0.1875,0.125
Description_1,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1875,0.5,0.0,0.0
Description_2,0.0,0.0,0.0625,0.0,0.0,0.3125,0.125,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.375
Ingredients,0.454545,0.030303,0.0,0.0,0.181818,0.0,0.242424,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.030303,0.030303
Instructions,0.0,0.0,0.0,0.052632,0.052632,0.421053,0.0,0.263158,0.052632,0.0,0.052632,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0
Participant_Profile_Picture,0.0,0.0,0.214286,0.0,0.0,0.071429,0.0,0.0,0.285714,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429
Fav,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.5,0.25,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,0.0
Chat,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.125,0.0,0.625,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0


In [93]:
# Export noSelfTransitionsMatrixNormalized as CSV
# NOTE(review): this file name uses a double dash ("--MarkovModel") while the self-transition
# export uses a single dash; left unchanged in case downstream steps read this exact path.
noSelfTransitionsMatrixNormalized.to_csv("data/noSelfTransitionsNormalized--MarkovModel.csv")