In [22]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt

from utils import *
from word_fixations import *
from word_properties import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
# Load the per-stimulus word/fixation dictionary from the exported CSV.
# A forward slash is used as the separator: the previous backslash form contained
# the invalid escape sequence "\w" (a warning on recent Python versions) and only
# resolved on Windows. Forward slashes are accepted on Windows, Linux and macOS.
words_dict = get_word_sentence_fixations_dict_from_csv('word_sentence_fixations/words_dict_romanian_theo.csv')

In [24]:
# Later cells read an 'average_TRT' value from every word entry; presumably this
# call is what adds it (TODO confirm against compute_average_TRT in the project module).
# NOTE(review): words_dict is rebound to the helper's return value, so this cell
# must run after the CSV load cell above.
words_dict = compute_average_TRT(words_dict)

In [27]:
# Debug dump: walk stimulus -> word entry -> per-subject fixation record and
# print every field so the loaded structure can be checked by eye.
for stimulus_key, stimulus_words in words_dict.items():
	print(f"Stimulus: {stimulus_key}")
	for word_idx, word_entry in stimulus_words.items():
		print(f"  Word Index: {word_idx}")
		word = word_entry['word']
		sentence = word_entry['sentence']
		word_idx_in_sentence = word_entry['word_idx_in_sentence']
		properties = word_entry['properties']
		average_TRT = word_entry['average_TRT']
		print(f"    Word: {word}")
		print(f"    Sentence: {sentence}")
		# Report the stored in-sentence position; the stimulus-level word_idx was
		# printed here before, which mislabels the value.
		print(f"    Word Index in Sentence: {word_idx_in_sentence}")
		print(f"    Properties: {properties}")
		print(f"    Average TRT: {average_TRT}")
		for subject_id, fixations in word_entry['subjects_fixations'].items():
			print(f"      Subject ID: {subject_id}")
			print(f"        Fixations: {fixations}")

Stimulus: arg_pisarapanui_11_page_1
  Word Index: 0
    Word: Rapa
    Sentence: Rapa Nui - Introducere
    Word Index in Sentence: 0
    Properties: <word_properties.WordProperties object at 0x000002BE1D651460>
    Average TRT: 0.0
      Subject ID: 23
        Fixations: Fixations: [], TRT: 0
  Word Index: 1
    Word: Nui
    Sentence: Rapa Nui - Introducere
    Word Index in Sentence: 1
    Properties: <word_properties.WordProperties object at 0x000002BE1D651370>
    Average TRT: 398.0
      Subject ID: 23
        Fixations: Fixations: [206, 192], TRT: 398
  Word Index: 2
    Word: -
    Sentence: Rapa Nui - Introducere
    Word Index in Sentence: 2
    Properties: <word_properties.WordProperties object at 0x000002BE1D651C70>
    Average TRT: 0.0
      Subject ID: 23
        Fixations: Fixations: [], TRT: 0
  Word Index: 3
    Word: Introducere
    Sentence: Rapa Nui - Introducere
    Word Index in Sentence: 3
    Properties: <word_properties.WordProperties object at 0x000002BE1D651A

In [25]:
# Keep only the stimuli whose key contains 'page'; the per-stimulus word
# dictionaries are shared with words_dict, not copied.
words_dict_reading = {
	stimulus_key: stimulus_words
	for stimulus_key, stimulus_words in words_dict.items()
	if 'page' in stimulus_key
}

In [26]:
# Run compute_properties() on the properties object of every word in the
# reading stimuli, logging each (stimulus, word index) pair as it is processed.
for stimulus_key, stimulus_words in words_dict_reading.items():
	for word_idx, word_entry in stimulus_words.items():
		print(f"Computing properties for {stimulus_key} - Word Index: {word_idx}")
		word_entry['properties'].compute_properties()

Computing properties for arg_pisarapanui_11_page_1 - Word Index: 0
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 1
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 2
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 3
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 4
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 5
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 6
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 7
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 8
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 9
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 10
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 11
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 12
Computing properties for arg_pisarapanui_11_page_1 - Word Index: 13
Computing properties for arg_pisarapanui_11_page_1 - Word 

In [28]:
# Flatten the reading stimuli into two parallel arrays (same traversal order
# for both): average TRT per word and the word's computed length.
trt_reading = np.array([
	word_entry['average_TRT']
	for stimulus_words in words_dict_reading.values()
	for word_entry in stimulus_words.values()
])
length_reading = np.array([
	word_entry['properties'].length
	for stimulus_words in words_dict_reading.values()
	for word_entry in stimulus_words.values()
])

In [29]:
# Correlation coefficient between average TRT and word length: the off-diagonal
# entry of the 2x2 matrix returned by np.corrcoef.
length_corr_reading = np.corrcoef(
	trt_reading,
	length_reading,
)[0][1]
print(f"Correlation between TRT and length (reading): {length_corr_reading}")

Correlation between TRT and length (reading): 0.5441491117804048


In [18]:
# Rebuild the dictionary with every stimulus key preserved, keeping only the
# entries whose 'word' consists solely of alphabetic characters (str.isalpha).
words_dict_no_nonwords = {
	stimulus_key: {
		word_idx: word_entry
		for word_idx, word_entry in stimulus_words.items()
		if word_entry['word'].isalpha()
	}
	for stimulus_key, stimulus_words in words_dict.items()
}

In [19]:
# Run compute_properties() on the properties object of every retained word.
for stimulus_words in words_dict_no_nonwords.values():
	for word_entry in stimulus_words.values():
		word_entry['properties'].compute_properties()

In [20]:
# Flatten the filtered dictionary into three parallel arrays (identical
# traversal order): average TRT, word length and word frequency.
trt_no_nonwords = np.array([
	word_entry['average_TRT']
	for stimulus_words in words_dict_no_nonwords.values()
	for word_entry in stimulus_words.values()
])
length_no_nonwords = np.array([
	word_entry['properties'].length
	for stimulus_words in words_dict_no_nonwords.values()
	for word_entry in stimulus_words.values()
])
frequency_no_nonwords = np.array([
	word_entry['properties'].frequency
	for stimulus_words in words_dict_no_nonwords.values()
	for word_entry in stimulus_words.values()
])

In [21]:
# Correlation of average TRT with each word property; in both cases the value
# of interest is the off-diagonal entry of the 2x2 matrix from np.corrcoef.
length_corr_no_nonwords = np.corrcoef(
	trt_no_nonwords,
	length_no_nonwords,
)[0][1]
frequency_corr_no_nonwords = np.corrcoef(
	trt_no_nonwords,
	frequency_no_nonwords,
)[0][1]
print(f"Correlation between TRT and length: {length_corr_no_nonwords}")
print(f"Correlation between TRT and frequency: {frequency_corr_no_nonwords}")

Correlation between TRT and length: 0.49470941597089924
Correlation between TRT and frequency: -0.0908769013157379
