# Computational Psycholinguistics - Assignment 2

## Imports

In [72]:
import os
import random

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.io
import torch

## Setup

In [74]:
def set_global_seed(seed):
    """Seed PyTorch, NumPy and Python's ``random`` with one shared value.

    When CUDA is available, the CUDA generators are seeded too, cuDNN
    benchmarking is switched off and cuDNN deterministic mode is switched on.

    Args:
        seed: Seed value handed unchanged to every library.
    """
    # CPU-side generators, seeded in the order: PyTorch, NumPy, stdlib.
    for seed_generator in (torch.manual_seed, np.random.seed, random.seed):
        seed_generator(seed)

    # Everything below only applies when a CUDA device is present.
    if not torch.cuda.is_available():
        return

    # Current device first, then every visible device.
    for seed_cuda_generator in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_cuda_generator(seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Input: MATLAB file read with scipy.io.loadmat; its "sentences" and "ERP"
# entries are the ones used further down.
RAW_DATA_FILE_PATH = "./data/stimuli_erp.mat"
# Output: plain-text stimuli file, written with one lower-cased sentence per line.
EXPERIMENTAL_STIMULI_FILE_PATH = "./items/stimuli.txt"

# Seed Python's random, NumPy and PyTorch with one fixed value.
set_global_seed(3131)

## Loading the Raw Data

In [53]:
# Read the MATLAB file and keep the two entries this notebook works with.
dataset = scipy.io.loadmat(RAW_DATA_FILE_PATH)
erp_data = dataset["ERP"]
sentences = dataset["sentences"]

# Descend through the nested ERP structure by taking element 0 at each level
# and print the shape found there. The levels are, in order:
#   erp_data              NOTE: All ERP averages
#   erp_data[0]           NOTE: All ERP averages for sentence 0 as one single tuple
#   erp_data[0][0]        NOTE: All ERP averages for sentence 0 as a tuple of
#                               (words, participants, ERP components)
#   erp_data[0][0][0]     NOTE: All ERP averages for word 0 of sentence 0
#   erp_data[0][0][0][0]  NOTE: All ERP averages for participant 0 for word 0
#                               of sentence 0
_erp_level = erp_data
print(_erp_level.shape)
for _depth in range(4):
    _erp_level = _erp_level[0]
    print(_erp_level.shape)

# NOTE: P600 ERP average (component index 4) for participant 0 for word 0 of sentence 0
print(_erp_level[4])

(205, 1)
(1,)
(5, 24, 6)
(24, 6)
(6,)
-1.8152039051055908


## Generating the Plain Text Stimuli File

In [76]:
# Write every sentence of the raw data to the plain-text stimuli file, one
# sentence per line, with words lower-cased and separated by single spaces.

# Make sure the target directory exists so a fresh checkout does not fail
# with FileNotFoundError when the file is opened for writing.
stimuli_directory = os.path.dirname(EXPERIMENTAL_STIMULI_FILE_PATH)
if stimuli_directory:
    os.makedirs(stimuli_directory, exist_ok=True)

# An explicit encoding keeps the output identical across platforms instead of
# depending on the locale's default encoding.
with open(EXPERIMENTAL_STIMULI_FILE_PATH, "w", encoding="utf-8") as stimuli_file:
    for sentence in sentences:
        # Each row of `sentences` wraps the actual word array two levels deep.
        sentence_words = sentence[0][0]

        # Each word is itself wrapped in a one-element array. Periods are
        # split off into their own token ("end." -> "end .").
        tokens = [
            str(word[0]).lower().replace(".", " .")
            for word in sentence_words
        ]

        # Always terminate the line, even for an empty sentence, so that
        # line i of the file stays aligned with sentence i of the data.
        stimuli_file.write(" ".join(tokens) + "\n")

## Parsing the P600 Data by Sentence