# Computational Psycholinguistics - Assignment 2

## Imports

In [1]:
from get_predictions import get_predictions

import random

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.io
import torch

## Setup

In [2]:
def set_global_seed(seed):
    """Seed PyTorch, NumPy and Python's `random` module with the same value.

    When a CUDA device is available, the CUDA generators are seeded as well
    and cuDNN is switched to its deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed handed to every random number generator.
    """
    for seed_library in (torch.manual_seed, np.random.seed, random.seed):
        seed_library(seed)

    # Nothing GPU-related to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Input ERP dataset (MATLAB file) and the plain-text stimuli file this notebook writes.
RAW_DATA_FILE_PATH = "./data/stimuli_erp.mat"
EXPERIMENTAL_STIMULI_FILE_PATH = "./items/stimuli.txt"

# Arguments passed to get_predictions further down.
ITEM_SETS = ["stimuli"]
LANGUAGE = "en"
# Request the GPU only when one is actually present, mirroring the
# torch.cuda.is_available() guard in set_global_seed. A hard-coded True
# presumably breaks get_predictions on CPU-only machines (verify there).
CUDA = torch.cuda.is_available()

# Single seed shared by every random number generator used in this notebook.
SEED = 3131
set_global_seed(SEED)

## Loading the Raw Data

In [3]:
dataset = scipy.io.loadmat(RAW_DATA_FILE_PATH)
sentences, erp_data = dataset["sentences"], dataset["ERP"]

# Walk down the nested ERP structure one level at a time for sentence 0.
first_sentence_cell = erp_data[0]            # single-element wrapper around sentence 0
first_sentence_erp = first_sentence_cell[0]  # (words, participants, ERP components)
first_word_erp = first_sentence_erp[0]       # word 0 of sentence 0
first_participant_erp = first_word_erp[0]    # participant 0 for word 0 of sentence 0

print(erp_data.shape)  # NOTE: All ERP averages
print(first_sentence_cell.shape)  # NOTE: All ERP averages for sentence 0, still wrapped
print(first_sentence_erp.shape)  # NOTE: Sentence 0 as (words, participants, ERP components)
print(first_word_erp.shape)  # NOTE: All ERP averages for word 0 of sentence 0
print(first_participant_erp.shape)  # NOTE: All ERP averages for participant 0, word 0, sentence 0
print(first_participant_erp[4])  # NOTE: P600 ERP average for participant 0, word 0, sentence 0

(205, 1)
(1,)
(5, 24, 6)
(24, 6)
(6,)
-1.8152039051055908


## Parsing the Sentence Data and Generating the Plain Text Stimuli File

In [4]:
parsed_sentence_data = []

with open(EXPERIMENTAL_STIMULI_FILE_PATH, "w") as stimuli_file:
    for sentence_entry in sentences:
        # Unwrap the nested cell to reach the array of per-word cells.
        word_cells = sentence_entry[0][0]

        # Keep the words exactly as stored (original casing and punctuation).
        original_words = [str(word_cell[0]) for word_cell in word_cells]
        parsed_sentence_data.append(original_words)

        # The stimuli file gets lower-cased words, with every "." split off
        # as its own space-separated token.
        stimulus_tokens = [
            original_word.lower().replace(".", " .")
            for original_word in original_words
        ]

        # One sentence per line; a sentence without words writes nothing at all.
        if stimulus_tokens:
            stimuli_file.write(" ".join(stimulus_tokens) + "\n")

## Parsing the P600 Data by Sentence

In [5]:
# Position of the P600 average on the ERP-component axis (the last axis);
# this is the same index the data-inspection cell reads for its P600 value.
P600_COMPONENT_INDEX = 4

# parsed_p600_data[sentence][participant] is the list of P600 averages
# (plain Python floats) for each word of that sentence.
parsed_p600_data = []

for erp_sentence_entry in erp_data:
    # Unwrap the single-element container; per the inspection output the
    # result is indexed as (words, participants, ERP components).
    sentence_erp = erp_sentence_entry[0]

    # Deliberately NOT named `parsed_sentence_data`: reusing that name here
    # would overwrite the list of parsed sentences built by the previous cell.
    sentence_p600_by_participant = [
        [float(p600) for p600 in sentence_erp[:, participant, P600_COMPONENT_INDEX]]
        for participant in range(sentence_erp.shape[1])
    ]

    parsed_p600_data.append(sentence_p600_by_participant)

## Generating the Model Surprisal Values

In [6]:
# Run the project-local get_predictions helper in "surp" mode for the
# configured item sets and language; CUDA is passed through as its last argument.
# NOTE(review): presumably this computes surprisal values with each trained
# model listed in the cell output — confirm against get_predictions.py.
get_predictions(ITEM_SETS, LANGUAGE, "surp", CUDA)

stimuli
C:/Users/Daan/Documents/Projecten/ru-computational-psycholinguistics-23-24/assignment-2/code/trained_models/nwp_model_lstm_en_10000
C:/Users/Daan/Documents/Projecten/ru-computational-psycholinguistics-23-24/assignment-2/code/trained_models/nwp_model_lstm_en_30000
C:/Users/Daan/Documents/Projecten/ru-computational-psycholinguistics-23-24/assignment-2/code/trained_models/nwp_model_lstm_en_100000
C:/Users/Daan/Documents/Projecten/ru-computational-psycholinguistics-23-24/assignment-2/code/trained_models/nwp_model_lstm_en_300000
C:/Users/Daan/Documents/Projecten/ru-computational-psycholinguistics-23-24/assignment-2/code/trained_models/nwp_model_lstm_en_1000000
C:/Users/Daan/Documents/Projecten/ru-computational-psycholinguistics-23-24/assignment-2/code/trained_models/nwp_model_lstm_en_3000000
C:/Users/Daan/Documents/Projecten/ru-computational-psycholinguistics-23-24/assignment-2/code/trained_models/nwp_model_lstm_en_10000000
C:/Users/Daan/Documents/Projecten/ru-computational-psycholi

## Generating the Model Gradients

In [7]:
# Run the project-local get_predictions helper in "grad" mode with the same
# item sets, language and CUDA flag as the surprisal run.
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt, so
# this run did not finish — re-run to completion before relying on its results.
get_predictions(ITEM_SETS, LANGUAGE, "grad", CUDA)

stimuli
C:/Users/Daan/Documents/Projecten/ru-computational-psycholinguistics-23-24/assignment-2/code/trained_models/nwp_model_lstm_en_10000


KeyboardInterrupt: 