In [115]:
import pandas as pd
import numpy as np

In [133]:
n400 = pd.read_csv("N400_by_trial.csv")
p600 = pd.read_csv("P600_by_trial.csv")
spr = pd.read_csv("SPR_by_trial.csv")

In [169]:
def extract_features(dataframe: pd.DataFrame, condition):
    """
    Extracts features from a dataframe of ERPs / SRP's. 
    
    Args:
        dataframe: A dataframe of ERPs / SRP's.
        condition: The condition to extract features from.
    Returns:
        A numpy array of features.
    """
    
    features = dataframe[dataframe["Condition"] == condition]
    features = features.drop(["Condition"], axis=1)
    features = features.set_index("ItemNum")
    features = np.expand_dims(features.to_numpy(), axis=0)
    return features

n400_control = extract_features(n400, "control")
n400_script_related = extract_features(n400, "script-related")
n400_script_unrelated = extract_features(n400, "script-unrelated")

p600_control = extract_features(p600, "control")
p600_script_related = extract_features(p600, "script-related")
p600_script_unrelated = extract_features(p600, "script-unrelated")

spr_control = extract_features(spr, "control")
spr_script_related = extract_features(spr, "script-related")
spr_script_unrelated = extract_features(spr, "script-unrelated")

### Visualise data

In [175]:
def print_item(item = 0):
    print(f"ItemNum: {item+1}")
    print(f"ERP's (n400): \n{n400_control[0][item]}")
    print(f"SPR: {spr_control[0][item]}")

print_item(0)

ItemNum: 1
ERP's (n400): 
[-1.79205279 -1.86712636 -1.70394696 -3.9231718  -4.74066061 -4.90966067
 -2.37030723 -1.98819744 -3.95095261 -3.93343971 -4.15195217 -2.20787836
 -3.72650287 -2.34174259  0.50864363 -1.46534323 -2.34354867 -2.66679243
 -0.31249529  0.80925981 -1.85934109 -1.26222229 -1.31437606 -1.49421967
 -0.46220971 -0.38511491]
SPR: [432.36363636]


In [200]:
# check data shape
print(spr_control.shape)
print(n400_control.shape)
print(p600_control.shape)

# check data type
print(type(spr_control))
print(n400_control.dtype)
print(p600_control.dtype)

(1, 90, 1)
(1, 90, 26)
(1, 90, 26)
<class 'numpy.ndarray'>
float64
float64


### Prepare data for training

In [153]:
from sklearn.model_selection import train_test_split

In [220]:
X = np.concatenate((n400_control, p600_control), axis=0)
y = np.reshape(spr_control, (90, 1, 1))

# reshape x to have 90 in first dimension
X = np.reshape(X, (90, X.shape[0], 26))

In [221]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [222]:
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")

X_train shape: (72, 2, 26)
y_train shape: (72, 1, 1)


### Construct RNN Model

In [99]:
# from tensorflow import Sequential
from keras import layers, Sequential

In [229]:
# make an rnn model that takes in the n400 and p600 data and predicts the spr data

model = Sequential()
model.add(layers.Input(shape=(2, 26)))
model.add(layers.LSTM(64, input_shape=(2, 26)))
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(32, activation="relu"))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(1, activation="relu"))

model.compile(optimizer="adam", loss="mse", metrics=["accuracy"])

model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_11 (LSTM)              (None, 64)                23296     
                                                                 
 dense_28 (Dense)            (None, 64)                4160      
                                                                 
 dense_29 (Dense)            (None, 32)                2080      
                                                                 
 dense_30 (Dense)            (None, 16)                528       
                                                                 
 dense_31 (Dense)            (None, 1)                 17        
                                                                 
Total params: 30,081
Trainable params: 30,081
Non-trainable params: 0
_________________________________________________________________


In [264]:
model.fit(X_train, y_train, epochs=300, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x27be556aee0>

### Test model

In [244]:
from sklearn.metrics import mean_squared_error

In [265]:
pred = model.predict(X_test)

# remove 1 dim from y_test to match pred
y_test_ = np.squeeze(y_test, axis=1)
pred = np.squeeze(pred, axis=1)

mse = mean_squared_error(y_test_, pred)
print(f"\nMSE: {mse}")

print("\nFirst 5 predictions: ")
for i in range(min(len(pred), 5)):
    print(f"pred: {pred[i]}", end=" ")
    print(f"actual: {y_test_[i]}")


MSE: 11880.073067788797

First 5 predictions: 
pred: 564.1483764648438 actual: 594.9
pred: 381.8934020996094 actual: 427.8
pred: 537.6162109375 actual: 425.4
pred: 319.4360046386719 actual: 471.9
pred: 459.2679138183594 actual: 432.3636363636364
