In [115]:
import pandas as pd
import numpy as np

In [133]:
n400 = pd.read_csv("N400_by_trial.csv")
p600 = pd.read_csv("P600_by_trial.csv")
spr = pd.read_csv("SPR_by_trial.csv")

In [169]:
def extract_features(dataframe: pd.DataFrame, condition):
    """
    Extracts features from a dataframe of ERPs / SRP's. 
    
    Args:
        dataframe: A dataframe of ERPs / SRP's.
        condition: The condition to extract features from.
    Returns:
        A numpy array of features.
    """
    
    features = dataframe[dataframe["Condition"] == condition]
    features = features.drop(["Condition"], axis=1)
    features = features.set_index("ItemNum")
    features = np.expand_dims(features.to_numpy(), axis=0)
    return features

n400_control = extract_features(n400, "control")
n400_script_related = extract_features(n400, "script-related")
n400_script_unrelated = extract_features(n400, "script-unrelated")

p600_control = extract_features(p600, "control")
p600_script_related = extract_features(p600, "script-related")
p600_script_unrelated = extract_features(p600, "script-unrelated")

spr_control = extract_features(spr, "control")
spr_script_related = extract_features(spr, "script-related")
spr_script_unrelated = extract_features(spr, "script-unrelated")

### Visualise data

In [175]:
def print_item(item = 0):
    print(f"ItemNum: {item+1}")
    print(f"ERP's (n400): \n{n400_control[0][item]}")
    print(f"SPR: {spr_control[0][item]}")

print_item(0)

ItemNum: 1
ERP's (n400): 
[-1.79205279 -1.86712636 -1.70394696 -3.9231718  -4.74066061 -4.90966067
 -2.37030723 -1.98819744 -3.95095261 -3.93343971 -4.15195217 -2.20787836
 -3.72650287 -2.34174259  0.50864363 -1.46534323 -2.34354867 -2.66679243
 -0.31249529  0.80925981 -1.85934109 -1.26222229 -1.31437606 -1.49421967
 -0.46220971 -0.38511491]
SPR: [432.36363636]


In [200]:
# check data shape
print(spr_control.shape)
print(n400_control.shape)
print(p600_control.shape)

# check data type
print(type(spr_control))
print(n400_control.dtype)
print(p600_control.dtype)

(1, 90, 1)
(1, 90, 26)
(1, 90, 26)
<class 'numpy.ndarray'>
float64
float64


### Prepare data for training

In [153]:
from sklearn.model_selection import train_test_split

In [220]:
X = np.concatenate((n400_control, p600_control), axis=0)
y = np.reshape(spr_control, (90, 1, 1))

# reshape x to have 90 in first dimension
X = np.reshape(X, (90, X.shape[0], 26))

In [221]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [222]:
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")

X_train shape: (72, 2, 26)
y_train shape: (72, 1, 1)


### Construct RNN Model

In [99]:
# from tensorflow import Sequential
from keras import layers, Sequential

In [280]:
# make an rnn model that takes in the n400 and p600 data and predicts the spr data

model = Sequential()
model.add(layers.Input(shape=(2, 26)))
model.add(layers.LSTM(128, input_shape=(2, 26)))
model.add(layers.Reshape((1, 128)))
model.add(layers.LSTM(128, input_shape=(1, 128)))
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(32, activation="relu"))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(1, activation="relu"))

model.compile(optimizer="adam", loss="mse", metrics=['mae', 'mse'])

model.summary()

Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_31 (LSTM)              (None, 128)               79360     
                                                                 
 reshape_6 (Reshape)         (None, 1, 128)            0         
                                                                 
 lstm_32 (LSTM)              (None, 128)               131584    
                                                                 
 dense_57 (Dense)            (None, 64)                8256      
                                                                 
 dense_58 (Dense)            (None, 32)                2080      
                                                                 
 dense_59 (Dense)            (None, 16)                528       
                                                                 
 dense_60 (Dense)            (None, 1)               

In [284]:
model.fit(X_train, y_train, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x27be2abad90>

### Test model

In [244]:
from sklearn.metrics import mean_squared_error

In [285]:
pred = model.predict(X_test)

# remove 1 dim from y_test to match pred
y_test_ = np.squeeze(y_test, axis=1)
pred = np.squeeze(pred, axis=1)

mse = mean_squared_error(y_test_, pred)
print(f"\nMSE: {mse}")

print("\nFirst 5 predictions: ")
for i in range(min(len(pred), 5)):
    print(f"pred: {pred[i]}", end=" ")
    print(f"actual: {y_test_[i]}")


MSE: 24788.251399538858

First 5 predictions: 
pred: 623.7996215820312 actual: 594.9
pred: 387.26104736328125 actual: 427.8
pred: 646.1466674804688 actual: 425.4
pred: 342.8466796875 actual: 471.9
pred: 463.86016845703125 actual: 432.3636363636364
