In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, GaussianNoise, Flatten, Activation, Dropout, RepeatVector, Permute, Lambda, concatenate, dot, multiply
from tensorflow.keras.models import Model
from keras.layers import TimeDistributed
from keras.models import load_model
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
import os
import time
def normalize(data):
  min = np.min(data, axis = 0)
  max = np.max(data, axis = 0)
  #print('min, max',min, max)
  data = (data-min)/(max-min)
  return data

# Generate sample data
def generate_data(n_points, n_features, window_size, data = None):
    if data is None:
      data = np.random.randn(n_points, n_features)
    data = normalize(data)
    #data = np.random.normal(loc= 0.0, scale= 1.0 ,size = (n_points, n_features))
    X = []
    Y = []
    #print(data)
    for i in range(0, n_points-2*window_size+1):
      X.append(data[i:i+window_size,:])
      Y.append(data[i+window_size:i+2*window_size,:])
    return X, Y

In [2]:
#connections =['LPFC --> RPFC ','LPFC --> LPMC ','LPFC --> RPMC ','LPFC --> LPAR ','LPFC --> RPAR ','LPFC --> LSMA ','RPFC --> LPFC ','RPFC --> LPMC ','RPFC --> RPMC ','RPFC --> LPAR ','RPFC --> RPAR ','RPFC --> LSMA ','LPMC --> LPFC ','LPMC --> RPFC ','LPMC --> RPMC ','LPMC --> LPAR ','LPMC --> RPAR ','LPMC --> LSMA ','RPMC --> LPFC ','RPMC --> RPFC ','RPMC --> LPMC ','RPMC --> LPAR ','RPMC --> RPAR ','RPMC --> LSMA ','LPAR --> LPFC ','LPAR --> RPFC ','LPAR --> LPMC ','LPAR --> RPMC ','LPAR --> RPAR ','LPAR --> LSMA ','RPAR --> LPFC ','RPAR --> RPFC ','RPAR --> LPMC ','RPAR --> RPMC ','RPAR --> LPAR ','RPAR --> LSMA ','LSMA --> LPFC ','LSMA --> RPFC ','LSMA --> LPMC ','LSMA --> RPMC ','LSMA --> LPAR ','LSMA --> RPAR ']
connections1 = ['LPFC-->RPFC','LPFC-->LPMC','LPFC-->RPMC','LPFC-->SMA',\
    'RPFC-->LPFC','RPFC-->LPMC','RPFC-->RPMC','RPFC-->SMA',\
    'LPMC-->LPFC','LPMC-->RPFC','LPMC-->RPMC','LPMC-->SMA',\
    'RPMC-->LPFC','RPMC-->RPFC','RPMC-->LPMC','RPMC-->SMA',\
    'SMA-->LPFC','SMA-->RPFC','SMA-->LPMC','SMA-->RPMC','Class']

connections2 = ['LPFC-->LPFC','LPFC-->RPFC','LPFC-->LPMC','LPFC-->RPMC','LPFC-->SMA','RPFC-->LPFC','RPFC-->RPFC','RPFC-->LPMC','RPFC-->RPMC','RPFC-->SMA','LPMC-->LPFC','LPMC-->RPFC','LPMC-->LPMC','LPMC-->RPMC','LPMC-->SMA','RPMC-->LPFC','RPMC-->RPFC','RPMC-->LPMC','RPMC-->RPMC','RPMC-->SMA','SMA-->LPFC','SMA-->RPFC','SMA-->LPMC','SMA-->RPMC','SMA-->SMA','Class']

In [3]:
# "" This function groups the channels in each brain region andsend the regional signals ""
Lpfc_channels = {'AFp1','AFF5h','F7','AFF1h','FFC5h'}
Rpfc_channels = {'AFp2','AFF6H','F8','AFF2h','FFC6h'}
sma_channels = {'FFC3h', 'FCC3h','FFC4h', 'FCC4h'}
lpmc_channels = {'FCC5h','CCP5h','CCP3h','P3','P5','PO7','P7','PO3'}
rpmc_channels = {'FCC6h','CCP6h','CCP4h','P4','P6','PO8','P8','PO4'}
Lpfc_ch_num =[0,1,2,3,4]
Rpfc_ch_num =[16,17,18,19,20]
SMA_ch_num  =[5,7,21,23]
Lpmc_ch_num =[6,8,9,10,11,12,13,14]
Rpmc_ch_num =[22,24,25,26,27,28,29,30]

# channels not in the ROI: 15,31, the removal of these channels from the features set will effect the channel number/order
chan_removed = [15,31]
# Modified channel numbers
Lpfc_ch_num =[0,1,2,3,4]    # no change in order
Rpfc_ch_num =[15,16,17,18,19]   # Modified
SMA_ch_num  =[5,7,20,22]  # Modified
Lpmc_ch_num =[6,8,9,10,11,12,13,14]  # no change in order
Rpmc_ch_num =[21,23,24,25,26,27,28,29]  #Modified

Reg_ch_num =[[0,1,2,3,4],
[15,16,17,18,19],
[5,7,20,22],
[6,8,9,10,11,12,13,14],
[21,23,24,25,26,27,28,29]]

# list of channel in the order it appears in the dataset
chan_list =['AFp1','AFF5h','F7','AFF1h','FFC5h','FFC3h','FCC5h','FCC3h',\
            'CCP5h','CCP3h','P3','P5','PO7','P7','PO3','FT7','AFp2',\
            'AFF6H','F8','AFF2h','FFC6h','FFC4h','FCC6h','FCC4h','CCP6h',\
            'CCP4h','P4','P6','PO8','P8','PO4','FT8']
# for i, item in enumerate(chan_list):
#   print(i,item)

In [4]:
# Parameters
win_size = [3,5] #[3,5,7,9] # input = ouput = window_size
eps = 2000 # epochs
bs = 16 # Batch size
latent_dim =  64 # 128 #(optimal)
n_Bregions = 5
st = time.time()
phase = 'ST10'
Days = ['D10','D11']

for Day in Days:
  # /content/EEG_signal_copy
  # directories for alpha band signals
  dir = # directory of the EEG activation/signal files
  csvfilename = # directory to stoare the computed connectivities of the group
  file_path = # directory to save best window based on R-square value
  
  Causal_allSub = []
  sub_num = 1 # counter for subjects
  r_squares = []  # list of r-squares (max) of full models after tuning
  best_windows = []  # list of best windows full models after model tuning
  for sub in sorted(os.listdir(dir)):
    print(f'{sub_num} filename: {sub}')
    sub_num += 1
    data = pd.read_csv(os.path.join(dir,sub), header=None, index_col = None)
    data.drop(data.columns[chan_removed], axis=0, inplace=True) # remove the out of ROI channels
    data = data.to_numpy().T
    n_features = data.shape[1]
    n_points = data.shape[0]
    R2 = []
    r2_sum_max = 0  # place holder
    best_winsize = 0 # just a place holder, size of 0 is not used in the final model. 
    for window_size in win_size:
      n_train = int((n_points-2*window_size)*0.8)  #80% training samples
      X, Y = generate_data(n_points, n_features, window_size, data)
      X_train, y_train = np.stack(X[:n_train]), np.stack(Y[:n_train])
      X_test, y_test = np.stack(X[n_train:]), np.stack(Y[n_train:])
      dropout_rate = 0.4 
      inputs = Input(shape=(None, n_features))
      # First LSTM layer in the encoder with return_sequences=True
      # encoded = LSTM(latent_dim, activation='selu', return_sequences=True, go_backwards=True)(inputs)
      # encoded = Dropout(dropout_rate)(encoded)  # Adding dropout after the first LSTM layer
      # Second LSTM layer in the encoder
      encoded, state_h, state_c = LSTM(latent_dim, activation='selu', return_sequences=True, return_state=True)(inputs)
      encoder_states = [state_h, state_c]
      attention = Dense(1, activation='tanh')(encoded)
      attention = Flatten()(attention)
      attention = Activation('softmax', name='attention')(attention)
      attention = RepeatVector(latent_dim)(attention)
      attention = Permute([2, 1])(attention)
      encoded = multiply([encoded, attention])
      # context = Lambda(lambda xin: tf.keras.backend.sum(xin, axis=1))(encoded)
      context = Lambda(lambda xin: tf.keras.backend.sum(xin, axis=1))(encoded)
      # Decoder
      decoder_input = Lambda(lambda x: tf.keras.backend.repeat(x[0], tf.shape(x[1])[1]))([context, inputs])
      # First LSTM layer in the decoder with return_sequences=True
      decoded = LSTM(latent_dim, activation='selu', return_state=False, return_sequences=True)(decoder_input, initial_state=encoder_states)
      decoded = Dropout(dropout_rate)(decoded)  # Adding dropout after the first LSTM layer
      # Experiment with this attention module
      attention = dot([decoded, encoded], axes=[2, 2])
      attention = Activation('softmax')(attention)
      attention = dot([attention, encoded], axes=[2,1])
      decoded = concatenate([attention, decoded])
      decoded = TimeDistributed(Dense(n_features), name='autoencoder')(decoded)
      # Define models
      encoder = Model(inputs, context)
      autoencoder = Model(inputs, decoded)
      autoencoder.compile(optimizer="adam", loss="mse")

      # Train model
      es = EarlyStopping(monitor='val_loss', patience=10, verbose=0, restore_best_weights=True)
      history = autoencoder.fit(X_train, y_train, epochs=eps, batch_size=bs, verbose=0, validation_split=0.1, callbacks = [es]) #hide training progress
      y_pred = autoencoder.predict(X_test)
      yy_pred = y_pred.reshape(-1,n_features)
      yy_test = y_test.reshape(-1,n_features)
      mse = []
      mae = []
      rmse = []
      r2 = []
      for i in range(n_features):
        mse.append(mean_squared_error(yy_test[:,i], yy_pred[:,i]))  #self causal when i ==0
        mae.append(mean_absolute_error(yy_test[:,i], yy_pred[:,i]))
        rmse.append(np.sqrt(mean_squared_error(yy_test[:,i], yy_pred[:,i])))
        r2.append(r2_score(yy_test[:,i], yy_pred[:,i]))
      R2.append(r2)
      r2_sum = np.mean(r2)
      if r2_sum > r2_sum_max:   # if r2_sum is smaller than zero, the model is not acceptable.
        r2_sum_max = r2_sum
        best_winsize = window_size
    r_squares.append(r2_sum_max)
    best_windows.append(best_winsize)

    df2 = pd.DataFrame({'R_squares': r_squares, 'best-windows': best_windows})
    df2.to_csv(file_path, index = False)

    np.mean(R2, axis= 1)
    R2max_ind = np.argmax(np.mean(R2, axis= 1))
    best_winsize = win_size[R2max_ind]
    ## START of train/test of final model ##
    if len(win_size) >1 and best_winsize != win_size[-1]:
      n_train = int((n_points-2*best_winsize)*0.8)  
      # Generate data
      X, Y = generate_data(n_points, n_features, best_winsize, data)
      X_train, y_train = np.stack(X[:n_train]), np.stack(Y[:n_train])
      X_test, y_test = np.stack(X[n_train:]), np.stack(Y[n_train:])
      dropout_rate = 0.4  # Example dropout rate
      inputs = Input(shape=(None, n_features))
      # First LSTM layer in the encoder with return_sequences=True
      # encoded = LSTM(latent_dim, activation='selu', return_sequences=True, go_backwards=True)(inputs)
      # encoded = Dropout(dropout_rate)(encoded)  # Adding dropout after the first LSTM layer
      # Second LSTM layer in the encoder
      encoded, state_h, state_c = LSTM(latent_dim, activation='selu', return_sequences=True, return_state=True)(inputs)
      encoder_states = [state_h, state_c]
      # Compute importance for each step
      attention = Dense(1, activation='tanh')(encoded)
      attention = Flatten()(attention)
      attention = Activation('softmax', name='attention')(attention)
      attention = RepeatVector(latent_dim)(attention)
      attention = Permute([2, 1])(attention)
      encoded = multiply([encoded, attention])
      # context = Lambda(lambda xin: tf.keras.backend.sum(xin, axis=1))(encoded)
      context = Lambda(lambda xin: tf.keras.backend.sum(xin, axis=1))(encoded)
      # Decoder
      decoder_input = Lambda(lambda x: tf.keras.backend.repeat(x[0], tf.shape(x[1])[1]))([context, inputs])
      # First LSTM layer in the decoder with return_sequences=True
      decoded = LSTM(latent_dim, activation='selu', return_state=False, return_sequences=True)(decoder_input, initial_state=encoder_states)
      decoded = Dropout(dropout_rate)(decoded)  # Adding dropout after the first LSTM layer
      # Second LSTM layer in the decoder
      # decoded = LSTM(latent_dim, activation='selu', return_sequences=True)(decoded)
      # Experiment with this attention module
      attention = dot([decoded, encoded], axes=[2, 2])
      attention = Activation('softmax')(attention)
      attention = dot([attention, encoded], axes=[2,1])
      decoded = concatenate([attention, decoded])
      decoded = TimeDistributed(Dense(n_features), name='autoencoder')(decoded)
      encoder = Model(inputs, context)
      autoencoder = Model(inputs, decoded)
      # Compile model
      autoencoder.compile(optimizer="adam", loss="mse")
      es = EarlyStopping(monitor='val_loss', patience=10, verbose=0, restore_best_weights=True)
      history = autoencoder.fit(X_train, y_train, epochs=eps, batch_size=bs, verbose=0, validation_split=0.1, callbacks = [es]) #hide training progress

    ## compute the causality
    def modify_feature(X, feature_index):
        X_modified = X.copy()
        X_modified[:, :, Reg_ch_num[feature_index]] = 0 
        return X_modified

    def calculate_metrics(y_test, y_pred, n_features, n_Bregions):
        mse, mae, rmse, r2, variance = [], [], [], [], []
        yy_test = y_test.reshape(-1, n_features)
        yy_pred = y_pred.reshape(-1, n_features)
        for i in range(n_Bregions):
            # mse.append(mean_squared_error(yy_test[:, i], yy_pred[:, i]))
            # mae.append(mean_absolute_error(yy_test[:, i], yy_pred[:, i]))
            # rmse.append(np.sqrt(mse[-1]))
            r2.append(r2_score(yy_test[:, Reg_ch_num[i]], yy_pred[:, Reg_ch_num[i]]))
            variance.append(np.var(yy_test[:, Reg_ch_num[i]] - yy_pred[:, Reg_ch_num[i]]))
        return variance

    original_variance = []  # Store original variances here
    y_pred_original = autoencoder.predict(X_test)
    original_variance = calculate_metrics(y_test, y_pred_original, n_features, n_Bregions)

    # Loop over all features
    causality_mat = []
    for feature_index in range(n_Bregions):
        X_test_modified = modify_feature(X_test, feature_index)
        y_pred_modified = autoencoder.predict(X_test_modified)
        modified_variance = calculate_metrics(y_test, y_pred_modified, n_features, n_Bregions)
        causal_effect = np.log(np.array(modified_variance) / np.array(original_variance))
        causality_mat.append(causal_effect)
    expertise = 1 if sub[0:3]=='Exp' else -1
    causality_flat = np.append(np.array(causality_mat).flatten(), int(expertise))
    Causal_allSub.append(causality_flat)

  # write causality to csv files
  df =pd.DataFrame(np.vstack((connections2, np.array(Causal_allSub))))
  df.to_csv(csvfilename, header=None, index=None)
et = time.time()
print(f'Total time take: {(et-st)/60} min')

1 filename: P01Tr_1_alphafreqBP.csv

[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
2 filename: P01Tr_2_alphafreqBP.csv
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3