In [1]:
import time
import os
import json
import pickle
import tensorflow as tf
from tensorflow.keras import utils
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import ResNet50, VGG19
from tensorflow.keras.applications.imagenet_utils import preprocess_input

#from keras.preprocessing import image
#from keras.applications.imagenet_utils import preprocess_input

from matplotlib.pyplot import imread, imshow

#from keras.preprocessing.image import ImageDataGenerator
#from keras.layers import Dense, Dropout, Activation, Flatten
#from keras.layers import Conv2D, MaxPooling2D

import numpy as np
import matplotlib.pyplot as plt
import random
from datetime import datetime
import nibabel as nib
import re
from collections import Counter
#import imageio
from nst_utils import *

%matplotlib inline
%load_ext autoreload
%autoreload 1

#%aimport 

# Start from a clean Keras state before seeding and building any model.
K.clear_session()

# Seed every random number generator the notebook draws from.
SEED = 1
random.seed(SEED)
np.random.seed(SEED)
# The original line only referenced `tf.random.set_seed` without calling it,
# so TensorFlow's global seed was never set.
tf.random.set_seed(SEED)



In [2]:
# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)


2.0.0


# Load data

For phase 1, the training examples are the images shown to 4 participants across multiple sessions.

Each image is labeled with one of 3 classes: Scene, COCO, or ImageNet.


In [None]:
# Build one flat list of stimulus image paths whose order follows the
# presentation lists (participant -> session -> run -> trial). A `blankImage`
# sentinel is appended after every run that has a presentation list.

stimulusDirPath = os.path.join('images', 'BOLD5000_Stimuli', 'Scene_Stimuli', 'Presented_Stimuli')
presentationListRoot = os.path.join("images", "BOLD5000_Stimuli", "Stimuli_Presentation_Lists")

# Session and run ranges are half-open: range(start, last).
global_data = {
        "participant_list": ["CSI1", "CSI2", "CSI3", "CSI4"],
        "start_sess": 0,
        "last_sess": 16,
        "start_run": 0,
        "last_run": 15
}

gList = {}           # participant -> session -> run -> list of presented image names
global_index = 0
imagePathList = []   # flat list of image paths, plus one sentinel per run
blankImage = 'Blank'

# Walk the stimulus tree once instead of once per presented image; the
# directory contents do not change while this cell runs.
stimulusTree = [(currDir, fileList) for (currDir, _, fileList) in os.walk(stimulusDirPath)]

for participant in global_data['participant_list']:
    print()
    # Presentation-list files for CSI1 are prefixed "CSI" (no trailing 1).
    CSI = "CSI" if participant == "CSI1" else participant

    gList[participant] = {}
    for sNum in range(global_data['start_sess'], global_data['last_sess']):
        sSes = "sess" + str(sNum).zfill(2)
        gList[participant][sSes] = {}

        # The session directory does not depend on the run, so test it once.
        dir_path = os.path.join(presentationListRoot, participant, participant + "_" + sSes)
        if not os.path.exists(dir_path):
            continue

        for rNum in range(global_data['start_run'], global_data['last_run']):
            sRun = "run" + str(rNum).zfill(2)
            stimulusListFilename = os.path.join(dir_path, "_".join([CSI, sSes, sRun]) + ".txt")
            if not os.path.exists(stimulusListFilename):
                continue

            print("cs: %s sess: %s run: %s" % (participant, sNum, rNum))
            with open(stimulusListFilename) as f:
                imageList = f.read().splitlines()
            gList[participant][sSes][sRun] = imageList

            for imageFileName in imageList:
                for currDir, fileList in stimulusTree:
                    for filename in fileList:
                        # Substring test, kept from the original; presumably so that
                        # prefixed list entries still match the stored file name -
                        # TODO confirm. `break` leaves only this directory's file loop.
                        if filename in imageFileName:
                            imagePathList.append(os.path.join(currDir, filename))
                            break

            # One extra index per run for the "no image" condition.
            imagePathList.append(blankImage)

print(len(imagePathList))

# Other way to get global index from events file. just ran this to verify above
#from glob import glob
#import pandas as pd
#import os
#
#events_dir = '/home/ubuntu/cs230Project/dataset/ds001499-download'
#iCount = 0
#imageList = []
#for subname in ['sub-CSI1', 'sub-CSI2', 'sub-CSI3', 'sub-CSI4']:
#    for sNum in range(0, 20):
#        ses = "ses-%s" % str(sNum).zfill(2)
#        event_path = os.path.join(events_dir,subname,ses,'func','*run*' + '_events.tsv')
#        event = glob(event_path)
#        if not event:
#            continue
#
#        event_file = glob(os.path.join(events_dir,subname,ses,'func','*' + 'run*' + '_events.tsv'))
#        for ev in event_file:
#            events = pd.read_csv(ev, sep = '\t')
#            for index, row in events.iterrows():
#                iCount += 1
#                imageList.append(row['ImgName'])
#                #print(iCount)
#
#            # index for no image
#            iCount += 1
#            imageList.append('None')
#
#print(iCount)
#print(len(imageList))


In [None]:
# Instantiate ResNet50 with its default arguments and show its layers.
# NOTE(review): per the Keras docs the defaults load ImageNet weights and
# include the classification head - confirm for the installed version.
base_model = ResNet50()
base_model.summary()

In [None]:
# Feature extractor: same input as base_model, but the output is taken from
# the layer named 'avg_pool' instead of the final layer.
model = Model(inputs = base_model.input, outputs = base_model.get_layer('avg_pool').output)
model.summary()


In [None]:
# Load each stimulus image as a preprocessed batch of one 224x224 image.
# Blank trials keep the `blankImage` sentinel so indices stay aligned with
# imagePathList.
image_activations = []
x_images = []
for imgFile in imagePathList:
    # Compare by value: the original `is` test only worked because the very
    # same string object had been appended to imagePathList.
    if imgFile == blankImage:
        x_images.append(blankImage)
        continue

    img = image.load_img(imgFile, target_size=(224, 224))
    x = np.expand_dims(image.img_to_array(img), axis=0)
    x_images.append(preprocess_input(x))



In [None]:

# Blank trials get an all-zero activation row with the same width as the
# model output (2048 in the original code).
blank_array = np.zeros((1, model.output_shape[-1]))
# Entries of x_images are either the string sentinel or an image batch, so a
# type check separates them without relying on object identity.
image_activations = [blank_array if isinstance(x, str) else model.predict(x) for x in x_images]
print(len(image_activations))
print(image_activations[0].shape)


In [None]:
# Stack the per-image activation arrays along axis 0 (np.concatenate default)
# into a single array, one row per entry of image_activations.
activations_all = np.concatenate(image_activations)
print(activations_all.shape)

In [3]:
# Output location for the image activations.
# NOTE: despite the ".npy" suffix, the cells that use this path write and read
# it with pickle.dump / pickle.load, not np.save / np.load.
imageActivation = "imageActivation"
activationFile = "stActivations.npy"
imageActivationFile = os.path.join(imageActivation, activationFile)

In [None]:


# Create the output directory from Python rather than a shell escape, so the
# cell does not depend on a POSIX `mkdir`.
os.makedirs(imageActivation, exist_ok=True)
# The file is a pickle (not np.save output) even though its name ends in
# ".npy"; it must be read back with pickle.load.
with open(imageActivationFile, 'wb') as f:
    pickle.dump(activations_all, f)


## Model

In [4]:
# Run index; the encoder training cell interpolates it into the TensorBoard
# log directory ('./log/{i}').
i=0

(19380, 2048)
15.81203075060473
-0.3625183521784735


(19380, 2048)
[-0.00252447 -0.00222958 -0.00456031 ...  0.10309614 -0.00550695
 -0.0056969 ]
0.2813703920418878
-0.006450906432175714


In [36]:


# Prepare data for the encoder that maps fMRI ROI responses to image
# activation vectors.

# One file per region of interest; the trailing comment is the stored shape.
ROI_list = [
    'X_LHPPA.npy', #(19380, 5, 100)
    'X_RHLOC.npy', #(19380, 5, 170)
    'X_LHLOC.npy', #(19380, 5, 130)
    'X_RHEarlyVis.npy', #(19380, 5, 220)
    'X_RHRSC.npy', #(19380, 5, 100)
    'X_RHOPA.npy', #(19380, 5, 80)
    'X_RHPPA.npy', #(19380, 5, 140)
    'X_LHEarlyVis.npy', #(19380, 5, 190)
    'X_LHRSC.npy', #(19380, 5, 30)
    'X_LHOPA.npy', #(19380, 5, 70)
]

train_folder = '/home/ubuntu/cs230Project/dataset/traindata'
# Common row width: 5 time frames times the widest ROI (220 voxels).
ax_length = 5 * 220

array_list = []
for roc_file in ROI_list:
    xtrain_n = np.load(os.path.join(train_folder, roc_file))
    # Flatten all time frames of a sample into one row, then right-pad the
    # row with zeros up to ax_length so every ROI has the same width.
    flat_rows = np.reshape(xtrain_n, (xtrain_n.shape[0], -1))
    missing_cols = ax_length - flat_rows.shape[1]
    array_list.append(
        np.pad(flat_rows, ((0, 0), (0, missing_cols)), mode='constant', constant_values=0)
    )

# dstack gives (samples, ax_length, n_roi); swap to (samples, n_roi, ax_length).
x_stack = np.dstack(array_list)
x_all = np.swapaxes(x_stack, 1, 2)
print(x_all.shape)

num_samples = x_all.shape[0]
# First 90% of the samples (in stored order) train, the rest validate.
split_at = int(num_samples * 0.9)
print(split_at)

# NOTE: pickle.load executes arbitrary code from the file; only load files
# produced by this notebook.
with open(imageActivationFile, 'rb') as f:
    y_activations = pickle.load(f)

print(y_activations.shape)
# Scale every target row to unit L2 norm (epsilon guards all-zero rows).
y_normalized = [
    y_activations[sample, :] / (np.linalg.norm(y_activations[sample, :]) + 1e-8)
    for sample in range(0, y_activations.shape[0])
]
y_all = np.squeeze(np.vstack(y_normalized))
print(y_all.shape)

callbacks = [TensorBoard(log_dir=f'./log/{i}')]

# Split data into train and dev.
x_train, x_test = x_all[0:split_at, :, :], x_all[split_at:, :, :]
y_train, y_test = y_all[0:split_at, :], y_all[split_at:, :]

print("xtrain shape: %s" % str(x_train.shape))
print("ytrain shape: %s" % str(y_train.shape))
print("xtest shape: %s" % str(x_test.shape))
print("ytest shape: %s" % str(y_test.shape))

def auto_encoder_lstm(input_shape, encoding_dim):
    """Build the sequence-to-vector regressor used as the fMRI encoder.

    Architecture: LSTM(128, with input and recurrent dropout of 0.2)
    -> BatchNormalization -> Dense(encoding_dim, tanh).

    Args:
        input_shape: shape of one sample, passed straight to `Input`.
        encoding_dim: width of the output vector.

    Returns:
        An uncompiled Keras `Model` named 'auto_encoder_lstm'.
    """
    inputs = Input(input_shape)
    hidden = LSTM(128, dropout=0.2, recurrent_dropout=0.2)(inputs)
    hidden = BatchNormalization()(hidden)
    outputs = Dense(encoding_dim, activation="tanh")(hidden)
    return Model(inputs=inputs, outputs=outputs, name='auto_encoder_lstm')

EPOCHS = 10

# Sanity-check the layout: axis 1 indexes ROIs, axis 2 the padded features.
n_roi, n_features = x_train.shape[1], x_train.shape[2]
assert n_features == ax_length
assert n_roi == len(ROI_list)

# The output width equals the width of the target activation vectors.
encoder_model = auto_encoder_lstm((n_roi, n_features), y_train.shape[1])
encoder_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])

train_history = encoder_model.fit(
    x=x_train,
    y=y_train,
    epochs=EPOCHS,
    callbacks=callbacks,
    batch_size=50,
    validation_data=(x_test, y_test),
)







(19380, 10, 1100)
17442
(19380, 2048)
(19380, 2048)
xtrain shape: (17442, 10, 1100)
ytrain shape: (17442, 2048)
xtest shape: (1938, 10, 1100)
ytest shape: (1938, 2048)
Train on 17442 samples, validate on 1938 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [37]:
# Persist the trained encoder, then drop the in-memory object so the next
# cell has to reload it from disk.
encoder_model_file = 'encoder_model.h5'
encoder_model.save(encoder_model_file)
del encoder_model


In [40]:

# Reload the saved encoder from disk and print its layer summary.
encoder_model = load_model(encoder_model_file)
encoder_model.summary()


Model: "auto_encoder_lstm"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_13 (InputLayer)        [(None, 10, 1100)]        0         
_________________________________________________________________
lstm_12 (LSTM)               (None, 128)               629248    
_________________________________________________________________
batch_normalization_18 (Batc (None, 128)               512       
_________________________________________________________________
dense_16 (Dense)             (None, 2048)              264192    
Total params: 893,952
Trainable params: 893,696
Non-trainable params: 256
_________________________________________________________________


In [47]:
# Encode every fMRI sample in one batched predict call instead of one call
# per sample (the original issued x_all.shape[0] separate predicts).
# NOTE(review): this rebinds `activations_all`, which earlier held the image
# activations; the save cell that follows writes this new value.
activations_all = encoder_model.predict(x_all, batch_size=256)

# Keep the original per-sample view: a list of (1, encoding_dim) arrays.
fmri_features = [row[np.newaxis, :] for row in activations_all]

print(len(fmri_features))
print(fmri_features[0].shape)
print(activations_all.shape)




19380
(1, 2048)
(19380, 2048)


In [52]:
print(activations_all.shape)
fmriActivation = "fmriActivation"
fmriActivationFile = os.path.join(fmriActivation, "fmriActivations.npy")
# Create the output directory from Python rather than a shell escape.
os.makedirs(fmriActivation, exist_ok=True)
# The file is a pickle despite the ".npy" suffix; read it with pickle.load.
with open(fmriActivationFile, 'wb') as f:
    pickle.dump(activations_all, f)



(19380, 2048)
[0.00991819 0.02032384 0.01259134 ... 0.0058254  0.00718123 0.00934707]


In [None]:
# Read the saved image activations back. The file must be opened for reading:
# the original 'wb' mode truncated the saved file and then failed in
# pickle.load because the handle was write-only.
# NOTE: pickle.load executes arbitrary code from the file; only load files
# produced by this notebook.
with open(imageActivationFile, 'rb') as f:
    y_train = pickle.load(f)

print(y_train.shape)


In [None]:
# Run the pretrained model on every entry of X_images, keyed in sorted order.
# Nothing is written to disk here.
# NOTE(review): `pretrained_model` and `X_images` are not defined in any cell
# visible in this notebook - confirm where they come from, or remove the cell.
activations = {x: pretrained_model.predict(X_images[x]) for x in sorted(X_images.keys())}


In [None]:
# Root folder of the presented stimulus images.
stimulusDirPath = os.path.join('images', 'BOLD5000_Stimuli', 'Scene_Stimuli', 'Presented_Stimuli')
print("stimulusDirPath: %s" % stimulusDirPath)

# Which participants / sessions / runs feed each split. Session and run
# bounds are half-open: range(start, last).
# An earlier, larger configuration kept for reference:
#   train: participants CSI1-CSI3, start_sess 1, last_sess 14, start_run 1, last_run 10
#   dev:   participants CSI1-CSI3, start_sess 14, last_sess 15, start_run 1, last_run 10
data_split = {
    "train": dict(participant_list=["CSI1"], start_sess=1, last_sess=3, start_run=1, last_run=4),
    "dev": dict(participant_list=["CSI1"], start_sess=14, last_sess=15, start_run=1, last_run=2),
}

# Class index for each stimulus source directory name.
classes = {'ImageNet': 0, 'COCO': 1, 'Scene': 2}

# stList[split][participant][session][run] -> stimulus names shown in that run.
stList = {}
for data_type, items in data_split.items():
    stList[data_type] = {}
    for participant in items['participant_list']:
        # Presentation-list files for CSI1 are prefixed "CSI" (no trailing 1).
        CSI = "CSI" if participant == "CSI1" else participant

        stList[data_type][participant] = {}
        for sNum in range(items['start_sess'], items['last_sess']):
            sSes = "sess" + str(sNum).zfill(2)
            runLists = {}
            stList[data_type][participant][sSes] = runLists
            for rNum in range(items['start_run'], items['last_run']):
                sRun = "run" + str(rNum).zfill(2)
                dir_path = os.path.join("images","BOLD5000_Stimuli", "Stimuli_Presentation_Lists",participant, participant + "_" + sSes)
                stimulusListFilename = os.path.join(dir_path, "_".join([CSI, sSes, sRun]) + ".txt")
                # A missing list file raises here; nothing is skipped silently.
                with open(stimulusListFilename) as f:
                    runLists[sRun] = f.read().splitlines()

            
# Resolve every presented stimulus name to a file under stimulusDirPath and
# derive its class label from the name of the directory that contains it.

# Walk the stimulus tree once instead of once per image. The directory name is
# taken with os.path.basename rather than split('/'), so it does not depend
# on the path separator.
stimulusTree = [
    (currDir, os.path.basename(currDir), fileList)
    for (currDir, _, fileList) in os.walk(stimulusDirPath)
]

# Both dicts mirror stList: [split][participant][session][run].
x_images_path = {}
y_labels = {}
for data_type, participantDict in stList.items():
    x_images_path[data_type] = {}
    y_labels[data_type] = {}
    for participant, sessDict in participantDict.items():
        x_images_path[data_type][participant] = {}
        y_labels[data_type][participant] = {}
        for sess, runDict in sessDict.items():
            x_images_path[data_type][participant][sess] = {}
            y_labels[data_type][participant][sess] = {}
            for run, imageList in runDict.items():
                pathList = []
                labelList = []
                for imageFileName in imageList:
                    for currDir, currBaseDir, fileList in stimulusTree:
                        for filename in fileList:
                            # Substring test kept from the original. `break`
                            # leaves only this directory's file loop.
                            if filename in imageFileName:
                                pathList.append(os.path.join(currDir, filename))
                                # classes.get yields None for a directory
                                # that is not one of the known class names.
                                labelList.append(classes.get(currBaseDir))
                                break

                x_images_path[data_type][participant][sess][run] = pathList
                # Labels are stored as a (1, n_images) row vector.
                y_labels[data_type][participant][sess][run] = np.reshape(np.asarray(labelList), (1, -1))

# Todo: normalize data
# x_train / 255.0, x_val/255.0, x_train/255.0

print('done')

# Preprocess 
Compute feature vectors for the presented stimuli using the pretrained `imagenet-vgg-verydeep-19` model.

The feature vectors are saved to `.npy` files, one file per participant/session/run.



In [None]:
# Directory holding the cached per-run feature files, and the VGG layer whose
# output is used as the feature.
stimuli_features_dir = 'stimulifeatures'
content_layer = 'avgpool5'


def unrollContentOutput(cOutput):
    """Unroll one 4-D layer output into a (channels, height*width) matrix.

    Args:
        cOutput: array of shape (m, n_H, n_W, n_C). The reshape only succeeds
            when m == 1, since the batch axis is not part of the target shape.

    Returns:
        2-D array of shape (n_C, n_H * n_W).
    """
    _, height, width, channels = cOutput.shape
    spatial_by_channel = np.reshape(cOutput, (height * width, channels))
    return spatial_by_channel.T

In [None]:
# Precompute the `content_layer` output for every presented stimulus and cache
# it as one .npy file per participant/session/run under stimuli_features_dir.
# NOTE(review): tf.reset_default_graph and tf.Session are TF1-style graph
# APIs, while this notebook reports TensorFlow 2.0.0 above, where they are
# only available under tf.compat.v1 - confirm which version this cell ran on.
print("start time: %s" % datetime.now().strftime('%Y-%m-%dT%H:%M:%S'))


!mkdir -p stimulifeatures

tf.reset_default_graph()
#sess = tf.InteractiveSession()
#precompute content vectors from presented stimuli
#content_layer = 'conv4_2'
with tf.Session() as ts:
    # load_vgg_model comes from nst_utils (star import); presumably it returns
    # a dict of graph tensors keyed by layer name - verify against nst_utils.
    vmodel = load_vgg_model("imagenet-vgg-verydeep-19.mat")
    for data_type, participantDict in x_images_path.items():
        for participant, sessDict in participantDict.items():
            for sess, runDict in sessDict.items():
                for run, imageList in runDict.items():
                    #x_content = {sess: {run: []}}
                    # The cache file name does not include data_type.
                    file_path= os.path.join(stimuli_features_dir, "_".join([participant, sess, run]) + ".npy")
                    if os.path.exists(file_path):
                        # already computed, skip
                        continue

                    print("file_path: %s" % file_path)
                    print("participant: %s, sess: %s, run: %s" % (participant, sess, run))
                    contentList = []
                    for img_path in imageList:
                        #stImage = imread(cImage)
                        # Every image is resized to 375x375 and given a leading batch axis.
                        img = image.load_img(img_path, target_size=(375, 375))
                        x = image.img_to_array(img)
                        x = np.expand_dims(x, axis=0)
                        x = preprocess_input(x)
                        #print("img_path: %s" % img_path)
                        #print('Input image shape:', x.shape)
                        #img_array = img_to_array(img)
                        #stImage = imageio.imread(img_path)
                        #print("img_path: %s" % img_path)
                        #print(stImage.shape)
                        #stImage = reshape_and_normalize_image(stImage)
                        #stImage = np.reshape(stImage, (1, 375, 375, 3))
                        # Feed the image into the graph, then evaluate the chosen layer.
                        ts.run(vmodel['input'].assign(x))
                        #a_C = sess.run(vmodel)
                        out = vmodel[content_layer]
                        contentOut = ts.run(out)
                        contentList.append(unrollContentOutput(contentOut))
            
                    #x_content[sess][run] = np.asarray(contentList)
                    contentArray = np.asarray(contentList)
                    # shape is (35, 512, 144): num of pictures, channels, width*height
                    # (per the original note - TODO confirm the picture count)
                    #print(x_content[sess][run].shape)
                    #x_content[sess][run].append(unrollContentOutput(contentOut))
        
                    #np.save(file_path, x_content)
                    np.save(file_path, contentArray)
                    #del x_content

print('done')
print("end time: %s" % datetime.now().strftime('%Y-%m-%dT%H:%M:%S'))

In [None]:
# Compute the `content_layer` output for a single ImageNet stimulus, using the
# same resize/preprocess steps as the bulk feature-extraction cell.
# NOTE(review): tf.Session is a TF1-style API; the notebook reports
# TensorFlow 2.0.0 - confirm which version this cell ran on.
with tf.Session() as ts:
    vmodel = load_vgg_model("imagenet-vgg-verydeep-19.mat")
    img_path = './images/BOLD5000_Stimuli/Scene_Stimuli/Presented_Stimuli/ImageNet/n01833805_1411.JPEG'
    img = image.load_img(img_path, target_size=(375, 375))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    # Feed the image into the graph, then evaluate the chosen layer.
    ts.run(vmodel['input'].assign(x))
    out = vmodel[content_layer]
    predictContentOut = ts.run(out)

# Model

In [None]:
# Number of stimulus classes (ImageNet, COCO, Scene).
num_classes = 3
# Timestamp string for this run.
VERSION = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
# One cached feature file, loaded only to read off the per-sample input shape.
file_path = os.path.join('stimulifeatures', 'CSI2_sess01_run01.npy')

x_content = np.load(file_path, allow_pickle=True)
print(x_content.shape)

def dnn_classifier(input_shape, num_classes):
    """Build a one-hidden-layer classifier over the flattened input.

    Architecture: Flatten -> Dense(64, tanh) -> Dropout(0.2)
    -> Dense(num_classes, softmax).

    Args:
        input_shape: shape of one sample, passed straight to `Input`.
        num_classes: number of softmax outputs.

    Returns:
        An uncompiled Keras `Model` named 'dnn_classifier'.
    """
    inputs = Input(input_shape)
    flat = Flatten()(inputs)
    hidden = Dense(64, activation='tanh')(flat)
    hidden = Dropout(0.2)(hidden)
    probabilities = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=inputs, outputs=probabilities, name='dnn_classifier')

def dnn_gap_classifier(input_shape, num_classes):
    """Build a classifier that global-average-pools the input before a small MLP.

    Architecture: GlobalAveragePooling1D(channels_first) -> Dense(64, relu)
    -> Dropout(0.2) -> Dense(num_classes, softmax).

    Args:
        input_shape: shape of one sample, passed straight to `Input`.
            Presumably (channels, positions), e.g. (512, 144) - TODO confirm.
        num_classes: number of softmax outputs.

    Returns:
        An uncompiled Keras `Model` named 'dnn_gap_classifier'.
    """
    X_input = Input(input_shape)
    X = GlobalAveragePooling1D(data_format='channels_first')(X_input)
    X = Dense(64, activation='relu')(X)
    X = Dropout(0.2)(X)
    X = Dense(num_classes, activation='softmax')(X)
    # The original reused the name 'dnn_classifier' (copy-paste), which made
    # the two classifiers indistinguishable in summaries and saved files.
    model = Model(inputs = X_input, outputs = X, name='dnn_gap_classifier')
    return model


def auto_encoder(input_shape, encoding_dim):
    """Build a single Dense(encoding_dim, relu) layer applied to the input.

    Only the encoding half is built; there is no decoder.

    Args:
        input_shape: shape of one sample, passed straight to `Input`.
        encoding_dim: width of the encoded output.

    Returns:
        An uncompiled Keras `Model` named 'auto_encoder'.
    """
    X_input = Input(input_shape)
    X = Dense(encoding_dim, activation='relu')(X_input)
    # The original reused the name 'dnn_classifier' (copy-paste).
    model = Model(inputs = X_input, outputs = X, name='auto_encoder')
    return model

def cnn_classifier(input_shape, num_classes):
    """Build a small convolutional classifier.

    Two blocks of [Conv 3x3 'same' -> ReLU -> Conv 3x3 -> ReLU -> MaxPool 2x2
    -> Dropout 0.25] with 32 and then 64 filters, followed by one more
    ReLU -> MaxPool -> Dropout stage, then Flatten -> Dense(512, ReLU)
    -> Dense(num_classes) -> softmax.

    Args:
        input_shape: shape of one sample, passed straight to `Input`.
        num_classes: number of softmax outputs.

    Returns:
        An uncompiled Keras `Model` named 'cnn_classifier'.
    """
    inputs = Input(input_shape)
    features = inputs
    for n_filters in (32, 64):
        features = Conv2D(n_filters, (3, 3), padding='same')(features)
        features = Activation('relu')(features)
        features = Conv2D(n_filters, (3, 3))(features)
        features = Activation('relu')(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)
        features = Dropout(0.25)(features)

    # NOTE(review): this third stage has no convolution in front of it, unlike
    # the commented-out Sequential variant below - confirm it is intentional.
    features = Activation('relu')(features)
    features = MaxPooling2D(pool_size=(2, 2))(features)
    features = Dropout(0.25)(features)

    features = Flatten()(features)
    features = Dense(512)(features)
    features = Activation('relu')(features)
    logits = Dense(num_classes)(features)
    probabilities = Activation('softmax')(logits)
    return Model(inputs=inputs, outputs=probabilities, name='cnn_classifier')

#model = tf.keras.models.Sequential([
#    tf.keras.layers.Flatten(input_shape=[512, 144]),
#    tf.keras.layers.Dense(128, activation='relu'),
#    tf.keras.layers.Dropout(0.2),
#    tf.keras.layers.Dense(num_classes, activation='softmax')
#])

#model = tf.keras.models.Sequential([
#    tf.keras.layers.Conv2D(32, (3, 3), padding='same',
#                 input_shape=x_train.shape[1:]),
#    tf.keras.layers.Activation('relu'),
#    tf.keras.layers.Conv2D(32, (3, 3)),
#    tf.keras.layers.Activation('relu'),
#    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
#    tf.keras.layers.Dropout(0.25),
#    tf.keras.layers.Conv2D(64, (3, 3), padding='same'),
#    tf.keras.layers.Activation('relu'),
#    tf.keras.layers.Conv2D(64, (3, 3)),
#    tf.keras.layers.Activation('relu'),
#    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
#    tf.keras.layers.Dropout(0.25),
#    tf.keras.layers.Flatten(),
#    tf.keras.layers.Dense(512),
#    tf.keras.layers.Activation('relu'),
#    tf.keras.layers.Dense(num_classes),
#    tf.keras.layers.Activation('softmax')
#])


#input_shape=[512, 144]
# Per-sample input shape: everything after the leading sample axis of the
# cached feature array.
input_shape = x_content.shape[1:]
print(input_shape)
#model = dnn_classifier(input_shape, num_classes)
# NOTE(review): this rebinds `model`, which earlier held the ResNet50 feature
# extractor; cells above will not work after this one without being re-run.
model = dnn_gap_classifier(input_shape, num_classes)


In [None]:

# NOTE(review): not referenced by any cell visible in this notebook - confirm
# whether it is still needed.
initial_epoch = 0


In [None]:


def loadFeatureVector(file_path):
    """Load one cached feature array from a .npy file.

    Args:
        file_path: path to the .npy file.

    Returns:
        The array stored in the file (loaded with allow_pickle=True).
    """
    features = np.load(file_path, allow_pickle=True)
    return features
    
def featureVectorLoader(data_split, data_type, num_classes=None):
    """Yield (features, one-hot labels) batches forever, one cached run per batch.

    Reads the notebook-level `stimuli_features_dir`, `y_labels` and `classes`.

    Args:
        data_split: nested dict [split][participant][session][run]; only its
            keys are used to locate the cached feature files.
        data_type: split name to iterate, e.g. "train" or "dev".
        num_classes: width of the one-hot labels. Defaults to len(classes), so
            the width stays fixed even when a run lacks some class (the
            original let to_categorical infer it from each run's labels).

    Yields:
        (X, Y): X is the feature array stored for one run; Y is the one-hot
        label matrix for that run, with num_classes columns.
    """
    if num_classes is None:
        num_classes = len(classes)

    # Each file holds the feature vectors of one run (one batch).
    x_images = data_split.get(data_type, None)
    while True:
        for participant, sessDict in x_images.items():
            for sess, runDict in sessDict.items():
                for run in runDict.keys():
                    file_path = os.path.join(stimuli_features_dir, "_".join([participant, sess, run]) + ".npy")
                    X = loadFeatureVector(file_path)
                    # Labels are stored as a (1, n) row; transpose to (n, 1) first.
                    run_labels = np.transpose(y_labels[data_type][participant][sess][run])
                    Y = utils.to_categorical(run_labels, num_classes=num_classes)
                    yield (X, Y)

EPOCHS = 20

# Stop when val_loss has not improved for 4 epochs; write a checkpoint file
# every epoch.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=4),
    ModelCheckpoint(filepath='weights.{epoch:02d}.h5', monitor='val_loss', verbose=1),
]

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# One generator step is one run, so steps = sessions * runs * participants,
# computed for each split from its half-open ranges.
stepCounts = {}
for split_name in ("train", "dev"):
    split_cfg = data_split[split_name]
    numberOfSessions = split_cfg["last_sess"] - split_cfg["start_sess"]
    numberOfRuns = split_cfg["last_run"] - split_cfg["start_run"]
    numberOfParticipants = len(split_cfg["participant_list"])
    stepCounts[split_name] = numberOfSessions * numberOfRuns * numberOfParticipants

steps_per_epoch = stepCounts["train"]
validation_steps = stepCounts["dev"]

# 37 is the assumed number of images per run.
print("Total number of training examples: %s" % (steps_per_epoch * 37))
print("Total number of dev examples: %s" % (validation_steps * 37))

print("steps_per_epoch: %s" % steps_per_epoch)

train_batches = featureVectorLoader(x_images_path, "train")
dev_batches = featureVectorLoader(x_images_path, "dev")
train_history = model.fit_generator(
    train_batches,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    callbacks=callbacks,
    validation_data=dev_batches,
    validation_steps=validation_steps,
)

# Plot training and validation loss per epoch.
loss = train_history.history['loss']
val_loss = train_history.history['val_loss']
for curve in (loss, val_loss):
    plt.plot(curve)
plt.legend(['loss', 'val_loss'])
plt.show()



In [None]:
# Reload the checkpoint written at epoch 20 and print the shapes of its first
# four weight arrays.
# NOTE(review): with EarlyStopping active, training may stop before epoch 20,
# in which case 'weights.20.h5' would not exist - confirm.
new_model = load_model('weights.20.h5')
new_model.summary()
for weight_index in range(4):
    print(new_model.get_weights()[weight_index].shape)


In [None]:

# For each of the three columns of weight array 2, keep the row indices of the
# N largest values (ascending argsort, last N entries).
# Presumably weight array 2 is the output layer kernel, one column per class -
# TODO confirm against new_model.summary().
N = 50
arr1, arr2, arr3 = (new_model.get_weights()[2][:, class_index] for class_index in range(3))
indices1, indices2, indices3 = (
    np.argsort(column, axis=0)[-N:] for column in (arr1, arr2, arr3)
)
#
for top_rows in (indices1, indices2, indices3):
    print(top_rows)

In [None]:
# For each class, count how often each row of weight array 0 ranks among the
# top N for that class's selected columns, and collect the 10 most frequent
# rows per class into filter_select (without duplicates, first-seen order).
arr = new_model.get_weights()[0]
N = 20
# Sorting does not depend on the loop variables, so rank every column once
# instead of re-sorting the whole array for each index.
ranked_rows = np.argsort(arr, axis=0)

filter_select = []
for index_list in [indices1, indices2, indices3]:
    all_ind = []
    for index in index_list:
        indices = ranked_rows[-N:, index]
        sort_ind = np.sort(indices, axis=-1)
        all_ind.extend(list(sort_ind))
    # most_common() orders by count, ties in first-seen order; keep the top 10.
    a_ind = [key for key, _ in Counter(all_ind).most_common()][0:10]
    print(a_ind)
    filter_select.extend([item for item in a_ind if item not in filter_select])

print(filter_select)


In [None]:
#!pip install nibabel
import nibabel as nib
import re
# Root of the fMRI dataset download and of the cached stimulus features.
fmri_data_dir = '/home/ubuntu/cs230Project/dataset/ds001499-download'
stimuli_features_dir = 'stimulifeatures'
# Splits "<participant>_sess<NN>_run<NN>.npy" into its three parts. The dot
# before "npy" is escaped so it matches only a literal '.'.
fmriRegex = re.compile(r'^(.*?)_sess(.*?)_run(.*?)\.npy$')

# Run layout, per the original notes: a fixation cross is shown for 6 s
# (3 TRs) at the start and 12 s (6 TRs) at the end of each run; 37 images are
# shown per run; each image is shown for 1 s followed by a 9 s fixation cross
# (5 TRs). The 5 TRs associated with each stimulus are averaged.
# NOTE(review): those notes imply a 3:-6 window, but the code starts at
# index 4 and stops before shape[-1] - 5 - confirm the intended offset.
def loadFmriData(file_path):
    """Load one fMRI run and average it in consecutive 5-frame windows.

    Args:
        file_path: path to an image file readable by nibabel.

    Returns:
        Array with one averaged volume per window, windows stacked on axis 0;
        (37, 106, 106, 69) per the original note.
    """
    volumes = nib.load(file_path).get_fdata()
    n_frames = volumes.shape[-1]
    window_means = [
        np.mean(volumes[:, :, :, start:start + 5], axis=-1)
        for start in range(4, n_frames - 5, 5)
    ]
    return np.asarray(window_means)

def loadFmriLstmData(file_path):
    """Load one fMRI run as a sequence per stimulus window.

    Reuses loadFmriData for the windowed averaging, then collapses the first
    two spatial axes and moves the last spatial axis forward as the sequence
    axis. The sizes are read from the data instead of being fixed at 37 and
    69, so runs of other sizes also work.

    Args:
        file_path: path to an image file readable by nibabel.

    Returns:
        Array of shape (windows, Z, X*Y); (37, 69, 106*106) per the original
        note.
    """
    x = loadFmriData(file_path)
    n_windows, n_slices = x.shape[0], x.shape[-1]
    # (windows, X, Y, Z) -> (windows, X*Y, Z) -> (windows, Z, X*Y)
    x = np.swapaxes(np.reshape(x, (n_windows, -1, n_slices)), 1, 2)

    return x

def loadFilterVector(file_path, filterNumList):
    """Load cached features and keep only the selected filters, flattened per sample.

    The sample count is read from the file instead of being fixed at 37, so
    files with a different number of samples also work.

    Args:
        file_path: path to a .npy file holding a 3-D array indexed as
            (sample, filter, position).
        filterNumList: filter indices to keep, in output order.

    Returns:
        2-D array of shape (samples, len(filterNumList) * positions); row i is
        the concatenation of all_features[i, f, :] for each f in filterNumList.
    """
    all_features = np.load(file_path, allow_pickle=True)
    # An explicit integer index array also handles an empty filter list.
    filter_index = np.asarray(filterNumList, dtype=int)
    selected = all_features[:, filter_index, :]
    return selected.reshape(selected.shape[0], selected.shape[1] * selected.shape[2])

# Smoke test: for the first run of every training session, load the fMRI
# volume and the matching selected-filter features, and print their shapes.
filterNumList = [452, 209, 327, 377, 33, 16, 433, 19, 66, 467]
# NOTE(review): this rebinds the notebook-level `data_split` (previously the
# train/dev range configuration) to x_images_path; cells that read the range
# configuration will not work after this one without re-running its cell.
data_split = x_images_path
data_type = "train"
x_images = data_split.get(data_type, None)
for participant, sessDict in x_images.items():
    for sess, runDict in sessDict.items():
        for run in runDict.keys():
            feature_file_name = "_".join([participant, sess, run]) + ".npy"
            # Dataset layout, e.g.:
            #   sub-CSI3/ses-09/func/sub-CSI3_ses-09_task-5000scenes_run-05_bold.nii.gz
            match = fmriRegex.match(feature_file_name)
            if not match:
                # Without a match there is no fMRI path for this run; skip it
                # rather than fall through with an undefined or stale path.
                continue

            subject, session_id, run_id = match.group(1), match.group(2), match.group(3)
            fmri_file_name = "sub-%s_ses-%s_task-5000scenes_run-%s_bold.nii.gz" % (subject, session_id, run_id)
            fmri_data_path = os.path.join(fmri_data_dir, "sub-%s" % subject, "ses-%s" % session_id, "func", fmri_file_name)
            print(fmri_data_path)

            feature_vector_path = os.path.join(stimuli_features_dir, feature_file_name)
            X = loadFmriLstmData(fmri_data_path)
            Y = loadFilterVector(feature_vector_path, filterNumList)
            print(X.shape)
            print(Y.shape)
            # Only the first run of each session is checked.
            break

                                                      

In [None]:

# One file per region of interest; the trailing comment is the stored shape.
ROI_list = [
    'X_LHPPA.npy', #(19380, 5, 100)
    'X_RHLOC.npy', #(19380, 5, 170)
    'X_LHLOC.npy', #(19380, 5, 130)
    'X_RHEarlyVis.npy', #(19380, 5, 220)
    'X_RHRSC.npy', #(19380, 5, 100)
    'X_RHOPA.npy', #(19380, 5, 80)
    'X_RHPPA.npy', #(19380, 5, 140)
    'X_LHEarlyVis.npy', #(19380, 5, 190)
    'X_LHRSC.npy', #(19380, 5, 30)
    'X_LHOPA.npy', #(19380, 5, 70)
]

# Expected shapes: x_all (19380, 5, 1230), y_all (19380, 17).
#
# Design options considered:
#  - concatenate the ROIs along the last axis and feed the 5 time frames as
#    the LSTM sequence (what this cell does);
#  - train one encoder per ROI and classify on the resulting feature vectors;
#  - feed the ROIs as LSTM steps to obtain a single feature vector.

train_folder = '/home/ubuntu/cs230Project/dataset/traindata'
array_list = [np.load(os.path.join(train_folder, roc_file)) for roc_file in ROI_list]

# dstack joins the ROI arrays along the last (voxel) axis.
x_all = np.dstack(array_list)
print(x_all.shape)

y_all = np.load(os.path.join(train_folder, 'Yreal_all.npy'))

num_classes = y_all.shape[1]

# The first 18380 samples train; the remainder is held out for validation.
split_at = 18380
x_train, x_test = x_all[0:split_at, :, :], x_all[split_at:, :, :]
y_train, y_test = y_all[0:split_at, :], y_all[split_at:, :]

def classifer_lstm(input_shape, num_classes):
    """Build an (uncompiled) LSTM classifier over a sequence input.

    Architecture: Input -> LSTM(512, dropout=0.2) -> Dense(64, relu)
    -> Dropout(0.2) -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of one example without the batch axis. The caller
            in this notebook passes ``(time steps, voxels)``.
        num_classes: Width of the softmax output layer.

    Returns:
        A Keras ``Model`` mapping the input sequence to class probabilities.
    """
    X_input = Input(input_shape)
    X = LSTM(512, dropout=0.2)(X_input)
    X = Dense(64, activation='relu')(X)
    X = Dropout(0.2)(X)
    X = Dense(num_classes, activation="softmax")(X)
    # The model name shows up in summaries and logs; keep it distinct from the
    # 'auto_encoder_lstm' name used by the encoder model in this notebook.
    model = Model(inputs=X_input, outputs=X, name='classifier_lstm')
    return model

# Sequence length and per-step feature width are read off the training array.
Tx, Voxels = x_train.shape[1], x_train.shape[2]
classifier_lstm = classifer_lstm((Tx, Voxels), num_classes)

EPOCHS = 100

# No callbacks (early stopping / checkpointing) are attached to this run.
classifier_lstm.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# The held-out slice (x_test, y_test) doubles as the validation set.
train_history = classifier_lstm.fit(
    x_train,
    y_train,
    batch_size=35,
    epochs=EPOCHS,
    validation_data=(x_test, y_test),
)





In [None]:
# Sanity check: depth-stacking two ROI arrays should only grow the last axis.
first_roi, second_roi = array_list[0], array_list[1]
for roi_array in (first_roi, second_roi):
    print(roi_array.shape)
test = np.dstack((first_roi, second_roi))
print(test.shape)

In [None]:
# Train a single-layer neural network (one network per filter) to map fMRI data to the filters above.
# input: X of shape (106, 106, 69, 194)
# output: Y of shape (144, 1) image features on specific filters
#
# Feature file names have the form "<participant>_sess<session>_run<run>.npy";
# the three groups capture participant, session and run. The dot before "npy"
# is escaped so that only a literal ".npy" extension matches.
fmriRegex = re.compile(r'^(.*?)_sess(.*?)_run(.*?)\.npy$')

# Root of the downloaded fMRI dataset and folder holding the stimulus feature files.
fmri_data_dir = '/home/ubuntu/cs230Project/dataset/ds001499-download'
stimuli_features_dir = 'stimulifeatures'

def loadFilterVector(file_path, filterNumList):
    """Load a saved feature array and keep only the requested filters.

    The array stored at ``file_path`` is indexed along its second axis with
    ``filterNumList``; everything after the first axis is then flattened so
    the result has one row per entry of the first axis.

    Note: the file is read with ``allow_pickle=True``, so it should only be
    pointed at trusted files.

    Args:
        file_path: Path of the ``.npy`` file to read.
        filterNumList: Index (or list of indices) selecting along axis 1.

    Returns:
        2-D array of shape ``(first_axis_length, -1)``.
    """
    feature_bank = np.load(file_path, allow_pickle=True)
    chosen = feature_bank[:, filterNumList, :]
    row_count = chosen.shape[0]
    return chosen.reshape(row_count, -1)

# At the beginning and end of each run, a fixation cross was shown for 6 sec (3 TORs) and
# 12 sec (6 TORs), respectively; hence stIndex goes from 3:-6.
# 37 images are shown in each run >> 185 TORs.
# Each image was presented for 1 sec followed by a 9 sec fixation cross (5 TORs).
# For each stimulus, average the associated 5 TORs and map them.
def loadFmriData(file_path):
    """Load one fMRI run and pick a single volume per 5-volume window.

    The last axis of the loaded image is walked in steps of 5 starting at
    index 4 and stopping before ``length - 5``; for each window start, the
    volume at ``start + 3`` is kept (no averaging is done here).

    Args:
        file_path: Path of the image file readable by ``nib.load``.

    Returns:
        Array with one selected volume per window, stacked along a new
        leading axis.
    """
    volume_4d = nib.load(file_path).get_fdata()
    n_volumes = volume_4d.shape[-1]
    picked_volumes = [
        volume_4d[:, :, :, window_start + 3]
        for window_start in range(4, n_volumes - 5, 5)
    ]
    return np.asarray(picked_volumes)

def loadFmriLstmData(file_path):
    """Load one fMRI run as LSTM-ready sequences.

    The last axis of the loaded image is walked in steps of 5 starting at
    index 4 and stopping before ``length - 5``; each 5-volume window is
    averaged. The stacked result is reshaped to ``(37, -1, 69)`` and its last
    two axes are swapped, i.e. (37, 106, 106, 69) -> (37, 69, 106*106) for the
    volume size noted in this notebook.

    Args:
        file_path: Path of the image file readable by ``nib.load``.

    Returns:
        Array of shape ``(37, 69, features)``.
    """
    volume_4d = nib.load(file_path).get_fdata()
    n_volumes = volume_4d.shape[-1]
    window_means = np.asarray([
        np.mean(volume_4d[:, :, :, window_start:window_start + 5], axis=-1)
        for window_start in range(4, n_volumes - 5, 5)
    ])
    # Collapse the two in-plane axes, then move the 69-long axis in front of them.
    collapsed = np.reshape(window_means, (37, -1, 69))
    return np.swapaxes(collapsed, 1, 2)

def featureVectorLoader(data_split, data_type, filterNum):
    """Endlessly yield ``(X, Y)`` batches, one per run.

    For every participant / session / run key found under
    ``data_split[data_type]`` the generator loads the run's fMRI data with
    ``loadFmriLstmData`` (X) and the matching stimulus features with
    ``loadFilterVector`` (Y). After the last run it starts over, so it can be
    consumed for any number of epochs.

    Args:
        data_split: Mapping from split name to a nested mapping
            ``{participant: {session: {run: ...}}}``; only the keys are used.
        data_type: Name of the split to iterate.
        filterNum: Filter index or list of indices forwarded to
            ``loadFilterVector``.

    Yields:
        Tuple ``(X, Y)`` for one run.

    Raises:
        ValueError: On iteration, if the requested split is missing or empty,
            if a feature file name built from the keys does not match
            ``fmriRegex``, or if a full pass over the split finds no runs.
    """
    x_images = data_split.get(data_type, None)
    if not x_images:
        # Without this guard a missing split fails with an opaque
        # AttributeError and an empty one spins forever without yielding.
        raise ValueError("No entries found for data split %r" % (data_type,))

    while True:
        yielded_any = False
        for participant, sessDict in x_images.items():
            for sess, runDict in sessDict.items():
                for run in runDict.keys():
                    feature_file_name = "_".join([participant, sess, run]) + ".npy"
                    # Dataset layout, e.g.:
                    #   sub-CSI3/ses-09/func/sub-CSI3_ses-09_task-5000scenes_run-05_bold.nii.gz
                    match = fmriRegex.match(feature_file_name)
                    if match is None:
                        # Never fall through with an unbound or stale
                        # fmri_data_path: that would pair this run's features
                        # with another run's fMRI data.
                        raise ValueError(
                            "Feature file name %r does not match the expected pattern"
                            % (feature_file_name,)
                        )
                    subject, session, run_id = match.group(1), match.group(2), match.group(3)
                    fmri_file_name = "sub-%s_ses-%s_task-5000scenes_run-%s_bold.nii.gz" % (subject, session, run_id)
                    fmri_data_path = os.path.join(fmri_data_dir, "sub-%s" % subject, "ses-%s" % session, "func", fmri_file_name)

                    feature_vector_path = os.path.join(stimuli_features_dir, feature_file_name)
                    X = loadFmriLstmData(fmri_data_path)
                    Y = loadFilterVector(feature_vector_path, filterNum)
                    yielded_any = True
                    yield (X, Y)
        if not yielded_any:
            # Every session/run mapping was empty; stop instead of looping forever.
            raise ValueError("Data split %r contains no runs" % (data_type,))


# for each y, we have 
def auto_encoder(input_shape, encoding_dim):
    """Build an (uncompiled) convolutional encoder.

    Only an encoding path is built:
    Input -> Conv2D(4, 1x1, tanh) -> Flatten -> Dense(64, relu)
    -> Dropout(0.4) -> Dense(encoding_dim, relu).

    Args:
        input_shape: Shape of one example without the batch axis.
        encoding_dim: Width of the final dense layer.

    Returns:
        A Keras ``Model`` named 'auto_encoder'.
    """
    inputs = Input(input_shape)
    # 1x1 convolution mixes channels only; no spatial pooling is applied.
    features = Conv2D(4, (1, 1), activation='tanh')(inputs)
    features = Flatten()(features)
    features = Dense(64, activation='relu')(features)
    features = Dropout(0.4)(features)
    encoding = Dense(encoding_dim, activation='relu')(features)
    return Model(inputs=inputs, outputs=encoding, name='auto_encoder')

# for each y, we have 
def auto_encoder_lstm(input_shape, encoding_dim):
    """Build an (uncompiled) LSTM encoder.

    Architecture: Input -> LSTM(128) -> Dense(encoding_dim, sigmoid). The LSTM
    returns only its final output, so the dense layer is applied once per
    example rather than once per time step.

    Args:
        input_shape: Shape of one example without the batch axis.
        encoding_dim: Width of the sigmoid output layer.

    Returns:
        A Keras ``Model`` named 'auto_encoder_lstm'.
    """
    sequence_in = Input(input_shape)
    summary_state = LSTM(128)(sequence_in)
    encoding = Dense(encoding_dim, activation="sigmoid")(summary_state)
    return Model(inputs=sequence_in, outputs=encoding, name='auto_encoder_lstm')

EPOCHS = 100

# Filters whose features the encoder is trained to predict.
filterNumList = [452, 209, 327, 377, 33, 16, 433, 19, 66, 467]

# One generator step per (participant, session, run) combination in the split.
train_cfg = data_split["train"]
numberOfSessions = train_cfg["last_sess"] - train_cfg["start_sess"]
numberOfRuns = train_cfg["last_run"] - train_cfg["start_run"]
numberOfParticipants = len(train_cfg["participant_list"])
steps_per_epoch = numberOfSessions * numberOfRuns * numberOfParticipants

dev_cfg = data_split["dev"]
numberOfSessions = dev_cfg["last_sess"] - dev_cfg["start_sess"]
numberOfRuns = dev_cfg["last_run"] - dev_cfg["start_run"]
numberOfParticipants = len(dev_cfg["participant_list"])
validation_steps = numberOfSessions * numberOfRuns * numberOfParticipants

# LSTM encoder over (69, 106*106) inputs; the target holds 144 values per selected filter.
# The convolutional variant would be auto_encoder((106,106,69), len(filterNumList)*144).
encoder_model = auto_encoder_lstm((69, 106*106), len(filterNumList)*144)
encoder_model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mean_squared_error'],
)

train_generator = featureVectorLoader(x_images_path, "train", filterNumList)
dev_generator = featureVectorLoader(x_images_path, "dev", filterNumList)
train_history = encoder_model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=dev_generator,
    validation_steps=validation_steps,
)

# Predict

In [None]:

# Unroll the content output and add a leading batch axis before predicting.
x = np.expand_dims(unrollContentOutput(predictContentOut), axis=0)
print('Input image shape:', x.shape)
print(model.predict(x))

# NOTE(review): in the visible part of the notebook model2 is only created in
# a later cell — confirm this cell is meant to run after that one.
model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


# Load processed fmri data

In [None]:
# Number of softmax outputs for the dense classifier built in this cell.
# NOTE(review): this overrides any earlier notebook-level num_classes — confirm 4 matches the label width of the y arrays passed to fit().
num_classes = 4

def dnn_classifier(input_shape, num_classes):
    """Build an (uncompiled) fully connected classifier.

    Architecture: Input -> Flatten -> Dense(512, tanh) -> Dense(128, tanh)
    -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of one example without the batch axis.
        num_classes: Width of the softmax output layer.

    Returns:
        A Keras ``Model`` named 'dnn_classifier'.
    """
    inputs = Input(input_shape)
    hidden = Flatten()(inputs)
    hidden = Dense(512, activation='tanh')(hidden)
    hidden = Dense(128, activation='tanh')(hidden)
    class_probs = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=inputs, outputs=class_probs, name='dnn_classifier')

# The classifier input is one example of x_content (batch axis dropped).
input_shape = x_content.shape[1:]
model2 = dnn_classifier(input_shape, num_classes)

EPOCHS = 100

# Checkpoint the weights during training; early stopping is not enabled.
callbacks = [
    ModelCheckpoint(filepath='weights.{epoch:02d}.h5', monitor='val_loss', verbose=1),
]

model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Step counts: one step per (participant, session, run) combination in the
# split. In this cell they only feed the printed totals below; the fit() call
# trains on in-memory arrays.
train_cfg = data_split["train"]
numberOfSessions = train_cfg["last_sess"] - train_cfg["start_sess"]
numberOfRuns = train_cfg["last_run"] - train_cfg["start_run"]
numberOfParticipants = len(train_cfg["participant_list"])
steps_per_epoch = numberOfSessions * numberOfRuns * numberOfParticipants

dev_cfg = data_split["dev"]
numberOfSessions = dev_cfg["last_sess"] - dev_cfg["start_sess"]
numberOfRuns = dev_cfg["last_run"] - dev_cfg["start_run"]
numberOfParticipants = len(dev_cfg["participant_list"])
validation_steps = numberOfSessions * numberOfRuns * numberOfParticipants

# Each step is scaled by 37 to get the example totals.
print("Total number of training examples: %s" % (steps_per_epoch * 37))
print("Total number of dev examples: %s" % (validation_steps * 37))

print("steps_per_epoch: %s" % steps_per_epoch)

train_history = model2.fit(
    x_train,
    y_train,
    batch_size=35,
    epochs=EPOCHS,
    callbacks=callbacks,
    validation_data=(x_test, y_test),
)





# Plot training and validation loss per epoch from the last fit() call.
history_log = train_history.history
loss, val_loss = history_log['loss'], history_log['val_loss']
for curve in (loss, val_loss):
    plt.plot(curve)
plt.legend(['loss', 'val_loss'])
plt.show()
