In [None]:
import csv
import numpy as np
import pandas as pd
from scipy import stats

import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
import random
import numpy as np
import json

from pycocotools.coco import COCO

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Read the per-image label table and collect the COCO id of every row whose
# 'food' column equals 1. The resulting order is the row order of the CSV.
labels_path = '/data/shared_1000_all_labels_matrix.csv'
labels_data = pd.read_csv(labels_path)
food_cocoIds = list(labels_data.loc[labels_data['food'] == 1, 'cocoId'])

In [None]:
# Filenames
# Every path below is used as a template: the loader functions call
# `<path>.format(subj)` and hand the result to np.load. All four are blank
# here and have to be filled in before the notebook can run.

# path to mask of relevant hcp regions (non-zero entries are the voxels kept)
hcp_base_path = '' 

# path to food v all (entries <= 0.05 are the voxels kept)
food_v_all_base_path = '' 

# path to preprocessed voxels (stimulus x cortical voxels)
voxels_base_path = ''

# path to stimulus cocoids in same order as voxels_base
stim_base_path = ''




# Get PCA input matrix

In [None]:
def getFoodVoxelsIndices(subj):
    """Return the indices of voxels that pass both the HCP mask and the food test.

    A voxel index is kept when its entry in the HCP mask array is non-zero
    and its entry in the food-vs-all array is <= 0.05.

    Parameters
    ----------
    subj
        Value substituted into the ``hcp_base_path`` and
        ``food_v_all_base_path`` templates via ``str.format``.

    Returns
    -------
    list of int
        The selected indices in ascending order, without duplicates.

    Notes
    -----
    Only the first index axis of each array is used, exactly as before.
    This presumably means both files hold 1-D arrays with one entry per
    voxel, and that the 0.05 cut-off is a p-value threshold -- TODO confirm.
    """
    # path to HCP mask
    hcp_mask = np.load(hcp_base_path.format(subj))
    in_mask = hcp_mask.nonzero()[0]

    # path to calculated food vs all
    food_v_all = np.load(food_v_all_base_path.format(subj))
    passes_threshold = np.where(food_v_all <= 0.05)[0]

    # np.intersect1d returns a sorted array, so the voxel order is now
    # deterministic and increases with position in the source arrays. The
    # earlier set-based intersection handed the indices back in hash order.
    return np.intersect1d(in_mask, passes_threshold).tolist()

In [None]:
def getFoodImagesIndices(subj):
    """Find where each food image sits in a subject's stimulus list.

    Parameters
    ----------
    subj
        Value substituted into the ``stim_base_path`` template via
        ``str.format``; the resulting file is read with ``np.load``.

    Returns
    -------
    list of int
        One position per entry of the module-level ``food_cocoIds``, in
        that same order.

    Raises
    ------
    KeyError
        If an id from ``food_cocoIds`` does not occur in the stimulus list.
    """
    stim_ids = np.load(stim_base_path.format(subj))

    # id -> position lookup; when an id occurs more than once the last
    # position overwrites the earlier ones.
    position_of = {stim_id: pos for pos, stim_id in enumerate(stim_ids)}

    return [position_of[coco_id] for coco_id in food_cocoIds]

In [None]:
def getImgByFoodVoxMatrix(subj):
    """Extract the (food images x selected voxels) block for one subject.

    Parameters
    ----------
    subj
        Value substituted into the ``voxels_base_path`` template via
        ``str.format`` and forwarded to the two index helpers.

    Returns
    -------
    numpy.ndarray
        Rows follow the order returned by ``getFoodImagesIndices(subj)``,
        columns the order returned by ``getFoodVoxelsIndices(subj)``.
    """
    all_responses = np.load(voxels_base_path.format(subj))

    voxel_cols = getFoodVoxelsIndices(subj)
    image_rows = getFoodImagesIndices(subj)

    # Index in two steps (rows, then columns) so the two index lists select
    # a rectangular block instead of being paired element by element.
    food_image_rows = all_responses[image_rows, :]
    return food_image_rows[:, voxel_cols]

In [None]:
# Build one (food images x selected voxels) matrix for each of subjects 1-8,
# reporting its shape as it arrives, then place the matrices side by side so
# the columns of the result run over the voxels of all subjects.
subj_matrices = []
for subj in range(1, 9):
    subj_matrix = getImgByFoodVoxMatrix(subj)
    print("got food by vox matrix for subj: ", subj, subj_matrix.shape)
    subj_matrices.append(subj_matrix)

all_subjs_matrix = np.hstack(subj_matrices)

got food by vox matrix for subj:  1 (108, 567)


# Run PCA

In [None]:
def getPCA(n_components=10, random_state=0):
    """Fit a PCA on the z-scored transpose of ``all_subjs_matrix``.

    The module-level ``all_subjs_matrix`` is transposed first, so each of its
    columns becomes one PCA sample and each of its rows one PCA feature.
    ``stats.zscore`` runs with its default axis (0), i.e. every column of the
    transposed matrix is standardised across the samples.

    Parameters
    ----------
    n_components : int, default 10
        Number of principal components to keep (previously fixed at 10).
    random_state : int or None, default 0
        Seed forwarded to ``PCA``. Depending on the data shape and the
        scikit-learn version, the default ``svd_solver='auto'`` can select
        the randomized SVD; a fixed seed keeps the components identical
        between runs in that case. Pass ``None`` for the unseeded behaviour.

    Returns
    -------
    pca
        The fitted ``PCA`` object.
    pca_test
        The z-scored data projected onto the fitted components.
    """
    data_rescaled = stats.zscore(all_subjs_matrix.T)
    pca = PCA(n_components=n_components, random_state=random_state)
    pca.fit(data_rescaled)
    pca_test = pca.transform(data_rescaled)
    return pca, pca_test

In [None]:
# Run the PCA once and keep both results at module level: `pca_axis` is the
# fitted PCA object returned by getPCA(), `pca_transform` the transformed data.
pca_axis, pca_transform = getPCA()

# Visualize Top and Bottom Images for PCs

In [None]:
# coco_datadir: directory holding the COCO annotation files (blank here, fill in before running)
dataDir = ''

# coco_datatype: inserted into the annotation file name 'instances_{}.json' (blank here)
dataType = ''
# Resulting annotation file path: <dataDir>/instances_<dataType>.json
annFile = '{}/instances_{}.json'.format(dataDir, dataType)

In [None]:
# Load the annotation file and build the COCO index; the display helper below
# looks image records up through this object (coco.loadImgs).
coco = COCO(annFile)

loading annotations into memory...
Done (t=18.17s)
creating index...
index created!


In [None]:
def displayCOCOImg(coco_ids):
    """Show the COCO image for every id in ``coco_ids`` with matplotlib.

    Each record is looked up through the module-level ``coco`` object, the
    picture is read with ``io.imread`` from the record's ``'coco_url'`` and
    drawn without axes, one figure per id.

    Parameters
    ----------
    coco_ids : iterable
        COCO image ids, handed one at a time to ``coco.loadImgs``.

    Notes
    -----
    An id for which ``coco.loadImgs`` returns ``None`` is reported on stdout
    and skipped. Presumably this happens when the id is not a plain ``int``
    (which is why callers cast with ``int(...)``) -- confirm against the
    installed pycocotools version.
    """
    for coco_id in coco_ids:
        records = coco.loadImgs(coco_id)
        if records is None:
            print(coco_id, " cocoid invalid")
            # Move on to the next id: without this the code went on to index
            # None and raised TypeError right after printing the message.
            continue
        record = records[0]
        picture = io.imread(record['coco_url'])
        plt.imshow(picture)
        plt.axis('off')
        plt.show()

In [None]:
def displayTopImagesIdxForPC(pc, num_img):
    """Display the images with the largest values on one principal component.

    Entry ``i`` of ``pca_axis.components_[pc]`` is paired with
    ``food_cocoIds[i]``; the images are shown from the largest value down.
    The signed values are ranked, not their magnitudes.

    Parameters
    ----------
    pc : int
        Row index into ``pca_axis.components_``.
    num_img : int
        Number of images to show. Values below 1 show nothing and values
        above the number of entries show every image.
    """
    loadings = pca_axis.components_[pc]
    # Clamp the request. Previously num_img == 0 sliced with `-0` and selected
    # every image, and a count above len(loadings) made np.argpartition raise.
    count = max(0, min(int(num_img), len(loadings)))
    # Descending order of the signed values, truncated to the requested count.
    ids = np.argsort(loadings)[::-1][:count]
    # Cast to plain int before the lookup, as the original code did.
    displayCOCOImg([int(food_cocoIds[i]) for i in ids])
    print("done")

In [None]:
def displayMinImagesIdxForPC(pc, num_img):
    """Display the images with the smallest values on one principal component.

    Entry ``i`` of ``pca_axis.components_[pc]`` is paired with
    ``food_cocoIds[i]``; the images are shown from the smallest (most
    negative) value up.

    Parameters
    ----------
    pc : int
        Row index into ``pca_axis.components_``.
    num_img : int
        Number of images to show. Values below 1 show nothing and values
        above the number of entries show every image.
    """
    loadings = pca_axis.components_[pc]
    # Clamp the request. Previously num_img == 0 sliced with `-0` and selected
    # every image, and a count above len(loadings) made np.argpartition raise.
    count = max(0, min(int(num_img), len(loadings)))
    # Ascending order of the signed values is the same ranking as taking the
    # largest entries of the negated component.
    ids = np.argsort(loadings)[:count]
    # Cast to plain int before the lookup, as the original code did.
    displayCOCOImg([int(food_cocoIds[i]) for i in ids])
    print("done")