In [4]:
# Imports
import numpy as np
import h5py
import pandas as pd
import scipy.io as sio
import csv
import os
import sys

sys.path.append("../") # go to parent dir

from mrcode.utils.network_utils import delete_hf5_elements, create_hf5_test

# Creating image semantics in correct order for all subjects

In [8]:
# Importing the h5 file
hf = h5py.File('../data/pred_image_semantics/test.h5', 'r')
keys1 = list(hf.keys())
len(keys1)

345

In [114]:
# Get image ids
def getImageIds(path):
    imageids = []
    with open(path) as f:
        reader = csv.reader(f, delimiter = '\t')
        for row in reader:
            if row[2] != 'image_id':
                imageids.append(row[2])
    return imageids  

def getImageSemantics(imageids):
    imagesVectors = []
    for i in imageids:
        # Get the data
        image = hf.get(i)
        imageSemantics = np.array(image)
        # Append to the list
        imagesVectors.append(imageSemantics)

    npImages = np.asarray(imagesVectors)
    npImagesTrans = npImages.transpose()
    
    return npImagesTrans

In [115]:
# Creating image semantics in the correct order for all the experiments in the below folder
rootdir = 'C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data'

for subdir, dirs, files in os.walk(rootdir):
    # checking if the image sematics is already created and if image order exists
    if ('image_semantics.mat' not in files) & ('image_order.txt' in files):
        print('Creating image semantics for ' + subdir)
        path = subdir + '/image_order.txt'
        # 'C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data/exp1/image_order.txt'
        # Getting the image ids
        imageids = getImageIds(path)
        # Collecting the image semantics
        image_semantics = getImageSemantics(imageids)
        # Save the image semantics
        save_path = subdir + '/image_semantics.mat'
        sio.savemat(save_path, {'image_semantics':image_semantics})

Creating image semantics for C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data\dryelectrodesGreta
Creating image semantics for C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data\dryelectrodesJeppe
Creating image semantics for C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data\dryelectrodesNicolai
Creating image semantics for C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data\dryelectrodesNicolaiNew
Creating image semantics for C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data\dryelectrodesPaolo
Creating image semantics for C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data\exp1
Creating image semantics for C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data\exp10
Creating image semantics for C:/Users/Dea/Documents/GitHub/Project-MindReading/data/experiment_data\exp11
Creating image semantics for C:/Users/Dea/Documents/GitHub/Project-MindReading/da

## Initial code for extracting image semantics for one subject

In [48]:
# loading a csv file with imageids
imageids = []
with open('data/imageids_orders/imageorder_sorted_Jeppe_Paolo.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        #print(row[0][-16:-4])
        imageids.append(row[0][-16:-4])

In [57]:
# loading text file
imageids = []
with open('data/imageids_orders/imageorder_Jeppe.txt') as f:
    reader = csv.reader(f)
    for row in reader:
        if row[0] != 'image_id':
            imageids.append(row[0])

In [52]:
imageids

['000000005344',
 '000000139130',
 '000000081363',
 '000000038890',
 '000000038628',
 '000000150360',
 '000000195137',
 '000000530855',
 '000000156396',
 '000000356368',
 '000000261344',
 '000000161569',
 '000000157886',
 '000000194551',
 '000000029732',
 '000000323494',
 '000000202298',
 '000000315905',
 '000000084488',
 '000000557220',
 '000000101684',
 '000000576327',
 '000000014377',
 '000000139675',
 '000000200143',
 '000000116792',
 '000000298979',
 '000000446459',
 '000000101657',
 '000000152108',
 '000000049946',
 '000000561529',
 '000000389669',
 '000000068697',
 '000000468363',
 '000000277543',
 '000000473919',
 '000000552876',
 '000000151053',
 '000000216206',
 '000000194956',
 '000000374156',
 '000000515006',
 '000000171315',
 '000000061351',
 '000000400872',
 '000000044467',
 '000000271522',
 '000000221561',
 '000000328888',
 '000000564940',
 '000000538149',
 '000000186336',
 '000000190216',
 '000000338419',
 '000000233780',
 '000000131060',
 '000000575496',
 '000000170980

In [1]:
# importing the h5 file
hf = h5py.File('data/train.h5', 'r')
keys = list(hf.keys())
len(keys)

In [2]:
with h5py.File('data/train.h5',  "a") as f:
    for i in imageids:
        del f[i]

### Create a dataframe with all the images in the experiment and their image semantic vectors

In [53]:
imagesVectors = []
for i in imageids:
    # Get the data
    image = hf.get(i)
    imageSemantics = np.array(image)
    # Append to the list
    imagesVectors.append(imageSemantics)

In [54]:
npImages = np.asarray(imagesVectors)

In [55]:
npImagesTrans = npImages.transpose()

### Make a matrix that matches the matrix from Matlab

In [56]:
sio.savemat('image_semantics_Jeppe.mat', {'image_semantics':npImagesTrans})

## Test why test imsem is not giving nice DM plot in matlab

In [3]:
image_stats = pd.read_csv('../data/our_image_statistics.csv', dtype={'image_id': np.str})

In [4]:
image_stats.head()

Unnamed: 0,image_id,brightness,rms_contrast,supercategories,categories,ratio
0,2061,138.172037,57.002787,furniture,toilet,0.75
1,2342,135.688587,58.412852,animal,giraffe,0.75
2,3084,120.102751,48.705221,indoor,clock,0.75
3,5154,116.898115,45.521527,furniture,bed,0.75
4,5174,119.665706,62.867596,vehicle,bus,0.75


In [12]:
def getImageSemantics(imageids):
    imagesVectors = []
    for i in imageids:
        # Get the data
        image = hf.get(i)
        imageSemantics = np.array(image)
        # Append to the list
        imagesVectors.append(imageSemantics)

    npImages = np.asarray(imagesVectors)
    npImagesTrans = npImages.transpose()
    
    return npImagesTrans

In [13]:
# Importing the h5 file
hf = h5py.File('../data/pred_image_semantics/test.h5', 'r')
keys = list(hf.keys())
len(keys)

690

In [56]:
new_list = []
for imageid in keys:
    index1 = image_stats.index[image_stats['image_id'] == imageid].tolist()[0]
    category = image_stats.iloc[index1]['categories']
    new_list.append([imageid, category])

In [57]:
df = pd.DataFrame.from_records(new_list, columns=['imageid','category'])

In [58]:
df.head()

Unnamed: 0,imageid,category
0,2061,toilet
1,2342,giraffe
2,3084,clock
3,5154,bed
4,5174,bus


In [59]:
new_df = df.sort_values('category')

In [60]:
new_keys = new_df['imageid'].values

In [61]:
image_sem_test = getImageSemantics(new_keys)

In [62]:
image_sem_test.shape

(2048, 690)

In [63]:
sio.savemat('../data/pred_image_semantics/imsem_test.mat', {'image_semantics_test':image_sem_test})

In [101]:
# Importing the h5 file
hf = h5py.File('../data/pred_image_semantics/test_ridge.h5', 'r')
keys = list(hf.keys())
len(keys)

345

In [102]:
new_list = []
for imageid in keys:
    index1 = image_stats.index[image_stats['image_id'] == imageid].tolist()[0]
    category = image_stats.iloc[index1]['categories']
    new_list.append([imageid, category])

In [103]:
df = pd.DataFrame.from_records(new_list, columns=['imageid','category'])

In [104]:
df.head()

Unnamed: 0,imageid,category
0,2061,toilet
1,3084,clock
2,5174,bus
3,8218,clock
4,14686,giraffe


In [105]:
new_df = df.sort_values('category')

In [106]:
new_keys = new_df['imageid'].values

In [107]:
new_keys

array(['000000438704', '000000042893', '000000105096', '000000052498',
       '000000178870', '000000572251', '000000448358', '000000202943',
       '000000270136', '000000135790', '000000084875', '000000069693',
       '000000216115', '000000527510', '000000408965', '000000300123',
       '000000096481', '000000220306', '000000445276', '000000405226',
       '000000437426', '000000436578', '000000521256', '000000371183',
       '000000383413', '000000172036', '000000127110', '000000201308',
       '000000490475', '000000387355', '000000482989', '000000521560',
       '000000205875', '000000109995', '000000105881', '000000162892',
       '000000467206', '000000460901', '000000435787', '000000434394',
       '000000259760', '000000015386', '000000027591', '000000078663',
       '000000406226', '000000153136', '000000399091', '000000164583',
       '000000170960', '000000465092', '000000112805', '000000461278',
       '000000333106', '000000330564', '000000190896', '000000474713',
      

In [71]:
image_sem_test_ridge = getImageSemantics(new_keys)

In [72]:
image_sem_test_ridge.shape

(2048, 345)

In [73]:
sio.savemat('../data/pred_image_semantics/imsem_test_ridge.mat', {'image_semantics_test_ridge':image_sem_test_ridge})

In [143]:
X_test = np.load('../data/pred_image_semantics/X_test.npy')
Y_test = np.load('../data/pred_image_semantics/Y_test.npy')
imageids_test = np.load('../data/pred_image_semantics/imageids_test.npy')

In [109]:
Y_test.shape

(345, 2048)

In [110]:
new_list = []
for imageid in imageids_test:
    index1 = image_stats.index[image_stats['image_id'] == imageid].tolist()[0]
    category = image_stats.iloc[index1]['categories']
    new_list.append([imageid, category])

In [111]:
df = pd.DataFrame.from_records(new_list, columns=['imageid','category'])

In [112]:
new_df = df.sort_values('category')

In [113]:
new_keys = new_df['imageid'].values

In [142]:
new_keys

array(['000000438704', '000000042893', '000000105096', '000000052498',
       '000000178870', '000000572251', '000000448358', '000000202943',
       '000000270136', '000000135790', '000000084875', '000000069693',
       '000000216115', '000000527510', '000000408965', '000000300123',
       '000000096481', '000000220306', '000000445276', '000000405226',
       '000000437426', '000000436578', '000000521256', '000000371183',
       '000000383413', '000000172036', '000000127110', '000000201308',
       '000000490475', '000000387355', '000000482989', '000000521560',
       '000000205875', '000000109995', '000000105881', '000000162892',
       '000000467206', '000000460901', '000000435787', '000000434394',
       '000000259760', '000000015386', '000000027591', '000000078663',
       '000000406226', '000000153136', '000000399091', '000000164583',
       '000000170960', '000000465092', '000000112805', '000000461278',
       '000000333106', '000000330564', '000000190896', '000000474713',
      

In [95]:
image_sem_test_ridge = getImageSemantics(new_keys)

In [96]:
image_sem_test_ridge.shape

(2048, 345)

In [98]:
sio.savemat('../data/pred_image_semantics/imsem_test_ridge_new.mat', {'image_semantics_test_ridge_new':image_sem_test_ridge})

In [144]:
# Importing the h5 file
hf = h5py.File('../data/pred_image_semantics/pred_ridge.h5', 'r')
keys = list(hf.keys())
len(keys)

345

In [145]:
new_list = []
for imageid in keys:
    index1 = image_stats.index[image_stats['image_id'] == imageid].tolist()[0]
    category = image_stats.iloc[index1]['categories']
    new_list.append([imageid, category])

In [146]:
df = pd.DataFrame.from_records(new_list, columns=['imageid','category'])

In [147]:
df.head()

Unnamed: 0,imageid,category
0,2061,toilet
1,3084,clock
2,5174,bus
3,8218,clock
4,14686,giraffe


In [148]:
new_df = df.sort_values('category')

In [149]:
new_keys = new_df['imageid'].values

In [139]:
image_sem_pred_ridge = getImageSemantics(new_keys)

In [140]:
image_sem_pred_ridge.shape

(2048, 345)

In [141]:
sio.savemat('../data/pred_image_semantics/imsem_pred_ridge.mat', {'image_semantics_pred_ridge':image_sem_pred_ridge})

# Make matlab file of train set and test_ridge set and pred_ridge

In [5]:
image_stats = pd.read_csv('../data/our_image_statistics.csv', dtype={'image_id': np.str})

In [6]:
def getImageSemantics(imageids):
    imagesVectors = []
    for i in imageids:
        # Get the data
        image = hf.get(i)
        imageSemantics = np.array(image)
        # Append to the list
        imagesVectors.append(imageSemantics)

    npImages = np.asarray(imagesVectors)
    npImagesTrans = npImages.transpose()
    
    return npImagesTrans

In [5]:
#X_train = np.load('../data/pred_image_semantics/X_trainVal.npy')
Y_train = np.load('../data/pred_image_semantics/Y_trainVal.npy')
imageids_train = np.load('../data/pred_image_semantics/imageids_trainVal.npy')

In [7]:
new_list = []
for imageid in imageids_train:
    index1 = image_stats.index[image_stats['image_id'] == imageid].tolist()[0]
    category = image_stats.iloc[index1]['categories']
    new_list.append([imageid, category])

In [8]:
df = pd.DataFrame.from_records(new_list, columns=['imageid','category'])

In [9]:
new_df = df.sort_values('category')

In [14]:
new_keys = new_df['imageid'].values

In [15]:
len(new_keys)

9660

In [16]:
image_sem_train = getImageSemantics(new_keys)

In [17]:
sio.savemat('../data/pred_image_semantics/imsem_train.mat', {'image_semantics_train':image_sem_train})

In [15]:
# Importing the h5 file
hf = h5py.File('../data/pred_image_semantics/pred_ridge.h5', 'r')
keys = list(hf.keys())
len(keys)

345

In [16]:
new_list = []
for imageid in keys:
    index1 = image_stats.index[image_stats['image_id'] == imageid].tolist()[0]
    category = image_stats.iloc[index1]['categories']
    new_list.append([imageid, category])

In [17]:
df = pd.DataFrame.from_records(new_list, columns=['imageid','category'])

In [18]:
new_df = df.sort_values('category')

In [19]:
new_keys = new_df['imageid'].values

In [20]:
len(new_keys)

345

In [21]:
imsem_test = getImageSemantics(new_keys)

In [22]:
sio.savemat('../data/pred_image_semantics/imsem_pred.mat', {'pred_imsem':imsem_test})