In [6]:
### Packages import
import os
import gc
import time
from time import strftime
import sys
start_time = time.time()

import numpy as np
import pandas as pd

import torch
from torchvision import models
from src.cuda_checker import cuda_torch_check, memory_checker

### My modules import
from src.data_loader import argObj, data_loaders_stimuli_fmri
from src import image_preprocessing
from src.feature_extraction import model_loader, fit_pca, pca_batch_calculator, extract_and_pca_features, extract_features_no_pca
from src.encoding import linear_regression
from src.evaluation_metrics import median_squared_noisenorm_correlation
from src.visualize import histogram, box_plot, noise_norm_corr_ROI, final_subj_corr_dataframe_boxplot_istograms

### Cuda setup and check
import torch

run_mode = 'config'

In [7]:
if run_mode == 'config':
    # Select the device to run the model on
    device = 'cuda' #@param ['cpu', 'cuda'] {allow-input: true}
    # Check if cuda is available
    device = torch.device(device)
    cuda_torch_check()

    ### Parameters definition
    train_percentage = 90 # X% of the training data will be used for training, (100-X)% for validation
    transform = image_preprocessing.imagenet_transform_alt

    batch_size = 64
    
    compute_pca = True
    pca_component = 100
    min_pca_batch_size = pca_component + 300 # pca_component * 2
    
    # feature_model_type = "alexnet" #@param ["alexnet", "ZFNet", "resnet50", "vgg16","vgg19_bn" , "efficientnetb2", "efficientnetb2lib"]
    # model_layers = ["features.2", "features.12"] 

    # create a dictionary to store the model layers
    # ["alexnet", "ZFNet", "resnet50", "vgg16","vgg19_bn" , "efficientnetb2", "efficientnetb2lib"]
    
    combining_mode = 'concat' #@param ["single", "concat", "mixed"]
    test_models_layers = {'alexnet': ['features.4', 'features.11'],
                            'ZFNet': ['features.4', 'features.11']}
    test_the_layers = True #@param ["True", "False"] 

    config_file = 'config.json'

    save = False 

    regression_type = "ridge" #@param ["linear", "ridge"]
    grid_search = True
    alpha_l = 1e5
    alpha_r = 1e5
    

    ### Path definition
    model_layer_full = '_'.join([
        '{}_{}'.format(model.upper(), '+'.join(layers)) 
        for model, layers in test_models_layers.items()
    ])

    datetime_id = strftime("(%Y-%m-%d_%H-%M)")
    submission_name = f'{strftime("(%Y-%m-%d_%H-%M)")}-{model_layer_full}-PCA_{pca_component}-{regression_type.upper()}-ALPHA_{"{:.1e}".format(alpha_l)}'

    ### Path definition
    # Data folder definition
    data_home_dir = '../Datasets/Biomedical'
    data_dir = '../Datasets/Biomedical/algonauts_2023_challenge_data'
    ncsnr_dir = '../Datasets/Biomedical/algonauts_ncsnr'
    images_trials_dir = '../Datasets/Biomedical/algonauts_train_images_trials'


# Used to save the prediction of saved model
parent_submission_dir = f'./files/submissions/{submission_name}'
config_name = '+'.join(test_models_layers.keys()) + datetime_id
# folders where the config files will be saved (global and local best performing layers)
parent_config_dir = f'./files/config/{config_name}'
global_config_dir = './files/config/global'
if test_the_layers == True:
    parent_submission_dir = parent_submission_dir + '_TEST'
if not os.path.isdir(parent_submission_dir) and save:
            os.makedirs(parent_submission_dir)
if not os.path.isdir(parent_config_dir) and save and test_the_layers:
            os.makedirs(parent_config_dir)
print(submission_name + "\n")

Check if GPU is available and if torch is using it ..


Torch Cuda is available?
True
Torch Cuda device count is :
1
Torch Cuda current device is :
0
Torch Cuda device is :
<torch.cuda.device object at 0x000001A997D663D0>
NVIDIA GeForce RTX 3070 Laptop GPU
Pytorch version：
1.13.0
CUDA Version: 
11.6
cuDNN version is :
8302


(2023-08-26_20-21)-ALEXNET_features.4+features.11_ZFNET_features.4+features.11-PCA_100-RIDGE-ALPHA_1.0e+05



In [9]:
def closest_power_of_2(n, d):
    # Calculate the integer division result
    division_result = n // d
    
    # Find the nearest power of 2 to the division result
    power_of_2 = 1
    while power_of_2 <= division_result:
        power_of_2 *= 2
    
    # Determine the closest power of 2 by comparing the two nearest powers
    closest_lower_power = power_of_2 // 2
    closest_higher_power = power_of_2
    
    if abs(division_result - closest_lower_power) <= abs(division_result - closest_higher_power):
        closest_power = closest_lower_power
    else:
        closest_power = closest_higher_power
    
    return closest_power

# Example usage
n = 2048
d = 7
result = closest_power_of_2(n, d)
print(f"The closest power of 2 to {n}/{d} is {result}")
print(f"final batch size: {result * d}")

The closest power of 2 to 2048/7 is 256
final batch size: 1792


In [4]:
config_dict = {'1': {'extraction_config_file' : 'config_0.64732.json', 'config_subj': 1},
                        '2': {'extraction_config_file' : 'config_0.64732.json', 'config_subj': 1},
                        '3': {'extraction_config_file' : 'config_0.64732.json', 'config_subj': 1},
                        '4': {'extraction_config_file' : 'config_0.64732.json', 'config_subj': 1},
                        '5': {'extraction_config_file' : 'config_0.64732.json', 'config_subj': 1},
                        '6': {'extraction_config_file' : 'config_0.55162.json', 'config_subj': 6},
                        '7': {'extraction_config_file' : 'config_0.55162.json', 'config_subj': 6},
                        '8': {'extraction_config_file' : 'config_0.55162.json', 'config_subj': 6}}

In [5]:
result_string = ""

# Iterate through the dictionary and construct the desired string
for key, value in config_dict.items():
    config_subj = value['config_subj']
    extraction_config_file = value['extraction_config_file']
    result_string += f"{key}_{config_subj}_{extraction_config_file}_"

# Remove the trailing underscore
result_string = result_string.rstrip('_')
result_string

'1_1_config_0.64732.json_2_1_config_0.64732.json_3_1_config_0.64732.json_4_1_config_0.64732.json_5_1_config_0.64732.json_6_6_config_0.55162.json_7_6_config_0.55162.json_8_6_config_0.55162.json'

In [3]:
submission_name

'(2023-08-24_02-44)-ALEXNET_features.4+features.11_ZFNET_features.4+features.11-PCA_100-RIDGE-ALPHA_1.0e+05'

In [13]:
def pca_selector(model_and_layer):
    no_pca_model = ['DINOv2s','DINOv2b','DINOv2l','DINOv2g']
    no_pca = [['DINOv2', 'DINOv2'], ['efficientnetb2', 'avgpool'], ['efficientnetb2', 'flatten']]
    yes_pca = ['RetinaNet', 'ZFNet', 'resnet50', 'alexnet', 'vgg16', 'vgg19', 'efficientnetb2']
    
    if (model_and_layer in no_pca) or (model_and_layer[0] == 'DINOv2s') or ((model_and_layer[0] in no_pca_model) and (not isinstance(model_and_layer[1], list))):
        print('PCA will not be used')
        return False
    elif ((model_and_layer[0] == 'DINOv2b') or (model_and_layer[0] == 'DINOv2l')) and (isinstance(model_and_layer[1], list)): 
        if len(model_and_layer[1]) > 2:
            print('PCA will be used')
            return True
        else:
            print('PCA will not be used')
            return False
    else:
        print('PCA will be used')
        return True
    # elif model_and_layer in yes_pca:
    #     print('PCA will be used')
    #     return False
    # else:
    #     print('Cannot determine if PCA will be used or not')

In [34]:
import json
import pandas as pd

csv_path = "D:\\Projects\\Thesis\\files\\config_test\\global\\alpha_subj_layer.csv"
alpha_subj_layer = pd.read_csv(csv_path, index_col=0)
alpha_subj_layer.head()

Unnamed: 0,alpha_l,alpha_r
DINOv2+0+dinov2_transform+ridge,20000.0,20000.0
DINOv2+DINOv2+dinov2_transform+ridge,20000.0,20000.0
DINOv2b+0&1&2&3&4&5+dinov2_transform_V2+ridge,1000.0,1000.0
DINOv2b+0&1&2&3+dinov2_transform_V2+ridge,100.0,100.0
DINOv2b+0&1&2+dinov2_transform_V2+ridge,100.0,100.0


In [37]:
import numpy as np
import math
import os 
import json

def pca_selector(model_and_layer):
    no_pca_model = ['DINOv2s','DINOv2b','DINOv2l','DINOv2g']
    no_pca = [['DINOv2', 'DINOv2'], ['efficientnetb2', 'avgpool'], ['efficientnetb2', 'flatten']]
    yes_pca = ['RetinaNet', 'ZFNet', 'resnet50', 'alexnet', 'vgg16', 'vgg19', 'efficientnetb2']
    
    if (model_and_layer in no_pca) or (model_and_layer[0] == 'DINOv2s') or ((model_and_layer[0] in no_pca_model) and (not isinstance(model_and_layer[1], list))):
        print('PCA will not be used')
        return False
    elif ((model_and_layer[0] == 'DINOv2b') or (model_and_layer[0] == 'DINOv2l')) and (isinstance(model_and_layer[1], list)): 
        if len(model_and_layer[1]) > 2:
            print('PCA will be used')
            return True
        else:
            print('PCA will not be used')
            return False
    else:
        print('PCA will be used')
        return True

def process_csv_file(file_path):
    data = pd.read_csv(csv_path, index_col=0)
    for old_label in df.index:
        if not pd.isna(old_label):
            network = old_label.split('+')[0]
            layers = old_label.split('+')[1].split('&')
            if len(layers) == 1:
                layers = layers[0]
            if pca_selector([network, layers]):
                print([network, layers])
                new_label = old_label + "+2048"
                data = data.rename(index={old_label: new_label})
            else:
                print([network, layers])
                new_label = old_label + "+9999999"
                data = data.rename(index={old_label: new_label})
        
    data.to_csv(csv_path)

file_path = f"D:\\Projects\\Thesis\\files\\config_test\\global\\alpha_subj_layer.csv"
process_csv_file(file_path)
file_path = f"D:\\Projects\\Thesis\\files\\config_test\\global\\scores_subj_layer_roi.csv"
process_csv_file(file_path)

# for subj in range(1, 9):
#     file_path = f"D:\\Projects\\Thesis\\files\\config_subj{subj}\\global\\alpha_subj_layer.csv"
#     process_csv_file(file_path)
#     file_path = f"D:\\Projects\\Thesis\\files\\config_subj{subj}\\global\\scores_subj_layer_roi.csv"
#     process_csv_file(file_path)

NameError: name 'idx' is not defined

In [35]:
"ciao+1+2+ciaone".split('+')

['ciao', '1', '2', 'ciaone']

In [None]:
def find_best_performing_layer(median_roi_correlation_df, parent_config_dir, save):
    """
        Find the best performing layer for each ROI and save it in a dictionary
    """	
    # find the index, for each roi, associated with the maximum value (model+layer)
    max_indices = median_roi_correlation_df.idxmax()
    final_dict = {col: idx.split('+') if not pd.isna(idx) else np.NaN for col, idx in max_indices.items()}
    # If a layer is a concatenation of more than one layer, split it into a list instead of a string
    for chiave, valore in final_dict.items():
        # Se il valore non è una lista, continuiamo senza apportare modifiche
        if not isinstance(valore, list):
            continue
        # Controlliamo se il secondo elemento contiene "&" (quindi è sono più di un layer concatenati)
        if valore is not np.NaN:
            print(valore)
            if '&' in str(valore[1]):
                # Se contiene "&", splittiamolo in una lista di stringhe
                final_dict[chiave][1] = str(valore[1]).split('&')
    if save:
        average_value = median_roi_correlation_df.max().mean()
        filename = "config_" + str(average_value.round(5)) + ".json"
        full_path = os.path.join(parent_config_dir, filename)

        # Salvataggio come JSON
        with open(full_path, 'w') as file:
            json.dump(final_dict, file, indent=4)
     
    print(median_roi_correlation_df)
    print('\n')
    print(json.dumps(final_dict, indent=4))
    return final_dict

In [None]:
def find_best_performing_layer(median_roi_correlation_df, parent_config_dir, save):
    """
        Find the best performing layer for each ROI and save it in a dictionary
    """	
    # find the index, for each roi, associated with the maximum value (model+layer)
    max_indices = median_roi_correlation_df.idxmax()
    final_dict = {col: idx.split('+') if not pd.isna(idx) else np.NaN for col, idx in max_indices.items()}
    # If a layer is a concatenation of more than one layer, split it into a list instead of a string
    for chiave, valore in final_dict.items():
        # Se il valore non è una lista, continuiamo senza apportare modifiche
        if not isinstance(valore, list):
            continue
        # Controlliamo se il secondo elemento contiene "&" (quindi è sono più di un layer concatenati)
        if valore is not np.NaN:
            print(valore)
            if '&' in str(valore[1]):
                # Se contiene "&", splittiamolo in una lista di stringhe
                final_dict[chiave][1] = str(valore[1]).split('&')
    if save:
        average_value = median_roi_correlation_df.max().mean()
        filename = "config_" + str(average_value.round(5)) + ".json"
        full_path = os.path.join(parent_config_dir, filename)

        # Salvataggio come JSON
        with open(full_path, 'w') as file:
            json.dump(final_dict, file, indent=4)
     
    print(median_roi_correlation_df)
    print('\n')
    print(json.dumps(final_dict, indent=4))
    return final_dict

In [32]:
import numpy as np
import math
import os 
import json

def pca_selector(model_and_layer):
    no_pca_model = ['DINOv2s','DINOv2b','DINOv2l','DINOv2g']
    no_pca = [['DINOv2', 'DINOv2'], ['efficientnetb2', 'avgpool'], ['efficientnetb2', 'flatten']]
    yes_pca = ['RetinaNet', 'ZFNet', 'resnet50', 'alexnet', 'vgg16', 'vgg19', 'efficientnetb2']
    
    if (model_and_layer in no_pca) or (model_and_layer[0] == 'DINOv2s') or ((model_and_layer[0] in no_pca_model) and (not isinstance(model_and_layer[1], list))):
        print('PCA will not be used')
        return False
    elif ((model_and_layer[0] == 'DINOv2b') or (model_and_layer[0] == 'DINOv2l')) and (isinstance(model_and_layer[1], list)): 
        if len(model_and_layer[1]) > 2:
            print('PCA will be used')
            return True
        else:
            print('PCA will not be used')
            return False
    else:
        print('PCA will be used')
        return True

def process_json_file(file_path):
    with open(file_path, 'r') as json_file:
        data = json.load(json_file)
        for key, value in data.items():
            if not isinstance(value, list) or (isinstance(value, float) and math.isnan(value)):
                continue
            elif pca_selector(value[:2]):
                print(value[:2])
                value.append("2048")
            else:
                print(value[:2])
                value.append("9999999")
        
    with open(file_path, 'w') as json_file:
        json.dump(data, json_file, indent=4)
        
folder_path = "D:\\Projects\\Thesis\\files\\config_test\\global"

for filename in os.listdir(folder_path):
    if filename.endswith(".json"):
        file_path = os.path.join(folder_path, filename)
        process_json_file(file_path)

PCA will be used
['alexnet', 'features.11']
PCA will be used
['alexnet', 'features.11']
PCA will be used
['alexnet', 'features.11']
PCA will be used
['alexnet', 'features.11']
PCA will be used
['alexnet', 'features.11']
PCA will be used
['alexnet', 'features.11']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will be used
['ZFNet', 'features.stage3.pool3']
PCA will

In [9]:
import pandas as pd

for subj in range(1, 9):
# Load the dataframe from a CSV file (replace 'your_file.csv' with the actual file path)
    print('############################ Subject: ' + str(subj) + ' ############################ \n')
    path = f"D:\\Projects\\Thesis\\files\\config_subj{subj}\\global\\alpha_subj_layer.csv"
    df = pd.read_csv(path)

    # Check for duplicates in the first column
    duplicates = df[df.duplicated(subset=df.columns[0], keep=False)]

    if duplicates.empty:
        print("No duplicates found in the first column.")
    else:
        print("Duplicates found in the first column:")
        print(duplicates)

############################ Subject: 1 ############################ 

No duplicates found in the first column.
############################ Subject: 2 ############################ 

No duplicates found in the first column.
############################ Subject: 3 ############################ 

No duplicates found in the first column.
############################ Subject: 4 ############################ 

No duplicates found in the first column.
############################ Subject: 5 ############################ 

No duplicates found in the first column.
############################ Subject: 6 ############################ 

No duplicates found in the first column.
############################ Subject: 7 ############################ 

No duplicates found in the first column.
############################ Subject: 8 ############################ 

No duplicates found in the first column.


In [None]:
alexnet+features.10&features.11&features.12

In [5]:
lista = [1]
lista[0]

1

In [5]:
zero_rows_lh

Unnamed: 0,V1v,V1d,V2v,V2d,V3v,V3d,hV4,EBA,FBA-1,FBA-2,...,mfs-words,mTL-words,early,midventral,midlateral,midparietal,ventral,lateral,parietal,Unknown


In [23]:
import pandas as pd

cartella = "D:\Projects\Datasets\Biomedical\\algonauts_2023_challenge_data\subj01\\roi_masks_enhanced\\roi_df"

# Carica il dataset lh_challenge_onehot.csv
lh_data = pd.read_csv(cartella + '\lh_challenge_onehot.csv')

# Carica il dataset rh_challenge_onehot.csv
rh_data = pd.read_csv(cartella + '\\rh_challenge_onehot.csv')

# Verifica la presenza di righe con tutti i valori 0 nel dataset lh_challenge_onehot.csv
zero_rows_lh = lh_data[(lh_data == 0).all(axis=1)]

# Verifica la presenza di righe con tutti i valori 0 nel dataset rh_challenge_onehot.csv
zero_rows_rh = rh_data[(rh_data == 0).all(axis=1)]

# Stampa le righe con tutti i valori 0
print("Righe con tutti i valori 0 nel dataset lh_challenge_onehot.csv:")
print(zero_rows_lh)

print("Righe con tutti i valori 0 nel dataset rh_challenge_onehot.csv:")
print(zero_rows_rh)

Righe con tutti i valori 0 nel dataset lh_challenge_onehot.csv:
Empty DataFrame
Columns: [V1v, V1d, V2v, V2d, V3v, V3d, hV4, EBA, FBA-1, FBA-2, mTL-bodies, OFA, FFA-1, FFA-2, mTL-faces, aTL-faces, OPA, PPA, RSC, OWFA, VWFA-1, VWFA-2, mfs-words, mTL-words, early, midventral, midlateral, midparietal, ventral, lateral, parietal, Unknown]
Index: []

[0 rows x 32 columns]
Righe con tutti i valori 0 nel dataset rh_challenge_onehot.csv:
Empty DataFrame
Columns: [V1v, V1d, V2v, V2d, V3v, V3d, hV4, EBA, FBA-1, FBA-2, mTL-bodies, OFA, FFA-1, FFA-2, mTL-faces, aTL-faces, OPA, PPA, RSC, OWFA, VWFA-1, VWFA-2, mfs-words, mTL-words, early, midventral, midlateral, midparietal, ventral, lateral, parietal, Unknown]
Index: []

[0 rows x 32 columns]


In [24]:
list1 = ['V1v', 'V1d', 'V2v', 'V2d', 'V3v', 'V3d', 'hV4', 'OWFA', 'midlateral', 'FBA-1', 'FFA-1', 'OPA', 'PPA', 'VWFA-2', 'midparietal', 'ventral', 'parietal', 'Unknown', 'OFA', 'FBA-1', 'FFA-1', 'OPA', 'PPA', 'VWFA-2', 'midparietal', 'ventral', 'parietal', 'Unknown', 'RSC', 'mfs-words', 'early', 'midventral', 'EBA', 'FBA-2', 'FFA-2', 'VWFA-1', 'lateral']

In [25]:
def find_duplicates(input_list):
    seen = set()
    duplicates = set()
    
    for item in input_list:
        if item in seen:
            duplicates.add(item)
        else:
            seen.add(item)
    
    return duplicates

duplicates = find_duplicates(list1)
print("Valori presenti più di una volta:")
print(duplicates)

Valori presenti più di una volta:
{'Unknown', 'VWFA-2', 'ventral', 'midparietal', 'PPA', 'FFA-1', 'FBA-1', 'parietal', 'OPA'}


In [26]:
missing_columns = [col for col in lh_data.columns if col not in list1]

print("Nomi delle colonne non presenti nella lista:")
print(missing_columns)

missing_columns = [col for col in rh_data.columns if col not in list1]

print("Nomi delle colonne non presenti nella lista:")
print(missing_columns)

Nomi delle colonne non presenti nella lista:
['mTL-bodies', 'mTL-faces', 'aTL-faces', 'mTL-words']
Nomi delle colonne non presenti nella lista:
['mTL-bodies', 'mTL-faces', 'aTL-faces', 'mTL-words']


In [35]:
container = {}

for subj in range(1,9):
    cartella = f"D:\Projects\Datasets\Biomedical\\algonauts_2023_challenge_data\subj0{subj}\\roi_masks_enhanced\\roi_df"

    # Carica il dataset lh_challenge_onehot.csv
    lh_data = pd.read_csv(cartella + '\lh_challenge_onehot.csv')

    # Carica il dataset rh_challenge_onehot.csv
    rh_data = pd.read_csv(cartella + '\\rh_challenge_onehot.csv')
    print(f"#### {subj} ####")
    
    column_sums = lh_data.sum()

    print(list(column_sums[column_sums == 0].index))
    container[f"lh_{subj}"] = list(column_sums[column_sums == 0].index)

    column_sums = rh_data.sum()

    print(list(column_sums[column_sums == 0].index))
    container[f"rh_{subj}"] = column_sums[column_sums == 0].index


    
def find_common_elements(lists_dict):
    if not lists_dict:
        return []

    common_elements = set(lists_dict[next(iter(lists_dict))])
    
    for lst in lists_dict.values():
        common_elements.intersection_update(lst)
    
    return list(common_elements)

common_elements = find_common_elements(container)

print("Elementi in comune tra tutte le liste (sempre nan):")
print(common_elements)

#### 1 ####
['FBA-2', 'mTL-bodies', 'FFA-2', 'mTL-faces', 'aTL-faces', 'mTL-words']
['mTL-bodies', 'mTL-faces', 'aTL-faces', 'mfs-words', 'mTL-words']
#### 2 ####
['FBA-1', 'mTL-bodies', 'mTL-faces', 'aTL-faces', 'mTL-words']
['FBA-1', 'mTL-bodies', 'mTL-faces', 'aTL-faces', 'mTL-words']
#### 3 ####
['mTL-bodies', 'mTL-faces', 'aTL-faces', 'VWFA-2', 'mTL-words']
['mTL-bodies', 'FFA-2', 'mTL-faces', 'aTL-faces', 'VWFA-2', 'mfs-words', 'mTL-words']
#### 4 ####
['FBA-1', 'mTL-bodies', 'mTL-faces', 'aTL-faces', 'VWFA-2', 'mTL-words']
['mTL-bodies', 'mTL-faces', 'aTL-faces', 'VWFA-2', 'mTL-words']
#### 5 ####
['mTL-bodies', 'mTL-faces', 'aTL-faces', 'mTL-words']
['mTL-bodies', 'mTL-faces', 'aTL-faces', 'mTL-words']
#### 6 ####
['mTL-bodies', 'mTL-faces', 'aTL-faces']
['mTL-faces', 'aTL-faces', 'mTL-words']
#### 7 ####
['FBA-1', 'mTL-bodies', 'mTL-faces', 'aTL-faces', 'mTL-words']
['FBA-1', 'mTL-bodies', 'mTL-faces', 'aTL-faces']
#### 8 ####
['mTL-bodies', 'mTL-faces', 'aTL-faces', 'mTL-word

- ['mTL-faces', 'aTL-faces'] sono nan sempre
- NaN nel config: [mTL-bodies, mTL-faces, aTL-faces, mTL-words]
- Devo trovare mTL-words, mTL-bodies perchè in alcuni soggetti non sono NAN
- Fare il testing su subj6 e non su subj1 OPPURE crea dei dataframe masks in cui queste due colonne 1 sono sostituite con le prime colonne più a dx (Stream)

In [39]:
for subj in range(1,9):
    cartella = f"D:\Projects\Datasets\Biomedical\\algonauts_2023_challenge_data\subj0{subj}\\roi_masks_enhanced\\roi_df"

    # Carica il dataset lh_challenge_onehot.csv
    lh_data = pd.read_csv(cartella + '\lh_challenge_onehot.csv')

    # Carica il dataset rh_challenge_onehot.csv
    rh_data = pd.read_csv(cartella + '\\rh_challenge_onehot.csv')
    
    print(f"#### {subj} ####")
    
    print(f"#### LH ####")
    
    column_to_sum = 'mTL-words'
    column_sum = lh_data[column_to_sum].sum()

    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    column_to_sum = 'mTL-bodies'
    column_sum = lh_data[column_to_sum].sum()
    
    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    column_to_sum = 'Unknown'
    column_sum = lh_data[column_to_sum].sum()

    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    print(f"#### RH ####")
    
    column_to_sum = 'mTL-words'
    column_sum = rh_data[column_to_sum].sum()

    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    column_to_sum = 'mTL-bodies'
    column_sum = rh_data[column_to_sum].sum()
    
    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    column_to_sum = 'Unknown'
    column_sum = rh_data[column_to_sum].sum()

    print(f"Somma della colonna '{column_to_sum}': {column_sum}")

#### 1 ####
#### LH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 152
#### RH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 441
#### 2 ####
#### LH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 74
#### RH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 430
#### 3 ####
#### LH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 59
#### RH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 209
#### 4 ####
#### LH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 93
#### RH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 355
###

In [40]:
for subj in range(1,9):
    cartella = f"D:\Projects\Datasets\Biomedical\\algonauts_2023_challenge_data\subj0{subj}\\roi_masks_enhanced\\roi_df"

    # Carica il dataset lh_challenge_onehot.csv
    lh_data = pd.read_csv(cartella + '\lh_challenge_onehot.csv')

    # Carica il dataset rh_challenge_onehot.csv
    rh_data = pd.read_csv(cartella + '\\rh_challenge_onehot.csv')
    
    print(f"#### {subj} ####")
    
    print(f"#### LH ####")
    
    column_to_sum = 'mTL-words'
    column_sum = lh_data[column_to_sum].sum()

    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    column_to_sum = 'mTL-bodies'
    column_sum = lh_data[column_to_sum].sum()
    
    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    column_to_sum = 'Unknown'
    column_sum = lh_data[column_to_sum].sum()

    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    print(f"#### RH ####")
    
    column_to_sum = 'mTL-words'
    column_sum = rh_data[column_to_sum].sum()

    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    column_to_sum = 'mTL-bodies'
    column_sum = rh_data[column_to_sum].sum()
    
    print(f"Somma della colonna '{column_to_sum}': {column_sum}")
    
    column_to_sum = 'Unknown'
    column_sum = rh_data[column_to_sum].sum()

    print(f"Somma della colonna '{column_to_sum}': {column_sum}")

#### 1 ####
#### LH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 152
#### RH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 441
#### 2 ####
#### LH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 74
#### RH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 430
#### 3 ####
#### LH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 59
#### RH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 209
#### 4 ####
#### LH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 93
#### RH ####
Somma della colonna 'mTL-words': 0
Somma della colonna 'mTL-bodies': 0
Somma della colonna 'Unknown': 355
###

In [8]:
print(len(lh_data), len(rh_data))

18978 20220


In [7]:
# Trova le righe con uno e soltanto uno 1 nel dataset lh_challenge_onehot.csv
single_one_rows_lh = lh_data[(lh_data == 1).sum(axis=1) == 1]

# Trova le righe con uno e soltanto uno 1 nel dataset rh_challenge_onehot.csv
single_one_rows_rh = rh_data[(rh_data == 1).sum(axis=1) == 1]

# Stampa le righe con uno e soltanto uno 1
print("Righe con uno e soltanto uno 1 nel dataset lh_challenge_onehot.csv:")
print(len(single_one_rows_lh))

print("Righe con uno e soltanto uno 1 nel dataset rh_challenge_onehot.csv:")
print(len(single_one_rows_rh))

Righe con uno e soltanto uno 1 nel dataset lh_challenge_onehot.csv:
18978
Righe con uno e soltanto uno 1 nel dataset rh_challenge_onehot.csv:
20220


In [28]:
import pandas as pd

alpah = pd.read_csv("D:\Projects\Thesis\\files\config\global\\alpha_subj_layer_fixed.csv", index_col=0)
alpah

Unnamed: 0,alpha_l,alpha_r
DINOv2+DINOv2+dinov2_transform+ridge,20000.0,20000.0
RetinaNet+body.layer3.0.relu_2+imagenet_transform_alt+ridge,50000.0,50000.0
RetinaNet+body.layer3.1.relu+imagenet_transform_alt+ridge,10000.0,10000.0
RetinaNet+body.layer3.5.relu_1+imagenet_transform_alt+ridge,10000.0,10000.0
RetinaNet+body.layer3.5.relu_2+imagenet_transform_alt+ridge,20000.0,20000.0
...,...,...
resnet50+layer4.2.relu+imagenet_V2_transform+ridge,10000.0,10000.0
vgg16+avgpool+imagenet_V2_transform+ridge,20000.0,20000.0
vgg19+avgpool+imagenet_V2_transform+ridge,100000.0,100000.0
vgg19+features.33+imagenet_V2_transform+ridge,2000000.0,2000000.0


In [29]:
params = pd.read_csv("D:\Projects\Thesis\\files\config\global\\scores_subj_layer_roi.csv", index_col=0)
params

Unnamed: 0,V1v,V1d,V2v,V2d,V3v,V3d,hV4,EBA,FBA-1,FBA-2,...,midventral,midlateral,midparietal,ventral,lateral,parietal,All vertices,Unknown ROI,Unknown Stream,Unknown
DINOv2+DINOv2+dinov2_transform+ridge,0.174738,0.154557,0.164305,0.180786,0.179375,0.218366,0.283107,0.584682,0.545540,0.655425,...,0.243839,0.311103,0.383891,0.559127,0.585894,0.484780,0.431469,0.464877,0.199642,0.619094
RetinaNet+body.layer3.0.relu_2+imagenet_transform_alt+ridge,0.650085,0.666336,0.663129,0.679971,0.636124,0.594302,0.528764,0.396341,0.422314,0.449667,...,0.531741,0.472690,0.465061,0.425817,0.395163,0.368918,0.459844,0.388003,0.603110,0.467630
RetinaNet+body.layer3.1.relu+imagenet_transform_alt+ridge,0.641103,0.654712,0.654100,0.670889,0.627110,0.592034,0.532585,0.399679,0.428457,0.457992,...,0.535016,0.470281,0.466375,0.426985,0.396522,0.370256,0.460399,0.388738,0.598572,0.479336
RetinaNet+body.layer3.5.relu_1+imagenet_transform_alt+ridge,0.429110,0.440771,0.530404,0.553579,0.518768,0.509646,0.489950,0.487289,0.474461,0.539182,...,0.458551,0.471128,0.528649,0.484232,0.485130,0.429655,0.478899,0.439648,0.493022,0.548881
RetinaNet+body.layer3.5.relu_2+imagenet_transform_alt+ridge,0.608917,0.639083,0.663956,0.687454,0.646909,0.615080,0.570081,0.522423,0.521557,0.577542,...,0.568409,0.551120,0.554655,0.516256,0.512214,0.455966,0.538149,0.475619,0.614029,0.550584
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
vgg19+avgpool+imagenet_transform_alt+ridge,0.341292,0.367939,0.387774,0.452796,0.401587,0.460881,0.472776,0.560321,0.542239,0.628602,...,0.433964,0.448457,0.522412,0.542124,0.560030,0.470219,0.490187,0.474412,0.419303,0.593145
vgg19+features.33+imagenet_V2_transform+ridge,0.390177,0.429503,0.443093,0.496823,0.434720,0.493805,0.505952,0.574343,0.570456,0.661998,...,0.467449,0.472884,0.543569,0.570730,0.576989,0.494438,0.520412,0.502224,0.467723,0.648359
vgg19+features.33+imagenet_transform_alt+ridge,0.386493,0.418464,0.443805,0.499578,0.447639,0.492781,0.487292,0.530875,0.526550,0.607027,...,0.456052,0.462353,0.526185,0.530254,0.533735,0.461401,0.495577,0.470251,0.469248,0.588873
vgg19+features.35+imagenet_V2_transform+ridge,0.311502,0.333356,0.350663,0.401258,0.351330,0.408325,0.449959,0.566821,0.555309,0.652537,...,0.407098,0.408242,0.490040,0.555737,0.569545,0.473640,0.475893,0.477190,0.380884,0.634391


In [30]:
indices_not_in_params = alpah.index.difference(params.index).tolist()

# Trovare gli indici presenti in params ma non in alpah
indices_not_in_alpha = params.index.difference(alpah.index).tolist()
indices_not_in_alpha = [item for item in indices_not_in_alpha if "+linear" not in item]


# Stampare le liste di indici
print("Indici presenti in alpah ma non in params:", indices_not_in_params)
print("Indici presenti in params ma non in alpah:", indices_not_in_alpha)
print(len(indices_not_in_alpha))

Indici presenti in alpah ma non in params: []
Indici presenti in params ma non in alpah: ['RetinaNet+fpn+imagenet_transform_alt+ridge', 'ZFNet+features.stage2.unit1.activ+imagenet_transform_alt+ridge', 'ZFNet+features.stage2.unittt.activ+imagenet_transform_alt+ridge', 'ZFNet+features.stage3.pool3+imagenet_transform_alt+ridge', 'ZFNet+features.stage3.unit3.activ+imagenet_transform_alt+ridge', 'resnet50+layer1.0.relu', 'resnet50+layer1.0.relu+imagenet_transform_alt+ridge', 'resnet50+layer1.2.relu+imagenet_transform_alt+ridge', 'resnet50+layer2.3.relu+imagenet_transform_alt+ridge', 'resnet50+layer3.5.relu+imagenet_transform_alt+ridge', 'resnet50+layer4.2.relu+imagenet_transform_alt+ridge', 'vgg16+avgpool+imagenet_transform_alt+ridge', 'vgg19+avgpool+imagenet_transform_alt+ridge', 'vgg19+features.33+imagenet_transform_alt+ridge', 'vgg19+features.35+imagenet_transform_alt+ridge']
15


In [34]:
import pickle

In [33]:
params_new

Unnamed: 0,V1v,V1d,V2v,V2d,V3v,V3d,hV4,EBA,FBA-1,FBA-2,...,midventral,midlateral,midparietal,ventral,lateral,parietal,All vertices,Unknown ROI,Unknown Stream,Unknown
DINOv2+DINOv2+dinov2_transform+ridge,0.174738,0.154557,0.164305,0.180786,0.179375,0.218366,0.283107,0.584682,0.545540,0.655425,...,0.243839,0.311103,0.383891,0.559127,0.585894,0.484780,0.431469,0.464877,0.199642,0.619094
RetinaNet+body.layer3.0.relu_2+imagenet_transform_alt+ridge,0.650085,0.666336,0.663129,0.679971,0.636124,0.594302,0.528764,0.396341,0.422314,0.449667,...,0.531741,0.472690,0.465061,0.425817,0.395163,0.368918,0.459844,0.388003,0.603110,0.467630
RetinaNet+body.layer3.1.relu+imagenet_transform_alt+ridge,0.641103,0.654712,0.654100,0.670889,0.627110,0.592034,0.532585,0.399679,0.428457,0.457992,...,0.535016,0.470281,0.466375,0.426985,0.396522,0.370256,0.460399,0.388738,0.598572,0.479336
RetinaNet+body.layer3.5.relu_1+imagenet_transform_alt+ridge,0.429110,0.440771,0.530404,0.553579,0.518768,0.509646,0.489950,0.487289,0.474461,0.539182,...,0.458551,0.471128,0.528649,0.484232,0.485130,0.429655,0.478899,0.439648,0.493022,0.548881
RetinaNet+body.layer3.5.relu_2+imagenet_transform_alt+ridge,0.608917,0.639083,0.663956,0.687454,0.646909,0.615080,0.570081,0.522423,0.521557,0.577542,...,0.568409,0.551120,0.554655,0.516256,0.512214,0.455966,0.538149,0.475619,0.614029,0.550584
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
resnet50+layer4.2.relu+imagenet_V2_transform+ridge,0.268391,0.277567,0.350980,0.368508,0.338624,0.402022,0.448553,0.619121,0.561786,0.659805,...,0.381849,0.421921,0.532018,0.569947,0.619109,0.501369,0.499316,0.498707,0.358765,0.664534
vgg16+avgpool+imagenet_V2_transform+ridge,0.327201,0.351675,0.377862,0.420267,0.374607,0.423484,0.479454,0.577888,0.555531,0.657053,...,0.428019,0.426739,0.513424,0.559919,0.578184,0.480382,0.491501,0.481787,0.401666,0.632303
vgg19+avgpool+imagenet_V2_transform+ridge,0.317700,0.354750,0.365271,0.434295,0.361553,0.429195,0.452326,0.573557,0.531198,0.643510,...,0.399246,0.419883,0.504727,0.537422,0.569295,0.463837,0.478536,0.464323,0.388477,0.613617
vgg19+features.33+imagenet_V2_transform+ridge,0.390177,0.429503,0.443093,0.496823,0.434720,0.493805,0.505952,0.574343,0.570456,0.661998,...,0.467449,0.472884,0.543569,0.570730,0.576989,0.494438,0.520412,0.502224,0.467723,0.648359


In [31]:
params_new = params.drop(index=indices_not_in_alpha)
params_new.to_csv("D:\Projects\Thesis\\files\config\global\\scores_subj_layer_roi_deleted.csv")

In [7]:
alpah = alpah.drop_duplicates()
alpah.set_index('Unnamed: 0', inplace=True)
alpah.sort_index(inplace=True)

In [9]:
alpah.to_csv("D:\Projects\Thesis\\files\config\global\\alpha_subj_layer_fixed.csv")

In [3]:
import pandas as pd

# Creiamo i DataFrame di esempio con indici stringa
data1 = {'Col1': ['A', 'B'], 'Col2': [1, 2]}
data2 = {'Col1': ['A', 'B'], 'Col2': [3, 4]}

dataset1 = pd.DataFrame(data1, index=['row1', 'row2'])
dataset2 = pd.DataFrame(data2, index=['row1', 'row2'])

# Concateniamo i DataFrame verticalmente
dataset3 = pd.concat([dataset1, dataset2], axis=0)

# Stampa il risultato di dataset3.loc['row1']
print(dataset3.loc['row1'])

     Col1  Col2
row1    A     1
row1    A     3


In [22]:
pd.concat([median_roi_correlation_df_global.loc[["alexnet+features.11"]], median_roi_correlation_df_global.loc[["alexnet+features.11"]]])

Unnamed: 0,V1v,V1d,V2v,V2d,V3v,V3d,hV4,EBA,FBA-1,FBA-2,...,midventral,midlateral,midparietal,ventral,lateral,parietal,All vertices,Unknown ROI,Unknown Stream,Unknown
alexnet+features.11,0.370626,0.406731,0.438984,0.485918,0.411672,0.432735,0.423575,0.396427,0.428624,0.474549,...,0.38162,0.380605,0.44556,0.425456,0.399501,0.382386,0.411668,0.382003,0.433564,0.452331
alexnet+features.11,0.370626,0.406731,0.438984,0.485918,0.411672,0.432735,0.423575,0.396427,0.428624,0.474549,...,0.38162,0.380605,0.44556,0.425456,0.399501,0.382386,0.411668,0.382003,0.433564,0.452331


In [24]:
# Creare un DataFrame di esempio
data = {'colonna1': [1, 4, 2],
        'colonna2': [5, 3, 6]}
indici = ['b', 'c', 'a']
df = pd.DataFrame(data, index=indici)

# Ordinare il DataFrame in base all'ordine alfabetico degli indici
df_ordinato = df.sort_index()

print("DataFrame ordinato:")
df_ordinato

DataFrame ordinato:


Unnamed: 0,colonna1,colonna2
a,2,6
b,1,5
c,4,3


In [12]:
data1 = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df1 = pd.DataFrame(data1, index=['row1', 'row2', 'row3'])

# Creare DataFrame df2
data2 = {'A': [7, 8, 9], 'B': [10, 11, 12]}
df2 = pd.DataFrame(data2, index=['row3', 'row4', 'row5'])
df2

Unnamed: 0,A,B
row3,7,10
row4,8,11
row5,9,12


In [16]:
median_roi_correlation = [[0.8, 0.6, 0.7]]
roi_names = ['ROI_1', 'ROI_2', 'ROI_3']
model_layer_1 = 'Model_1'
model_layer_2 = 'Layer_2'
# concatenate the strings assing a + between them
model_layer = model_layer_1 + "+" + model_layer_2

# Creazione del DataFrame
#data = [median_roi_correlation]
df = pd.DataFrame(median_roi_correlation, columns=roi_names, index=[model_layer])

# Stampa il DataFrame risultante
print(df)

median_roi_correlation = [[0.1, 0.7, 0.20]]
roi_names = ['ROI_1', 'ROI_2', 'ROI_3']
model_layer_1 = 'Model_2'
model_layer_2 = 'Layer_3'
# concatenate the strings assing a + between them
model_layer = model_layer_1 + "+" + model_layer_2

# Creazione del DataFrame
#data1 = [median_roi_correlation]
df1 = pd.DataFrame(median_roi_correlation, columns=roi_names, index=[model_layer])

# Concatenate df1 and df2 by rows
df = pd.concat([df, df1])
print(df)

                 ROI_1  ROI_2  ROI_3
Model_1+Layer_2    0.8    0.6    0.7
                 ROI_1  ROI_2  ROI_3
Model_1+Layer_2    0.8    0.6    0.7
Model_2+Layer_3    0.1    0.7    0.2


In [33]:
df = pd.read_csv('D:\Projects\Thesis\\files\submissions\(2023-08-07_18-32)-ALEXNET_features.11_ZFNET_features.stage3.pool3-PCA_100-RIDGE-ALPHA_1.0e+05_TEST\\val_imgs\subj01\scores_subj_layer_roi.csv', index_col=0)
df

Unnamed: 0,V1v,V1d,V2v,V2d,V3v,V3d,hV4,EBA,FBA-1,FBA-2,...,midventral,midlateral,midparietal,ventral,lateral,parietal,All vertices,Unknown ROI,Unknown Stream,Unknown
alexnet+features.11,0.370626,0.406731,0.438984,0.485918,0.411672,0.432735,0.423575,0.396427,0.428624,0.474549,...,0.38162,0.380605,0.44556,0.425456,0.399501,0.382386,0.411668,0.382003,0.433564,0.452331
ZFNet+features.stage3.pool3,0.328243,0.349637,0.389026,0.412768,0.37781,0.418868,0.457928,0.463913,0.486284,0.560257,...,0.406256,0.389046,0.474627,0.482592,0.464997,0.404432,0.436212,0.414729,0.400121,0.507272


In [37]:
# create a vector numpy array of numbers and 1 nan
test = np.array([1,2,3,4,5,6,7,8,9,10,np.nan])
np.nanmedian(test)

5.5

In [39]:
for col, idx in max_indices.items():
    print(f"{col} - {idx}")

V1v - alexnet+features.11
V1d - alexnet+features.11
V2v - alexnet+features.11
V2d - alexnet+features.11
V3v - alexnet+features.11
V3d - alexnet+features.11
hV4 - ZFNet+features.stage3.pool3
EBA - ZFNet+features.stage3.pool3
FBA-1 - ZFNet+features.stage3.pool3
FBA-2 - ZFNet+features.stage3.pool3
mTL-bodies - nan
OFA - ZFNet+features.stage3.pool3
FFA-1 - ZFNet+features.stage3.pool3
FFA-2 - ZFNet+features.stage3.pool3
mTL-faces - nan
aTL-faces - nan
OPA - ZFNet+features.stage3.pool3
PPA - ZFNet+features.stage3.pool3
RSC - ZFNet+features.stage3.pool3
OWFA - ZFNet+features.stage3.pool3
VWFA-1 - ZFNet+features.stage3.pool3
VWFA-2 - ZFNet+features.stage3.pool3
mfs-words - ZFNet+features.stage3.pool3
mTL-words - nan
early - alexnet+features.11
midventral - ZFNet+features.stage3.pool3
midlateral - ZFNet+features.stage3.pool3
midparietal - ZFNet+features.stage3.pool3
ventral - ZFNet+features.stage3.pool3
lateral - ZFNet+features.stage3.pool3
parietal - ZFNet+features.stage3.pool3
All vertices - 

In [44]:


max_indices = df.idxmax()
print(max_indices)

final_dict = {col: idx.split('+') if not pd.isna(idx) else np.NaN for col, idx in max_indices.items()}


average_value = df.max().mean()

# Composizione del nome del file
filename = "config_" + str(average_value.round(2)) + ".json"

print(filename)

V1v                       alexnet+features.11
V1d                       alexnet+features.11
V2v                       alexnet+features.11
V2d                       alexnet+features.11
V3v                       alexnet+features.11
V3d                       alexnet+features.11
hV4               ZFNet+features.stage3.pool3
EBA               ZFNet+features.stage3.pool3
FBA-1             ZFNet+features.stage3.pool3
FBA-2             ZFNet+features.stage3.pool3
mTL-bodies                                NaN
OFA               ZFNet+features.stage3.pool3
FFA-1             ZFNet+features.stage3.pool3
FFA-2             ZFNet+features.stage3.pool3
mTL-faces                                 NaN
aTL-faces                                 NaN
OPA               ZFNet+features.stage3.pool3
PPA               ZFNet+features.stage3.pool3
RSC               ZFNet+features.stage3.pool3
OWFA              ZFNet+features.stage3.pool3
VWFA-1            ZFNet+features.stage3.pool3
VWFA-2            ZFNet+features.s

In [45]:
final_dict

{'V1v': ['alexnet', 'features.11'],
 'V1d': ['alexnet', 'features.11'],
 'V2v': ['alexnet', 'features.11'],
 'V2d': ['alexnet', 'features.11'],
 'V3v': ['alexnet', 'features.11'],
 'V3d': ['alexnet', 'features.11'],
 'hV4': ['ZFNet', 'features.stage3.pool3'],
 'EBA': ['ZFNet', 'features.stage3.pool3'],
 'FBA-1': ['ZFNet', 'features.stage3.pool3'],
 'FBA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-bodies': nan,
 'OFA': ['ZFNet', 'features.stage3.pool3'],
 'FFA-1': ['ZFNet', 'features.stage3.pool3'],
 'FFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-faces': nan,
 'aTL-faces': nan,
 'OPA': ['ZFNet', 'features.stage3.pool3'],
 'PPA': ['ZFNet', 'features.stage3.pool3'],
 'RSC': ['ZFNet', 'features.stage3.pool3'],
 'OWFA': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-1': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mfs-words': ['ZFNet', 'features.stage3.pool3'],
 'mTL-words': nan,
 'early': ['alexnet', 'features.11'],
 'midventral': ['ZFNet', 'featur

In [25]:
a = np.array([3, 4, 5, 6])
b = np.array([5, 6])
print(a.shape, b.shape)
np.concatenate((a, b), axis=0)

(4,) (2,)


array([3, 4, 5, 6, 5, 6])

In [20]:
import json
with open('files\config\global\config_0.45.json', 'r') as json_file:
    dizionario = json.load(json_file)
    
dizionario
initial_dict = dizionario
initial_dict

{'V1v': ['alexnet', 'features.11'],
 'V1d': ['alexnet', 'features.11'],
 'V2v': ['alexnet', 'features.11'],
 'V2d': ['alexnet', 'features.11'],
 'V3v': ['alexnet', 'features.11'],
 'V3d': ['alexnet', 'features.11'],
 'hV4': ['ZFNet', 'features.stage3.pool3'],
 'EBA': ['ZFNet', 'features.stage3.pool3'],
 'FBA-1': ['ZFNet', 'features.stage3.pool3'],
 'FBA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-bodies': nan,
 'OFA': ['ZFNet', 'features.stage3.pool3'],
 'FFA-1': ['ZFNet', 'features.stage3.pool3'],
 'FFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-faces': nan,
 'aTL-faces': nan,
 'OPA': ['ZFNet', 'features.stage3.pool3'],
 'PPA': ['ZFNet', 'features.stage3.pool3'],
 'RSC': ['ZFNet', 'features.stage3.pool3'],
 'OWFA': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-1': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mfs-words': ['ZFNet', 'features.stage3.pool3'],
 'mTL-words': nan,
 'early': ['alexnet', 'features.11'],
 'midventral': ['ZFNet', 'featur

In [22]:
initial_dict = {'V1v': ['alexnet', ['features.11', 'features.12', 'features.13', 'features.14', 'features.15']],
                        'V1v1':['alexnet', ['features.12', 'features.13', 'features.14', 'features.15']]}
initial_dict

{'V1v': ['alexnet',
  ['features.11', 'features.12', 'features.13', 'features.14', 'features.15']],
 'V1v1': ['alexnet',
  ['features.12', 'features.13', 'features.14', 'features.15']]}

In [15]:
initial_dict

{'V1v': ['alexnet', 'features.11'],
 'V1d': ['alexnet', 'features.11'],
 'V2v': ['alexnet', 'features.11'],
 'V2d': ['alexnet', 'features.11'],
 'V3v': ['alexnet', 'features.11'],
 'V3d': ['alexnet', 'features.11'],
 'hV4': ['ZFNet', 'features.stage3.pool3'],
 'EBA': ['ZFNet', 'features.stage3.pool3'],
 'FBA-1': ['ZFNet', 'features.stage3.pool3'],
 'FBA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-bodies': nan,
 'OFA': ['ZFNet', 'features.stage3.pool3'],
 'FFA-1': ['ZFNet', 'features.stage3.pool3'],
 'FFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-faces': nan,
 'aTL-faces': nan,
 'OPA': ['ZFNet', 'features.stage3.pool3'],
 'PPA': ['ZFNet', 'features.stage3.pool3'],
 'RSC': ['ZFNet', 'features.stage3.pool3'],
 'OWFA': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-1': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mfs-words': ['ZFNet', 'features.stage3.pool3'],
 'mTL-words': nan,
 'early': ['alexnet', 'features.11'],
 'midventral': ['ZFNet', 'featur

In [23]:
import json

def json_config_to_feature_extraction_dict(config_dict):
    new_dict = {}
    for key, value in config_dict.items():
        # If the value is not a list or is NaN, skip this iteration
        if not isinstance(value, list) or (isinstance(value, float) and math.isnan(value)):
            continue
        first_string, second_string = value
        
        # If the first string is not a key in the new dictionary, add an empty list
        if first_string not in new_dict:
            new_dict[first_string] = []
        
        # Add the second string to the corresponding list only if it's not already present
        if second_string not in new_dict[first_string]:
            new_dict[first_string].append(second_string)

    # Sort the second strings in each list alphabetically
    for first_string in new_dict:
        new_dict[first_string].sort()
    return new_dict

initial_dict = json_config_to_feature_extraction_dict(initial_dict)

In [24]:
initial_dict

{'alexnet': [['features.11',
   'features.12',
   'features.13',
   'features.14',
   'features.15'],
  ['features.12', 'features.13', 'features.14', 'features.15']]}

In [18]:
for i in initial_dict['alexnet'][0]:
    print(i)

features.11
features.12
features.13
features.14
features.15


In [39]:
import pandas as pd
testone = pd.read_csv('files\config\global\scores_subj_layer_roi.csv', index_col=0)
testone
# change the index of the rest 
old_index = "alexnet+features.11"  # L'indice della riga che desideri cambiare
new_index = "alexnet+features.11&features.12"  # Il nuovo indice che desideri assegnare

# Cambia l'indice della riga specifica
testone = testone.rename(index={old_index: new_index})
testone

Unnamed: 0,V1v,V1d,V2v,V2d,V3v,V3d,hV4,EBA,FBA-1,FBA-2,...,midventral,midlateral,midparietal,ventral,lateral,parietal,All vertices,Unknown ROI,Unknown Stream,Unknown
ZFNet+features.stage3.pool3,0.328243,0.349637,0.389026,0.412768,0.37781,0.418868,0.457928,0.463913,0.486284,0.560257,...,0.406256,0.389046,0.474627,0.482592,0.464997,0.404432,0.436212,0.414729,0.400121,0.507272
alexnet+features.11&features.12,0.370626,0.406731,0.438984,0.485918,0.411672,0.432735,0.423575,0.396427,0.428624,0.474549,...,0.38162,0.380605,0.44556,0.425456,0.399501,0.382386,0.411668,0.382003,0.433564,0.452331
alexnet+features.12,0.34543,0.378702,0.408333,0.453459,0.377628,0.421666,0.429113,0.4177,0.444921,0.49328,...,0.373912,0.38096,0.45177,0.440818,0.417688,0.382503,0.415177,0.388199,0.415135,0.469049


In [55]:
import numpy as np
import pandas as pd
max_indices = testone.idxmax()
final_dict = {col: idx.split('+') if not pd.isna(idx) else np.NaN for col, idx in max_indices.items()}
final_dict

{'V1v': ['alexnet', 'features.11&features.12'],
 'V1d': ['alexnet', 'features.11&features.12'],
 'V2v': ['alexnet', 'features.11&features.12'],
 'V2d': ['alexnet', 'features.11&features.12'],
 'V3v': ['alexnet', 'features.11&features.12'],
 'V3d': ['alexnet', 'features.11&features.12'],
 'hV4': ['ZFNet', 'features.stage3.pool3'],
 'EBA': ['ZFNet', 'features.stage3.pool3'],
 'FBA-1': ['ZFNet', 'features.stage3.pool3'],
 'FBA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-bodies': nan,
 'OFA': ['ZFNet', 'features.stage3.pool3'],
 'FFA-1': ['ZFNet', 'features.stage3.pool3'],
 'FFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-faces': nan,
 'aTL-faces': nan,
 'OPA': ['ZFNet', 'features.stage3.pool3'],
 'PPA': ['ZFNet', 'features.stage3.pool3'],
 'RSC': ['ZFNet', 'features.stage3.pool3'],
 'OWFA': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-1': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mfs-words': ['ZFNet', 'features.stage3.pool3'],
 'mTL-words': nan

In [56]:
for chiave, valore in final_dict.items():
    # Se il valore non è una lista, continuiamo senza apportare modifiche
    if not isinstance(valore, list):
        continue
    # Controlliamo se il secondo elemento contiene "&"
    if '&' in str(valore[1]):
        # Se contiene "&", splittiamolo in una lista di stringhe
        final_dict[chiave][1] = str(valore[1]).split('&')

final_dict

{'V1v': ['alexnet', ['features.11', 'features.12']],
 'V1d': ['alexnet', ['features.11', 'features.12']],
 'V2v': ['alexnet', ['features.11', 'features.12']],
 'V2d': ['alexnet', ['features.11', 'features.12']],
 'V3v': ['alexnet', ['features.11', 'features.12']],
 'V3d': ['alexnet', ['features.11', 'features.12']],
 'hV4': ['ZFNet', 'features.stage3.pool3'],
 'EBA': ['ZFNet', 'features.stage3.pool3'],
 'FBA-1': ['ZFNet', 'features.stage3.pool3'],
 'FBA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-bodies': nan,
 'OFA': ['ZFNet', 'features.stage3.pool3'],
 'FFA-1': ['ZFNet', 'features.stage3.pool3'],
 'FFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-faces': nan,
 'aTL-faces': nan,
 'OPA': ['ZFNet', 'features.stage3.pool3'],
 'PPA': ['ZFNet', 'features.stage3.pool3'],
 'RSC': ['ZFNet', 'features.stage3.pool3'],
 'OWFA': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-1': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mfs-words': ['ZFNet', 'features.sta

In [7]:
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

for subj in list(range(1, 9)):
    print(f'############################ {color.BOLD + color.RED} Subject: {str(subj)} {color.END + color.END} ############################ \n')
    if test_the_layers == True:
        '''
        If test_the_layers is True, we will test the layers of the models defined in test_models_layers one by one.
        At the end of the testing procedure, we will save the correlation values and visualitazions for each layer
        separately. 
        One single config file will be created at the end of the testing procedure, associating the best layer for
        each ROI. 
        '''
        print(f'######## Starting the {color.RED} LAYERS TESTING PROCEDURE {color.END} (limited to subj 1) ######## \n')
        totale_layers_to_test = sum(len(lista) for lista in test_models_layers.values())
        counter_layers_to_test = 0
        for feature_model_type, model_layers in test_models_layers.items():
            for model_layer in model_layers:
                counter_layers_to_test += 1
                print(f'######## Testing Model: {color.BOLD + feature_model_type + color.END} Layer: {color.BOLD + model_layer + color.END} {counter_layers_to_test}/{totale_layers_to_test} ############################ \n')
    if test_the_layers:
        print("Testing rotine is completed and executed only for the first subject. Exiting...")
        #sys.exit()  # Termina completamente l'esecuzione dello script

############################ [1m[91m Subject: 1 [0m[0m ############################ 

######## Starting the [91m LAYERS TESTING PROCEDURE [0m (limited to subj 1) ######## 

######## Testing Model: [1malexnet[0m Layer: [1mfeatures.4[0m 1/4 ############################ 

######## Testing Model: [1malexnet[0m Layer: [1mfeatures.11[0m 2/4 ############################ 

######## Testing Model: [1mZFNet[0m Layer: [1mfeatures.4[0m 3/4 ############################ 

######## Testing Model: [1mZFNet[0m Layer: [1mfeatures.11[0m 4/4 ############################ 

Testing rotine is completed and executed only for the first subject. Exiting...
############################ [1m[91m Subject: 2 [0m[0m ############################ 

######## Starting the [91m LAYERS TESTING PROCEDURE [0m (limited to subj 1) ######## 

######## Testing Model: [1malexnet[0m Layer: [1mfeatures.4[0m 1/4 ############################ 

######## Testing Model: [1malexnet[0m Layer: [1mfeatures

In [8]:
subj = 1
print(submission_name + "\n")
print('############################ Subject: ' + str(subj) + ' ############################ \n')
# Definining paths to data and submission directories ##
args = argObj(subj, data_home_dir, data_dir, parent_submission_dir, ncsnr_dir, images_trials_dir, save)
# Obtain the indices of the training, validation and test data
idxs_train, idxs_val, idxs_test, train_imgs_paths, test_imgs_paths = args.images_idx_splitter(train_percentage)

# Defining the images data loaderds
data_loaders = data_loaders_stimuli_fmri(idxs_train, 
                                            idxs_val, 
                                            idxs_test, 
                                            train_imgs_paths, 
                                            test_imgs_paths,
                                            lh_fmri_path = args.lh_fmri,
                                            rh_fmri_path = args.rh_fmri)

train_imgs_dataloader, val_imgs_dataloader, test_imgs_dataloader = data_loaders.images_dataloader(batch_size, transform)


(2023-08-11_20-11)-ALEXNET_features.4+features.11_ZFNET_features.4+features.11-PCA_100-RIDGE-ALPHA_1.0e+05

############################ Subject: 1 ############################ 

## Stimulus Images Loading: Info
Total train images: 9841
Training stimulus images: 8857
Validation stimulus images: 984
Test stimulus images: 159




## Prediction masking

Fake preds

In [54]:
lh_fmri_val_pred = np.random.rand(200, 19004)
rh_fmri_val_pred = np.random.rand(200, 20544)
lh_fmri_val_pred

array([[0.58941392, 0.95896382, 0.08835963, ..., 0.39384017, 0.60003216,
        0.56150445],
       [0.69153232, 0.94607038, 0.74390669, ..., 0.98634444, 0.10680846,
        0.92221459],
       [0.43205871, 0.27646631, 0.70142702, ..., 0.15766113, 0.36889967,
        0.84922873],
       ...,
       [0.13768773, 0.95278614, 0.49167684, ..., 0.26299793, 0.57794542,
        0.58881766],
       [0.29831251, 0.84926733, 0.10170244, ..., 0.85600533, 0.65356542,
        0.95430676],
       [0.66434695, 0.57276844, 0.89109163, ..., 0.29000872, 0.99928911,
        0.66327764]])

Importing the one-hot masks (subj specific)

In [89]:
args.roi_dir_enhanced

'../Datasets/Biomedical/algonauts_2023_challenge_data\\subj01\\roi_masks_enhanced'

In [50]:
import pandas as pd
roi_masks_enhanced_path_df =  os.path.join(args.roi_dir_enhanced, 'roi_df')
lh_roi_challenge_onehot = pd.read_csv(os.path.join(roi_masks_enhanced_path_df, 'lh_challenge_onehot.csv'))
rh_roi_challenge_onehot = pd.read_csv(os.path.join(roi_masks_enhanced_path_df, 'rh_challenge_onehot.csv'))

In [52]:
rh_roi_challenge_onehot

Unnamed: 0,V1v,V1d,V2v,V2d,V3v,V3d,hV4,EBA,FBA-1,FBA-2,...,mfs-words,mTL-words,early,midventral,midlateral,midparietal,ventral,lateral,parietal,Unknown
0,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20539,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
20540,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
20541,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
20542,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


ROI-model-layer dicts (subj specific)

In [47]:
import json

global_config_dir = './files/config/global'
extraction_config_file = 'config_0.45.json'
with open(os.path.join(global_config_dir, extraction_config_file), 'r') as json_file:
                final_extraction_config = json.load(json_file)
final_extraction_config['V1v'] = ['alexnet', ['features.11', 'features.12']]
final_extraction_config

{'V1v': ['alexnet', ['features.11', 'features.12']],
 'V1d': ['alexnet', 'features.11'],
 'V2v': ['alexnet', 'features.11'],
 'V2d': ['alexnet', 'features.11'],
 'V3v': ['alexnet', 'features.11'],
 'V3d': ['alexnet', 'features.11'],
 'hV4': ['ZFNet', 'features.stage3.pool3'],
 'EBA': ['ZFNet', 'features.stage3.pool3'],
 'FBA-1': ['ZFNet', 'features.stage3.pool3'],
 'FBA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-bodies': nan,
 'OFA': ['ZFNet', 'features.stage3.pool3'],
 'FFA-1': ['ZFNet', 'features.stage3.pool3'],
 'FFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-faces': nan,
 'aTL-faces': nan,
 'OPA': ['ZFNet', 'features.stage3.pool3'],
 'PPA': ['ZFNet', 'features.stage3.pool3'],
 'RSC': ['ZFNet', 'features.stage3.pool3'],
 'OWFA': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-1': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mfs-words': ['ZFNet', 'features.stage3.pool3'],
 'mTL-words': nan,
 'early': ['alexnet', 'features.11'],
 'midventral': 

In [69]:
# remove useless ROIs from the config file
keys_to_remove = ['All vertices', 'Unknown ROI', 'Unknown Stream']
for key in keys_to_remove:
    if key in final_extraction_config:
        del final_extraction_config[key]
final_extraction_config

{'V1v': ['alexnet', ['features.11', 'features.12']],
 'V1d': ['alexnet', 'features.11'],
 'V2v': ['alexnet', 'features.11'],
 'V2d': ['alexnet', 'features.11'],
 'V3v': ['alexnet', 'features.11'],
 'V3d': ['alexnet', 'features.11'],
 'hV4': ['ZFNet', 'features.stage3.pool3'],
 'EBA': ['ZFNet', 'features.stage3.pool3'],
 'FBA-1': ['ZFNet', 'features.stage3.pool3'],
 'FBA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-bodies': nan,
 'OFA': ['ZFNet', 'features.stage3.pool3'],
 'FFA-1': ['ZFNet', 'features.stage3.pool3'],
 'FFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mTL-faces': nan,
 'aTL-faces': nan,
 'OPA': ['ZFNet', 'features.stage3.pool3'],
 'PPA': ['ZFNet', 'features.stage3.pool3'],
 'RSC': ['ZFNet', 'features.stage3.pool3'],
 'OWFA': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-1': ['ZFNet', 'features.stage3.pool3'],
 'VWFA-2': ['ZFNet', 'features.stage3.pool3'],
 'mfs-words': ['ZFNet', 'features.stage3.pool3'],
 'mTL-words': nan,
 'early': ['alexnet', 'features.11'],
 'midventral': 

In [70]:
def json_config_to_feature_extraction_dicti(config_dict):
    new_dict = {}
    for key, value in config_dict.items():
        # If the value is not a list or is NaN, skip this iteration
        if not isinstance(value, list) or (isinstance(value, float) and math.isnan(value)):
            continue
        first_string, second_string = value
        
        # If the first string is not a key in the new dictionary, add an empty list
        if first_string not in new_dict:
            new_dict[first_string] = []
        
        # Add the second string to the corresponding list only if it's not already present
        if second_string not in new_dict[first_string]:
            new_dict[first_string].append(second_string)

    # Sort the second strings in each list alphabetically
    # for first_string in new_dict:
        
    #     new_dict[first_string].sort()
    return new_dict

In [71]:
from src.visualize import json_config_to_feature_extraction_dict

final_models_layers = json_config_to_feature_extraction_dicti(final_extraction_config)
final_models_layers

{'alexnet': [['features.11', 'features.12'], 'features.11'],
 'ZFNet': ['features.stage3.pool3']}

In [78]:
 

# itero sui modelli e sulle feature
for feature_model_type, model_layers in final_models_layers.items():
    for model_layer in model_layers:
        # rigenerola struttura ['model', ['features.11', 'features.12']] dei valori di config
        model_layer_id = [feature_model_type, model_layer]
        print([feature_model_type, model_layer])
        # verifico in config i quali ROI sono associati a quella coppia model-layer 
        keys_with_target_value = [key for key, value in final_extraction_config.items() if value == model_layer_id]
        print(keys_with_target_value)
        ### create the voxel mask for the current model-layer-specific set of ROIs
        # subset of ROIs from the one-hot encoded dataframe
        lh_roi_challenge_onehot_subset = lh_roi_challenge_onehot[keys_with_target_value]
        rh_roi_challenge_onehot_subset = rh_roi_challenge_onehot[keys_with_target_value]
        lh_mask = []
        rh_mask = []
        for index, row in lh_roi_challenge_onehot_subset.iterrows():
            if row.sum() == 1:
                lh_mask.append(1)
            elif row.sum() == 0:
                lh_mask.append(0)
            else:
                raise ValueError(f"Errore: la riga {index} ha più di un 1.")
        for index, row in rh_roi_challenge_onehot_subset.iterrows():
            if row.sum() == 1:
                rh_mask.append(1)
            elif row.sum() == 0:
                rh_mask.append(0)
            else:
                raise ValueError(f"Errore: la riga {index} ha più di un 1.")
        lh_mask = np.array(lh_mask)
        rh_mask = np.array(rh_mask)
        print(lh_mask, sum(lh_mask))
        print(rh_mask)
        

['alexnet', ['features.11', 'features.12']]
['V1v']
[0 0 0 ... 0 0 0] 710
[0 0 0 ... 0 0 0]
['alexnet', 'features.11']
['V1d', 'V2v', 'V2d', 'V3v', 'V3d', 'early']
[0 0 0 ... 0 0 0] 4788
[1 0 0 ... 0 0 0]
['ZFNet', 'features.stage3.pool3']
['hV4', 'EBA', 'FBA-1', 'FBA-2', 'OFA', 'FFA-1', 'FFA-2', 'OPA', 'PPA', 'RSC', 'OWFA', 'VWFA-1', 'VWFA-2', 'mfs-words', 'midventral', 'midlateral', 'midparietal', 'ventral', 'lateral', 'parietal', 'Unknown']
[1 1 1 ... 1 1 1] 13506
[0 1 1 ... 1 1 1]


In [75]:
len(lh_mask)
sum(lh_mask)

13506

In [84]:
lh_fmri_val_pred_final = np.copy(lh_fmri_val_pred)
lh_fmri_val_pred_final[:, lh_mask == 0] = np.nan
lh_fmri_val_pred_final
np.isnan(lh_fmri_val_pred_final[0]).sum()

5498

In [86]:
matrix1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

matrix2 = np.array([
    [10, 20, 30],
    [40, 50, 60],
    [70, 80, 90]
])

vector = np.array([0, 1, 0])

# Sostituisci le colonne della prima matrice con quelle della seconda
result_matrix = np.where(vector == 1, matrix2, matrix1)
result_matrix

array([[ 1, 20,  3],
       [ 4, 50,  6],
       [ 7, 80,  9]])

In [None]:
# creating

In [None]:
model, feature_extractor = model_loader(feature_model_type, model_layer, device)


In [4]:
# Fit the PCA model
if compute_pca:
    # Fit the PCA model
    pca_batch_size, n_stacked_batches = pca_batch_calculator(len(idxs_train),
                                                            batch_size,
                                                            min_pca_batch_size,
                                                            pca_component)
    
    pca = fit_pca(feature_extractor,
                    train_imgs_dataloader,
                    pca_component,
                    n_stacked_batches,
                    pca_batch_size,
                    device)
    print("Comulative Explained variance ratio: ", sum(pca.explained_variance_ratio_))
    print("Number of components: ", pca.n_components_)
    
    print('## Extracting features from training, validation and test data...')
    features_train = extract_and_pca_features(feature_extractor, train_imgs_dataloader, pca, n_stacked_batches, device)
    features_val = extract_and_pca_features(feature_extractor, val_imgs_dataloader, pca, n_stacked_batches, device)
    features_test = extract_and_pca_features(feature_extractor, test_imgs_dataloader, pca, n_stacked_batches, device)
    
    # print("\n")
    # print('## Checking and Freeing  GPU memory...')
    # memory_checker()
    model.to('cpu') # sposto sulla ram
    feature_extractor.to('cpu') # sposto sulla ram
    del model, feature_extractor, pca, train_imgs_dataloader, val_imgs_dataloader, test_imgs_dataloader  # elimino dalla ram
    torch.cuda.empty_cache() # elimino la chache vram
    gc.collect() # elimino la cache ram
    # memory_checker()
else:
    print('## Extracting features from training, validation and test data...')
    features_train = extract_features_no_pca(feature_extractor, train_imgs_dataloader, device)
    features_val = extract_features_no_pca(feature_extractor, val_imgs_dataloader, device)
    features_test = extract_features_no_pca(feature_extractor, test_imgs_dataloader, device)
    
    model.to('cpu') # sposto sulla ram
    feature_extractor.to('cpu') # sposto sulla ram
    del model, feature_extractor, train_imgs_dataloader, val_imgs_dataloader, test_imgs_dataloader  # elimino dalla ram
    torch.cuda.empty_cache() # elimino la chache vram
    gc.collect() # elimino la cache ram



## Calculating PCA batch size...
Batches size: 64
Total train instances: 8857
PCA components: 300
Minimum pca batch size: 500
Number of stacked batches for pca: 10
PCA batch size (batch_size * n_stacked_batches): 640
Last pca batch size: 537
## Fitting Incremental PCA (300 components) to training data...


100%|██████████| 139/139 [01:46<00:00,  1.30it/s]


Comulative Explained variance ratio:  0.6437733876120811
Number of components:  300
## Extracting features from training, validation and test data...


100%|██████████| 139/139 [01:11<00:00,  1.93it/s]


Inital features number: 9216, final features number: 300


100%|██████████| 16/16 [00:07<00:00,  2.10it/s]


Inital features number: 9216, final features number: 300


100%|██████████| 3/3 [00:01<00:00,  2.36it/s]

Inital features number: 9216, final features number: 300





In [7]:
lh_fmri_train, lh_fmri_val, rh_fmri_train, rh_fmri_val = data_loaders.fmri_splitter()

In [109]:
ciao = pd.read_csv('D:\Projects\Thesis\\files\config\global\\alpha_subj_layer.csv', index_col=0)
ciao

Unnamed: 0,alpha_l,alpha_r
ZFNet+features.stage3.pool3,0.328243,0.349637


In [114]:
model_layer_id = 'ZFNet+features.stage3.pool3'
alpha_l_value = ciao.loc[model_layer_id, 'alpha_l']
alpha_r_value = ciao.loc[model_layer_id, 'alpha_r']
print('alpha_l_value: ', alpha_l_value)

alpha_l_value:  0.3282425553637004


In [107]:
# eliminate from ciao all columns but V1v
ciaov1v = ciao[['V1v', 'V1d']]
# eliminate all rows but the first 
ciaov1v = pd.DataFrame(ciaov1v.iloc[[0]])
# rename the first column to alpha_l and the second to alpha_r
ciaov1v = ciaov1v.rename(columns={'V1v': 'alpha_l', 'V1d': 'alpha_r'})
ciaov1v.to_csv('D:\Projects\Thesis\\files\config\global\\alpha_subj_layer_1.csv')

In [108]:
best_alpha_l = 0.1
best_alpha_r = 0.2
model_layer_id = 'layer123'

# Creazione del DataFrame in modo compatto
df = pd.DataFrame({
    'alpha_l': pd.Series([best_alpha_l], index=[model_layer_id]),
    'alpha_r': pd.Series([best_alpha_r], index=[model_layer_id])
})
df

Unnamed: 0,alpha_l,alpha_r
layer123,0.1,0.2


# Ridge/Linear Regression

## Grid Search (ridgeR)

In [5]:
## Fit the linear model ##
print('\n ## Fit Encoder and Predict...')
lh_fmri_train, lh_fmri_val, rh_fmri_train, rh_fmri_val = data_loaders.fmri_splitter()
print('LH fMRI number of vertices:', lh_fmri_train.shape)
print('RH fMRI number of vertices:', rh_fmri_train.shape)
# param_grid = {'alpha': [0.0001, 0.0002, 0.001, 0.01, 0.1, 1, 10, 100, 1e3, 5e3, 1e4, 2e4, 5e4, 1e5, 1e6]}

param_grid = {'alpha': [1, 10, 100, 1e3, 5e3, 1e4, 2e4, 5e4, 1e5, 2e5, 5e5, 1e6]}
#param_grid = {'alpha': [1e6, 2e6, 5e6, 1e7, 2e7, 5e7]}


 ## Fit Encoder and Predict...


LH fMRI number of vertices: (8857, 19004)
RH fMRI number of vertices: (8857, 20544)


In [5]:
grid_search_l = GridSearchCV(Ridge(), param_grid=param_grid, scoring=make_scorer(
    lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=1)
grid_search_l.fit(X=features_train, y=lh_fmri_train)
print("Best Param: {}".format(grid_search_l.best_params_))
print("Best Score: {}".format(grid_search_l.best_score_))
alpha_l = grid_search_l.best_params_['alpha']

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Param: {'alpha': 100000.0}
Best Score: 0.4171876543623486


In [23]:
alpha_r = alpha_l = 1e5

In [16]:
print(alpha_r)

1000000.0


In [7]:
grid_search_r = GridSearchCV(Ridge(), param_grid=param_grid, scoring=make_scorer(
    lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=1)
grid_search_r.fit(X=features_train, y=rh_fmri_train)
print("Best Param: {}".format(grid_search_r.best_params_))
print("Best Score: {}".format(grid_search_r.best_score_))
alpha_r = grid_search_r.best_params_['alpha']

Fitting 5 folds for each of 10 candidates, totalling 50 fits


## Predict and evaluate 

In [24]:
lh_fmri_val_pred, lh_fmri_test_pred, rh_fmri_val_pred, rh_fmri_test_pred = linear_regression(regression_type, 
                                                                                                features_train, 
                                                                                                features_val, 
                                                                                                features_test, 
                                                                                                lh_fmri_train, 
                                                                                                rh_fmri_train, 
                                                                                                save,
                                                                                                args.subject_test_submission_dir,
                                                                                                alpha_l,
                                                                                                alpha_r,
                                                                                                grid_search= False)

noise_norm_corr_dict[f'lh_{subj}'], noise_norm_corr_dict[f'rh_{subj}'] = median_squared_noisenorm_correlation(lh_fmri_val_pred, 
                                                                                                                rh_fmri_val_pred,
                                                                                                                lh_fmri_val,
                                                                                                                rh_fmri_val,
                                                                                                                args.data_dir,
                                                                                                                args.ncsnr_dir,
                                                                                                                args.images_trials_dir,
                                                                                                                idxs_val)
print("\n Score -> Median Noise Normalized Squared Correlation Percentage (LH and RH)")
print("LH subj",subj,"| Score: ",np.median(noise_norm_corr_dict[f'lh_{subj}'])*100)
print("RH subj",subj,"| Score: ",np.median(noise_norm_corr_dict[f'rh_{subj}'])*100)

Fitting ridge regressions on the training data...
Predicting fMRI data on the validation and test data...
Computing the correlation between the predicted and actual fMRI data...


100%|██████████| 19004/19004 [00:01<00:00, 13989.54it/s]
100%|██████████| 20544/20544 [00:01<00:00, 14226.32it/s]


 Score -> Median Noise Normalized Squared Correlation Percentage (LH and RH)
LH subj 1 | Score:  44.3897836454705
RH subj 1 | Score:  43.29265074582257





# Support Vector Regression with non-linear kernels

## Grid Search

In [6]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor

In [7]:
print('\n ## Fit Encoder and Predict...')
lh_fmri_train, lh_fmri_val, rh_fmri_train, rh_fmri_val = data_loaders.fmri_splitter()
print('LH fMRI number of vertices:', lh_fmri_train.shape)
print('RH fMRI number of vertices:', rh_fmri_train.shape)


 ## Fit Encoder and Predict...
LH fMRI number of vertices: (8857, 19004)
RH fMRI number of vertices: (8857, 20544)


In [11]:
svr = MultiOutputRegressor(SVR(),n_jobs=-1)

In [8]:
param_grid = {'estimator__C': [0.1, 1, 10, 100], 
              'estimator__epsilon': [0.01, 0.1, 1, 10],  
              'estimator__kernel': ['rbf', 'poly']}
# 'gamma': [0.1, 1, 10, 100],
# Crea un oggetto SVR

grid_search_svr_l = GridSearchCV(svr, param_grid=param_grid, scoring=make_scorer(
    lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=10)

# Addestra il modello SVR utilizzando la griglia di ricerca degli iperparametri
grid_search_svr_l.fit(X=features_train, y=lh_fmri_train)

# Ottieni i migliori parametri trovati dalla griglia di ricerca
best_params_svr_l = grid_search_svr_l.best_params_

print("Best Param: {}".format(best_params_svr_l))
print("Best Score: {}".format(grid_search_svr_l.best_score_))


Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params_svr_r = best_params_svr_l

In [None]:
reg_svr_l = SVR(C=best_params_svr_l['C'], epsilon=best_params_svr_l['epsilon'], gamma="auto", kernel=best_params_svr_l['kernel'])
reg_svr_l.fit(features_train, lh_fmri_train)

reg_svr_r = SVR(C=best_params_svr_r['C'], epsilon=best_params_svr_r['epsilon'], gamma="auto", kernel=best_params_svr_r['kernel'])
reg_svr_r.fit(features_train, rh_fmri_train)

No GridSearch

In [None]:
reg_svr_l = MultiOutputRegressor(SVR(gamma="auto", kernel='rbf'),n_jobs=-1)
reg_svr_l.fit(features_train, lh_fmri_train)

In [None]:

reg_svr_r = MultiOutputRegressor(SVR(gamma="auto", kernel='rbf'),n_jobs=-1)
reg_svr_r.fit(features_train, rh_fmri_train)

## Prediction and evaluation

In [28]:
lh_fmri_val_pred = reg_svr_l.predict(features_val)
rh_fmri_val_pred = reg_svr_l.predict(features_val)

NameError: name 'reg_svr_l' is not defined

In [None]:
noise_norm_corr_dict[f'lh_{subj}'], noise_norm_corr_dict[f'rh_{subj}'] = median_squared_noisenorm_correlation(lh_fmri_val_pred, 
                                                                                                                rh_fmri_val_pred,
                                                                                                                lh_fmri_val,
                                                                                                                rh_fmri_val,
                                                                                                                args.data_dir,
                                                                                                                args.ncsnr_dir,
                                                                                                                args.images_trials_dir,
                                                                                                                idxs_val)
print("\n Score -> Median Noise Normalized Squared Correlation Percentage (LH and RH)")
print("LH subj",subj,"| Score: ",np.median(noise_norm_corr_dict[f'lh_{subj}'])*100)
print("RH subj",subj,"| Score: ",np.median(noise_norm_corr_dict[f'rh_{subj}'])*100)

# Regression Trees

## Grid Search

In [10]:
from sklearn.tree import DecisionTreeRegressor

In [11]:
print('\n ## Fit Encoder and Predict...')
lh_fmri_train, lh_fmri_val, rh_fmri_train, rh_fmri_val = data_loaders.fmri_splitter()
print('LH fMRI number of vertices:', lh_fmri_train.shape)
print('RH fMRI number of vertices:', rh_fmri_train.shape)


 ## Fit Encoder and Predict...
LH fMRI number of vertices: (8857, 19004)
RH fMRI number of vertices: (8857, 20544)


In [7]:
# param_grid = {'max_depth': [None, 5, 10, 15],
#               'min_samples_split': [2, 5, 10],
#               'min_samples_leaf': [1, 2, 4],
#               'max_features': ['auto', 'sqrt', 'log2']}

param_grid = {'max_depth': [None, 5]}

In [8]:
tree_reg = DecisionTreeRegressor()
grid_search_tree_l = GridSearchCV(tree_reg, param_grid=param_grid, scoring=make_scorer(
    lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=10)

grid_search_tree_l.fit(X=features_train, y=lh_fmri_train)

Fitting 5 folds for each of 2 candidates, totalling 10 fits


In [None]:
# Ottieni i migliori parametri trovati dalla griglia di ricerca
best_params_tree_l = grid_search_tree_l.best_params_

print("Best Param: {}".format(best_params_tree_l))
print("Best Score: {}".format(grid_search_tree_l.best_score_))

In [None]:
best_params_tree_r = best_params_tree_l

CV Fit

In [None]:
reg_tree_l = DecisionTreeRegressor(max_depth=best_params_tree_l['max_depth'],
                                 min_samples_split=best_params_tree_l['min_samples_split'],
                                 min_samples_leaf=best_params_tree_l['min_samples_leaf'],
                                 max_features=best_params_tree_l['max_features'])
reg_tree_l.fit(features_train, lh_fmri_train)

reg_tree_r = DecisionTreeRegressor(max_depth=best_params_tree_r['max_depth'],
                                 min_samples_split=best_params_tree_r['min_samples_split'],
                                 min_samples_leaf=best_params_tree_r['min_samples_leaf'],
                                 max_features=best_params_tree_r['max_features'])
reg_tree_r.fit(features_train, lh_fmri_train)

No CV fit

In [12]:
reg_svr_l = DecisionTreeRegressor(criterion = "squared_error", max_depth=5, max_features = None)
reg_svr_l.fit(features_train, lh_fmri_train)

In [18]:
reg_svr_r = DecisionTreeRegressor(max_features = "auto")
reg_svr_r.fit(features_train, rh_fmri_train)



In [13]:
reg_svr_r = reg_svr_l

## Predict and evaluate

In [16]:
lh_fmri_val_pred = reg_svr_l.predict(features_val)
rh_fmri_val_pred = reg_svr_l.predict(features_val)
rh_fmri_val_pred = rh_fmri_val

In [17]:
noise_norm_corr_dict[f'lh_{subj}'], noise_norm_corr_dict[f'rh_{subj}'] = median_squared_noisenorm_correlation(lh_fmri_val_pred, 
                                                                                                                rh_fmri_val_pred,
                                                                                                                lh_fmri_val,
                                                                                                                rh_fmri_val,
                                                                                                                args.data_dir,
                                                                                                                args.ncsnr_dir,
                                                                                                                args.images_trials_dir,
                                                                                                                idxs_val)
print("\n Score -> Median Noise Normalized Squared Correlation Percentage (LH and RH)")
print("LH subj",subj,"| Score: ",np.median(noise_norm_corr_dict[f'lh_{subj}'])*100)
print("RH subj",subj,"| Score: ",np.median(noise_norm_corr_dict[f'rh_{subj}'])*100)

Computing the correlation between the predicted and actual fMRI data...


100%|██████████| 19004/19004 [00:01<00:00, 14119.53it/s]
100%|██████████| 20544/20544 [00:00<00:00, 22544.37it/s]


 Score -> Median Noise Normalized Squared Correlation Percentage (LH and RH)
LH subj 1 | Score:  16.77595593341632
RH subj 1 | Score:  100.0





# Visualize

In [10]:
histogram(args.data_dir, noise_norm_corr_dict[f'lh_{subj}'], 
          noise_norm_corr_dict[f'rh_{subj}'], 
          submission_name, 
          save = args.subject_images_submission_dir)



TypeError: expected str, bytes or os.PathLike object, not bool

In [None]:
box_plot(args.data_dir, noise_norm_corr_dict[f'lh_{subj}'], 
          noise_norm_corr_dict[f'rh_{subj}'], 
          submission_name, 
          save = args.subject_images_submission_dir)