In [3]:
import os
import sys
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import loadmat
from matplotlib.ticker import MultipleLocator
import nibabel as nib
import pickle
from importlib import reload
import h5py
from nilearn import plotting
import nibabel as nib
from sklearn.cross_decomposition import PLSRegression
import seaborn as sns
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import r2_score
from colorama import Fore, Style

os.chdir('/home/rfpred')
sys.path.append('/home/rfpred')
sys.path.append('/home/rfpred/envs/rfenv/lib/python3.11/site-packages/')
sys.path.append('/home/rfpred/envs/rfenv/lib/python3.11/site-packages/nsdcode')

In [11]:

class DataFetch():
    
    def __init__(self):
        pass
    # Function to get the visual contrast features and predictability estimates
    # IMPROVE: make sure that it also works for all subjects later on. Take subject arg, clean up paths.
    def features(self):
        feature_paths = [
            './data/custom_files/all_visfeats_rms.pkl',
            './data/custom_files/all_visfeats_rms_crop_prior.pkl',
            '/home/rfpred/data/custom_files/all_visfeats_scce.pkl',
            '/home/rfpred/data/custom_files/all_visfeats_scce_large.pkl',
            '/home/rfpred/data/custom_files/subj01/pred/all_predestims.h5'
        ]
        return {os.path.basename(file): self._fetch_file(file) for file in feature_paths}
    # Function to get the pRF-based voxel selections
    # IMPROVE: make sure that it also works for all subjects later on. Take subject arg, clean up paths.
    def prf_selections(self):
        prf_selection_paths = [
            './data/custom_files/subj01/prf_mask_center_strict.pkl',
            './data/custom_files/subj01/prf_mask_central_strict_l.pkl',
            './data/custom_files/subj01/prf_mask_central_halfloose.pkl',
            './data/custom_files/subj01/prf_mask_central_loose.pkl',
            './data/custom_files/subj01/prf_mask_periphery_strict.pkl'
        ]
        return {os.path.basename(file): self._fetch_file(file) for file in prf_selection_paths}
    
    def _fetch_file(self, file_path:str):
        """
        General function to acquire saved data from various file types
        file_type: str, the types of files to be fetched, either features or prf_selections
        """
        _, ext = os.path.splitext(file_path)
        
        # Check if file is of .h5 type
        if ext == '.h5':
            with h5py.File(file_path, 'r') as hf:
                data = hf.keys()
                return {key: np.array(hf[key]).flatten() for key in data}
        # Check if file is of .pkl type
        elif ext == '.pkl':
            with open(file_path, 'rb') as fp:
                return pickle.load(fp)

In [13]:
class NatSpatPred():
    
    def __init__(self, datapath:str='/home/rfpred/data', verbose:bool=False):
        self.datafetch = None
        self._datapath = datapath
        self._verbose = verbose
        self.subjects = sorted(os.listdir(f'{datapath}/natural-scenes-dataset/nsddata/ppdata'), key=lambda s: int(s.split('subj')[-1]))
        self.rois = None
        self.roi_masks = None
        self.prf_dict = None
        self.anat_temps = None
        self.feats = None
        self.prfloc_vox_selections = None
        self.attributes = None
        self.attributes_unfiltered = None


    # TODO: Expand this initialise in such way that it creates all the globally relevant attributes by calling on methods from the
    # nested classes
    def initialise(self):
        self.datafetch = DataFetch()
        self.rois, self.roi_masks = self._make_visrois_dict()
        self.prf_dict = self._write_prf_dict()
        self.anat_temps = self._get_anat_templates()
        self.feats = self.datafetch.features()
        self.prfloc_vox_selections = self.datafetch.prf_selections()
        self.attributes = [attr for attr in dir(self) if not attr.startswith('_')] # Filter out both the 'dunder' and hidden methods
        self.attributes_unfiltered = [attr for attr in dir(self) if not attr.startswith('__')] # Filter out only the 'dunder' methods
        print(f'Naturalistic Spatial Prediction class: {Fore.LIGHTGREEN_EX}Initialised{Style.RESET_ALL}')
        print('\nClass contains the following attributes:')
        for attr in self.attributes:
            print(f"{Fore.LIGHTWHITE_EX}\t.{attr}{Style.RESET_ALL}")


    def _make_visrois_dict(self):
        rois = []
        binary_masks = {}

        for subj_no in range(1, len(self.subjects) + 1):
            if self._verbose:
                print(f'Fetching roi masks for subject {Fore.LIGHTBLUE_EX}{subj_no}{Style.RESET_ALL}')
            mask_dir = f'{self._datapath}/natural-scenes-dataset/nsddata/ppdata/subj0{subj_no}/func1mm/roi'

            # read in and sort all the filenames in the mapped masks folder for each subject
            non_binary_masks = sorted([file for file in os.listdir(mask_dir) if '_mask.nii' in file])
            subj_binary_masks = {}

            for idx, mask_file in enumerate(non_binary_masks):
                # Load the mask file
                subj_binary_masks[non_binary_masks[idx][:-7]] = (nib.load(os.path.join(mask_dir, mask_file)).get_fdata()).astype(int)
            if self._verbose:
                # Print the amount of non-zero voxels in the roi
                for key, subj_binary_mask in subj_binary_masks.items():
                    print(f" - Voxels in {Fore.BLUE}{key[:2]}{Style.RESET_ALL}: {np.sum(subj_binary_mask)}")
                    
            binary_masks[f'subj0{subj_no}'] = subj_binary_masks
            rois = [roi[:2] for roi in binary_masks['subj01'].keys()]
        return rois, binary_masks
    
    # Function to create a list solely containing roi-based voxels
    def _roi_filter(self, roi_mask, input_array, nan2null:bool=False):
        roi_ices = np.argwhere(roi_mask != 0)

        # Create list that only contains the voxels of the specific roi
        roi_ar = np.column_stack((roi_ices, input_array[roi_ices[:, 0], roi_ices[:, 1], roi_ices[:, 2]]))

        # Turn the nan values into zeros for the angle parameter
        if nan2null:
            output_roi = np.nan_to_num(roi_ar, nan=0)
            
        # Filter away the nan values
        output_roi = roi_ar[~np.isnan(roi_ar).any(axis=1)]
        rounded_output_roi = np.round(roi_ar, 5)
        
        # Set print options to control precision and suppress scientific notation
        np.set_printoptions(precision=5, suppress=True)
        
        return rounded_output_roi
        
    # Function to load in nifti (.nii.gz) data and create some useful variables 
    def _get_dat(self, path:str):
        full_dat = nib.load(path)
        dat_array = full_dat.get_fdata()
        
        # Calculate the range of values
        flat_arr = dat_array[~np.isnan(dat_array)]
        dat_dim = dat_array.shape

        return full_dat, dat_array, dat_dim, {'min': round(np.nanmin(flat_arr),7), 'max': np.nanmax(flat_arr), 'mean': round(np.nanmean(flat_arr),5)}
    
    # This function provides a dictionary with all the pRF data for all subjects and rois
    def _write_prf_dict(self):
        prf_dict = {}

        # Make a loop to go over all the subjects
        for subject in self.subjects:
            prf_dict[subject] = {}
            prf_dict[subject]['nsd_dat'] = {}
            
            # Initialize dictionaries if they don't exist
            prf_dict[subject]['proc'] = {}

            # Get the overall prf results, save them in a dict
            prf_types = ['angle', 'eccentricity', 'exponent', 'gain', 'meanvol', 'R2', 'size']

            for prf_type in prf_types:
                prf_path = f'{self._datapath}/natural-scenes-dataset/nsddata/ppdata/{subject}/func1mm/prf_{prf_type}.nii.gz'
                prf_dat, prf_ar, prf_dim, prf_range = self._get_dat(prf_path)
                prf_dict[subject]['nsd_dat'][prf_type] = {
                    'prf_dat': prf_dat,
                    'prf_ar': prf_ar,
                    'prf_dim': prf_dim,
                    'prf_range': prf_range
                }
            roi_list =  [f'{roistr}_mask' for roistr in self.rois]
            for roi in roi_list:
                prf_dict[subject]['proc'][roi] = {
                    prf_type : None for prf_type in prf_types
                } 
                for prf_type in prf_types:
                    prf_dict[subject]['proc'][roi][prf_type] = self._roi_filter(self.roi_masks[subject][roi], prf_dict[subject]['nsd_dat'][prf_type]['prf_ar'])

            # Calculate the linear pRF sigma values, these tend to be smaller and don't take
            # into account the nonlinear relationship between input and neural respons
                lin_sigmas = prf_dict[subject]['proc'][roi]['size'][:,3] * np.sqrt(prf_dict[subject]['proc'][roi]['exponent'][:,3])
                prf_dict[subject]['proc'][roi]['lin_sigma'] = np.column_stack([prf_dict[subject]['proc'][roi]['size'][:,0:3], lin_sigmas])

        return prf_dict
    
    def _get_anat_templates(self):
        # Get subject-specific T1 anatomical maps to use as base for later overlays
        anat_temps = {}
        for subject in self.prf_dict.keys():
            anat_temps[subject] = nib.load(f'{self._datapath}/natural-scenes-dataset/nsddata/ppdata/{subject}/func1mm/T1_to_func1mm.nii.gz')
        return anat_temps
    


In [14]:
NSP = NatSpatPred()
NSP.initialise()

Naturalistic Spatial Prediction class: [92mInitialised[0m

Class contains the following attributes:
[97m	.anat_temps[0m
[97m	.attributes[0m
[97m	.attributes_unfiltered[0m
[97m	.datafetch[0m
[97m	.feats[0m
[97m	.initialise[0m
[97m	.prf_dict[0m
[97m	.prfloc_vox_selections[0m
[97m	.roi_masks[0m
[97m	.rois[0m
[97m	.subjects[0m


In [20]:
print(NSP.prfloc_vox_selections['prf_mask_center_strict.pkl']['subj01']['V1_mask'].shape)

print(NSP.roi_masks['subj01']['V1_mask'].shape)


(145, 186, 148)
(145, 186, 148)


In [21]:
import psutil
import os

def print_memory_usage():
    process = psutil.Process(os.getpid())
    print(f"Memory usage: {process.memory_info().rss / 1024 ** 2} MB")

print_memory_usage()

Memory usage: 10375.7265625 MB


In [24]:
def print_user_variables():
    print("\n".join("%s: %s" % item for item in globals().items() if not item[0].startswith("__")))

# print_user_variables()

In [25]:
import psutil
import os

def print_cpu_usage():
    process = psutil.Process(os.getpid())
    print(f"CPU usage: {process.cpu_percent()}%")

print_cpu_usage()

CPU usage: 0.0%


In [28]:
import sys

x = "Hello, world!"
print(f"Memory usage of x: {sys.getsizeof(NSP.roi_masks)} bytes")

Memory usage of x: 272 bytes


In [None]:
# AlexNet(
#   (features): Sequential(
#     (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
#     (1): ReLU(inplace=True)
#     (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
#     (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
#     (4): ReLU(inplace=True)
#     (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
#     (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (7): ReLU(inplace=True)
#     (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (9): ReLU(inplace=True)
#     (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (11): ReLU(inplace=True)
#     (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
#   )
#   (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
#   (classifier): Sequential(
#     (0): Dropout(p=0.5, inplace=False)
#     (1): Linear(in_features=9216, out_features=4096, bias=True)
#     (2): ReLU(inplace=True)
#     (3): Dropout(p=0.5, inplace=False)
#     (4): Linear(in_features=4096, out_features=4096, bias=True)
#     (5): ReLU(inplace=True)
#     (6): Linear(in_features=4096, out_features=1000, bias=True)
#   )
# )