In [1]:
import os, sys

# Make the repository folder the working directory and importable.
# The original unconditional relative `os.chdir` failed when this cell was
# re-run, because the kernel was then already inside the repository folder.
REPO_DIR_NAME = 'Petreanu_MEI_generation'
if os.path.isdir(REPO_DIR_NAME):
    # First run: the repository folder is a child of the start directory.
    os.chdir(REPO_DIR_NAME)
elif os.path.basename(os.getcwd()) != REPO_DIR_NAME:
    # Neither next to nor inside the repository folder: same failure type as
    # the bare os.chdir, but with an actionable message.
    raise FileNotFoundError(
        f'Could not find the {REPO_DIR_NAME} folder from {os.getcwd()}')

# Do not stack duplicate sys.path entries when the cell is executed again.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

print(os.getcwd())

d:\Python\Anastasia\Petreanu Lab\Petreanu_MEI_generation


In [2]:
import sys
import os
import shutil
import numpy as np
import glob
from tqdm.auto import tqdm
import datetime
from scipy.stats import zscore
from pathlib import Path
from sensorium.utility.training import read_config, set_seed
from nnfabrik.builder import get_data, get_model
from sensorium.utility import prediction
from sklearn.decomposition import PCA, NMF

# Set working directory to root of repo.
# The notebook may be started from any folder inside the repository, so the
# current path is cut right after the first occurrence of the repository name.
REPO_DIR_NAME = 'Petreanu_MEI_generation'
current_path = os.getcwd()
if REPO_DIR_NAME not in current_path:
    raise FileNotFoundError(
        f'This needs to be run somewhere from within the Petreanu_MEI_generation folder, not {current_path}')
# Keep everything up to and including the repository folder name.
path_prefix, _, _ = current_path.partition(REPO_DIR_NAME)
current_path = path_prefix + REPO_DIR_NAME
os.chdir(current_path)
# Re-running the cell must not append the same entry to sys.path again.
if current_path not in sys.path:
    sys.path.append(current_path)

run_config = read_config('run_config.yaml') # Must be set

# MUST be set. Per the original note this names a subfolder in the runs folder
# holding data, saved models, etc. This cell deletes and recreates
# `{RUN_FOLDER}/data` (see below).
RUN_NAME = run_config['current_vals']['RUN_NAME']
RUN_FOLDER = run_config['current_vals']['RUN_FOLDER']
INPUT_FILES = f'{RUN_FOLDER}/data_preprocessed' # relative to the root directory (Petreanu_MEI_generation)
RUN_DATA_FOLDER = f'{RUN_FOLDER}/data'  # working copy that is rebuilt on every run

# Validate the copy source BEFORE anything is deleted: previously a missing
# input folder was only detected by copytree, after the old data was gone.
if not os.path.isdir(INPUT_FILES):
    raise FileNotFoundError(
        f'Input folder {INPUT_FILES} does not exist; nothing was deleted.')

if run_config['ASK_FOR_CONFIRMATION']:
    input(f'This will delete all files in the {RUN_FOLDER}/data folder. Press Enter to continue or Ctrl+C to cancel.')
else:
    print(f'This will delete all files in the {RUN_FOLDER}/data folder. Automatically continuing...')

# delete the previous working copy (copytree requires the target not to exist)
if os.path.exists(RUN_DATA_FOLDER):
    print(f'Deleting existing folder {RUN_FOLDER}/data')
    shutil.rmtree(RUN_DATA_FOLDER)
else:
    os.makedirs(RUN_FOLDER, exist_ok=True)

# copy data to the run folder
print(f'Copying data from {INPUT_FILES} to {RUN_FOLDER}/data')
shutil.copytree(INPUT_FILES, RUN_DATA_FOLDER)

# Created by Anastasia Simonoff for the Leopoldo Petreanu lab at the Champalimaud Centre for the Unknown.
# Created on 11 Oct 2024
# Based off of work by Adrian Roggenbach

# Create additional variables

# ## Part 1: Merge individual trial files into one matrix

# Loading data from individual files is sometimes slow on the CSCS Piz Daint. Therefore the dataloader was adapted to load in one file containing all trials.
# 
# This notebook reads in the individual trial files and saves one larger file which contains all trials. The data is saved in the folder merged_data, with the file name corresponding to the variable name (e.g. `behavior.npy`).

set_seed(4534)


def _list_subdirs(parent):
    """Return the direct sub-directories of ``parent`` as joined paths.

    Entries whose name contains "merged_data" are skipped. Names are sorted
    because ``os.listdir`` returns them in arbitrary order, which would make
    the folder order (and everything derived from it) platform dependent.
    """
    return [
        os.path.join(parent, name)
        for name in sorted(os.listdir(parent))
        if "merged_data" not in name and os.path.isdir(os.path.join(parent, name))
    ]


# Collect the folders two levels below the run's data folder into a list
data_folder_in = f'{RUN_FOLDER}/data'
folders = [
    second_level
    for first_level in _list_subdirs(data_folder_in)
    for second_level in _list_subdirs(first_level)
]
# Normalise Windows separators; later code splits these paths on '/'.
folders = [x.replace("\\", "/") for x in folders]

print(f'Folders to process: {folders}')

def _merge_trial_files(trial_folder, desc=None):
    """Stack the per-trial files of one folder into a single array.

    The folder must contain files named ``0.npy`` ... ``<n-1>.npy``. The shape
    of ``0.npy`` defines the per-trial shape; the result is a float64 array of
    shape ``(n, *trial_shape)``.

    Args:
        trial_folder: directory holding one ``<index>.npy`` file per trial.
        desc: optional label for the progress bar.

    Returns:
        The stacked array, indexed by trial number along the first axis.
    """
    # Count only .npy files so a stray non-array file does not inflate the count.
    nr_files = len(glob.glob(os.path.join(trial_folder, '*.npy')))
    trial_shape = np.load(os.path.join(trial_folder, '0.npy')).shape
    merged = np.empty((nr_files, *trial_shape))

    for nr in tqdm(range(nr_files), desc=desc):
        merged[nr] = np.load(os.path.join(trial_folder, '{}.npy'.format(nr)))

    # Returned directly: wrapping it in np.array(...) would copy the whole array.
    return merged


for folder in folders:
    print('Working on folder: {}'.format(folder), flush=True)

    # find the different data folders with individual files per trial
    # (e.g. 'behavior', 'pupil_center', 'images', 'responses')
    subfolders = sorted(glob.glob(os.path.join(folder, 'data', '*')))

    # create new directory if it does not exist
    save_folder = os.path.join(folder, 'merged_data')
    os.makedirs(save_folder, exist_ok=True)

    # iterate through all folders, load individual files and save as one large matrix
    for subfolder in subfolders:
        subname = os.path.basename(subfolder)
        # 'respmat.npy' is skipped explicitly as before; any other plain file
        # is skipped too, since only directories hold per-trial files.
        if subname == 'respmat.npy' or not os.path.isdir(subfolder):
            continue

        # (nr_trials, *) e.g. (5983, 3) for behavior
        data_mat = _merge_trial_files(subfolder, desc=subname)

        # assert no nans
        assert not np.isnan(data_mat).any(), f'Nans in data_mat {subname}'

        np.save(os.path.join(save_folder, '{}.npy'.format(subname)), data_mat)

# ## Part 2: Extract trial order from timestamps and save

# Fail early with a clear message: the final save below derives its target
# directory from the folder list, which previously surfaced as a NameError on
# the leaked loop variable when no folders had been found.
if not folders:
    raise RuntimeError('No data folders found; cannot extract the trial order.')

all_sorting = dict()

for folder in folders:
    print('Working on: ', folder)

    # read and sort timestamps
    ts_path = os.path.join(folder, 'meta', 'trials', 'frame_trial_ts.npy')
    # NOTE(review): allow_pickle=True can execute code embedded in the file;
    # only load timestamp files that come from a trusted source.
    ts_raw = np.load(ts_path, allow_pickle=True)

    # The first 11 and last 2 characters of every entry are dropped before
    # parsing -- presumably a wrapper such as "Timestamp('...')"; TODO confirm.
    ts = np.array([
        datetime.datetime.strptime(string[11:-2], '%Y-%m-%d %H:%M:%S')
        for string in ts_raw
    ])

    # The parsed timestamps only have one-second resolution, so ties are
    # possible; a stable sort keeps tied trials in their original index order
    # instead of an implementation-defined one.
    ts_argsort = np.argsort(ts, kind='stable')

    # save the trial_id and sorted_id as .npy arrays in merged_data
    nr_trials = len(ts_argsort)
    # transform to 2d array for batch loader
    trial_id = np.expand_dims(np.arange(nr_trials, dtype=int), axis=1)

    # transform to 2d array for batch loader
    sort_id = np.expand_dims(np.copy(ts_argsort), axis=1)

    # extract rank of each trial. This array maps each trial to its position
    # in time. Rank value 2 means that this trial occurred at time position 2
    rank = np.argsort(ts_argsort)  # rank is double argsorted time
    rank_id = np.expand_dims(rank, axis=1)

    np.save(os.path.join(folder, 'merged_data', 'trial_id.npy'), trial_id)
    np.save(os.path.join(folder, 'merged_data', 'sort_id.npy'), sort_id)
    np.save(os.path.join(folder, 'merged_data', 'rank_id.npy'), rank_id)

    # add sorting to dict to save the sorting for all keys together
    # NOTE(review): the parent folder name appears twice in the key. Kept
    # unchanged because the expected key format is defined outside this
    # section -- confirm against the data loader.
    parent_name, leaf_name = folder.split('/')[-2:]
    data_key = f"{parent_name}-{parent_name}_{leaf_name}-0"
    all_sorting[data_key] = ts_argsort

    # read in behavior data and sort as sanity check (indexing raises if the
    # merged behavior array has fewer rows than there are timestamps)
    behav_file = os.path.join(folder, 'merged_data/behavior.npy')
    behav_data = np.load(behav_file)
    sort_behav = behav_data[ts_argsort]

    # plt.figure(figsize=(12, 4))
    # plt.plot(sort_behav[0:600, 0], label='Pupil')
    # plt.plot(sort_behav[0:600, 2] * 10, label='Running 10x')
    # plt.legend()

# Go back to the main run directory: three levels above a
# `<run folder>/data/<level 1>/<level 2>` entry.
data_folder = str(Path(folders[-1]).parents[2])
# A dict is stored as a pickled object array; np.load needs allow_pickle=True
# to read it back.
np.save(os.path.join(data_folder, 'dataset_sortings.npy'), all_sorting)

# ## Part 3: Create additional regressors

# ### First pass the neural activity through model for correct normalization


This will delete all files in the runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data folder. Automatically continuing...
Deleting existing folder runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data
Copying data from runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data_preprocessed to runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data
Folders to process: ['runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE09665/2023_03_20', 'runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10883/2023_10_23', 'runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10883/2023_10_31', 'runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10884/2023_10_12', 'runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10885/2023_10_20', 'runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10919/2023_11_09', 'runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE11086/2023_12_1

behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10883/2023_10_23


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10883/2023_10_31


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10884/2023_10_12


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10885/2023_10_20


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10919/2023_11_09


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE11086/2023_12_16


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE11086/2024_01_09


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE11495/2024_02_29


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE11998/2024_05_08


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE12223/2024_06_11


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on folder: runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE13998/2025_03_10


behavior:   0%|          | 0/5600 [00:00<?, ?it/s]

images:   0%|          | 0/5600 [00:00<?, ?it/s]

pupil_center:   0%|          | 0/5600 [00:00<?, ?it/s]

responses:   0%|          | 0/5600 [00:00<?, ?it/s]

Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE09665/2023_03_20
Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10883/2023_10_23
Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10883/2023_10_31
Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10884/2023_10_12
Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10885/2023_10_20
Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE10919/2023_11_09
Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE11086/2023_12_16
Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE11086/2024_01_09
Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE11495/2024_02_29
Working on:  runs/V1_MEI_in_vivo_validation_with_grid_mean_predictor_2/data/LPE11998/2024_05_08
Working on:  runs/V1_MEI_in_vivo_validat

In [3]:

# Dotted path of the loader function and the keyword arguments that are
# handed to `get_data` further down.
dataset_fn = 'sensorium.datasets.static_loaders'
dataset_config = dict(
    paths=folders,  # folders collected in the preprocessing cell above
    normalize=True,
    include_behavior=True,
    include_eye_position=True,
    batch_size=128,
    scale=0.25,
    preload_from_merged_data=True,
    adjusted_normalization=True,  # new normalization for behavior variables
    include_trial_id=True,
)


In [4]:
# The former self-assignment `dataset_config['paths'] = dataset_config['paths']`
# was a no-op. Check instead that there is at least one dataset folder, so an
# empty list fails here with a clear message rather than inside the loader.
if not dataset_config['paths']:
    raise ValueError("dataset_config['paths'] is empty; no dataset folders to load.")

In [5]:
# Build the dataloaders by passing the loader path and its config to nnfabrik's
# `get_data`.
# NOTE(review): the recorded output of this cell ends with
# "UnboundLocalError: local variable 'data_key' referenced before assignment",
# so this call did not complete in the saved run -- investigate before relying
# on `dataloaders`.
dataloaders = get_data(dataset_fn, dataset_config)
# Bare expression: displays the resulting object as the cell output.
dataloaders

  threshold = 0.01 * s.mean()
  ret = ret.dtype.type(ret / rcount)


[]
[]
[]


UnboundLocalError: local variable 'data_key' referenced before assignment