In [None]:
%pip install -q numpy scipy h5py tqdm

In [None]:
%pip install -q numpy scipy h5py tqdm

In [None]:
from pathlib import Path
import numpy as np
from tqdm import tqdm

# Notebook-wide configuration; every later cell reads these names.
BASE_DIR = Path('/content/drive/MyDrive/algonauts_2023_tutorial_data')  # where the official tutorial data is placed
SUBJECT = 'subj01'  # subj01 through subj08
USE_RELIABILITY_MASK = False  # set to True to apply the reliability mask
TIME_AGG = 'mean'  # 'mean' | 'median' | 'first' | 'last'
SAMPLE_INDEX = 0  # index of the sample to extract (source priority: test -> val -> train)

# Per-subject data directory.
ROOT = BASE_DIR / SUBJECT
print('ROOT:', ROOT)

## 1. パス解決 (左右別ファイル)

In [None]:
def path_for(split: str, hemi: str):
    """Locate the fMRI ``.npy`` file of one split and hemisphere under ``ROOT``.

    Two directory layouts are probed, in this order:
    ``<split>_split/<split>_fmri/<hemi>_<split>_fmri.npy`` and
    ``<split>_fmri/<hemi>_<split>_fmri.npy``
    (e.g. ``training_split/training_fmri/lh_training_fmri.npy``).

    Args:
        split: Split name as used in directory and file names (e.g. 'training').
        hemi: Hemisphere prefix of the file name (e.g. 'lh' or 'rh').

    Returns:
        The first candidate path that exists on disk, or None if neither does.
    """
    file_name = f'{hemi}_{split}_fmri.npy'
    fmri_dir = f'{split}_fmri'
    layouts = (
        ROOT / f'{split}_split' / fmri_dir / file_name,
        ROOT / fmri_dir / file_name,
    )
    return next((candidate for candidate in layouts if candidate.exists()), None)

# Resolve every input file once. fMRI entries are None when no file was found;
# the mask entries are plain paths whose existence is checked where they are used.
paths = {
    **{
        f'{split}_{hemi}': path_for(split, hemi)
        for split in ('training', 'val', 'test')
        for hemi in ('lh', 'rh')
    },
    'mask_lh': ROOT / 'reliability_masks' / 'lh_reliability_mask.npy',
    'mask_rh': ROOT / 'reliability_masks' / 'rh_reliability_mask.npy',
    'mask_all': ROOT / 'reliability_masks' / 'reliability_mask.npy',
}

# Show each entry, printing None for anything that is not present on disk.
for k, v in paths.items():
    print(f"{k}: {v if v is not None and v.exists() else None}")

# Both hemispheres of the training split are mandatory.
if not paths['training_lh'] or not paths['training_rh']:
    raise FileNotFoundError('training の左右 fMRI が見つかりません。パスを確認してください。')

## 2. ロード + 時間集約 + 左右結合

In [None]:
def reduce_time(x: np.ndarray, mode: str):
    """Collapse axis 1 of a 3-D array; 2-D arrays are returned unchanged.

    Args:
        x: Array with 2 or 3 dimensions. For 3-D input, axis 1 is the one
            reduced (presumably the time axis -- confirm against the data).
        mode: One of 'mean', 'median', 'first', 'last'. Only consulted for
            3-D input.

    Returns:
        ``x`` itself when it is 2-D, otherwise ``x`` reduced over axis 1.

    Raises:
        ValueError: If ``x`` is neither 2-D nor 3-D, or if ``mode`` is not a
            recognised name (3-D input only).
    """
    rank = x.ndim
    if rank not in (2, 3):
        raise ValueError(f'Unexpected ndim {x.ndim}, expected 2 or 3')
    if rank == 2:  # already (n, voxels)
        return x

    reducers = {
        'mean': lambda arr: arr.mean(axis=1),
        'median': lambda arr: np.median(arr, axis=1),
        'first': lambda arr: arr[:, 0],
        'last': lambda arr: arr[:, -1],
    }
    if mode not in reducers:
        raise ValueError(f'Unknown TIME_AGG: {mode}')
    return reducers[mode](x)

def load_split(split: str):
    """Load both hemispheres of a split, reduce them and join them column-wise.

    The file locations come from the module-level ``paths`` dict (keys
    ``'<split>_lh'`` and ``'<split>_rh'``); each loaded array goes through
    ``reduce_time`` with the module-level ``TIME_AGG``.

    Args:
        split: Prefix of the ``paths`` keys to read.

    Returns:
        The left and right arrays concatenated along the last axis, or None
        when either path entry is None.

    Raises:
        ValueError: If the two hemispheres differ in their first dimension.
    """
    hemi_paths = [paths[f'{split}_{hemi}'] for hemi in ('lh', 'rh')]
    if any(p is None for p in hemi_paths):
        return None

    # Load both files before reducing either one.
    raw = [np.load(p) for p in hemi_paths]
    lh, rh = [reduce_time(arr, TIME_AGG) for arr in raw]

    n_lh, n_rh = lh.shape[0], rh.shape[0]
    if n_lh != n_rh:
        raise ValueError(f'LH/RH sample count mismatch: {lh.shape} vs {rh.shape}')
    return np.concatenate((lh, rh), axis=-1)

# Load each split; load_split returns None when a hemisphere file was not found.
train = load_split('training')
val = load_split('val')
test = load_split('test')

# getattr falls back to None for a split that is None (it has no .shape).
print('train shape:', getattr(train, 'shape', None))
print('val shape:', getattr(val, 'shape', None))
print('test shape:', getattr(test, 'shape', None))

## 3. 信頼度マスク (任意) 適用

In [None]:
def apply_mask(arr, mask):
    """Keep the columns of ``arr`` selected by ``mask``.

    Args:
        arr: Array indexed as ``arr[:, mask]``, or None.
        mask: Column selector passed straight to the indexing expression.

    Returns:
        ``arr[:, mask]``, or None when ``arr`` is None.
    """
    if arr is None:
        return None
    return arr[:, mask]

if not USE_RELIABILITY_MASK:
    print('mask not applied')
else:
    if paths['mask_all'].exists():
        # A single combined mask file takes precedence.
        mask = np.load(paths['mask_all']).astype(bool)
    else:
        # Otherwise build the mask from the per-hemisphere files (left first, then right).
        mask_l = None
        if paths['mask_lh'].exists():
            mask_l = np.load(paths['mask_lh']).astype(bool)
        mask_r = None
        if paths['mask_rh'].exists():
            mask_r = np.load(paths['mask_rh']).astype(bool)
        if mask_l is None or mask_r is None:
            raise FileNotFoundError('信頼度マスクが見つかりません。USE_RELIABILITY_MASK=False にするかパスを確認してください。')
        mask = np.concatenate([mask_l, mask_r])

    # The same column selection is applied to every split that was loaded.
    train = apply_mask(train, mask)
    val = apply_mask(val, mask)
    test = apply_mask(test, mask)
    print('mask applied, new shapes:', None if train is None else train.shape)

## 4. z-score (train 統計) と無分散ボクセル除外

In [None]:
if train is None:
    raise RuntimeError('train split が読み込めません。パス設定を確認してください。')

# Column statistics are taken from the training split only and reused for val/test.
mu = np.mean(train, axis=0, keepdims=True)
sigma = np.std(train, axis=0, keepdims=True)

# Columns with (numerically) zero training std would divide by ~0 below, so they
# are dropped from every split and from the statistics themselves.
nonzero = sigma.ravel() > 1e-8
train = train[:, nonzero]
val = None if val is None else val[:, nonzero]
test = None if test is None else test[:, nonzero]
mu, sigma = mu[:, nonzero], sigma[:, nonzero]

train_z = (train - mu) / sigma
val_z = None if val is None else (val - mu) / sigma
test_z = None if test is None else (test - mu) / sigma
print('kept voxels:', train_z.shape[1])

## 5. 1 サンプル抽出して voxel.npy 保存

In [None]:
# Pick the source split by priority: test, then val, then train.
if test_z is not None:
    source_arr = test_z
elif val_z is not None:
    source_arr = val_z
else:
    source_arr = train_z
if source_arr is None:
    raise RuntimeError('抽出元がありません')

# Clamp the requested index to the last row of the chosen split.
last_row = source_arr.shape[0] - 1
SAMPLE_INDEX = min(SAMPLE_INDEX, last_row)

# Store the selected row as float32 in the current working directory.
voxel_vec = source_arr[SAMPLE_INDEX].astype(np.float32)
np.save('voxel.npy', voxel_vec)
print('saved voxel.npy', voxel_vec.shape, voxel_vec.dtype)

## 6. 簡易チェック

In [None]:
# Overall mean/std of each standardised split that exists, then the first
# five values of the saved vector.
print('train_z mean/std:', train_z.mean(), train_z.std())
if test_z is not None:
    print('test_z mean/std:', test_z.mean(), test_z.std())
if val_z is not None:
    print('val_z mean/std:', val_z.mean(), val_z.std())
print('voxel head:', voxel_vec[:5])

In [None]:
%pip install -q numpy scipy h5py tqdm

In [None]:
from pathlib import Path
import numpy as np
from tqdm import tqdm

# Notebook-wide configuration; every later cell reads these names.
BASE_DIR = Path('/content/drive/MyDrive/algonauts_2023_tutorial_data')  # where the official tutorial data is placed
SUBJECT = 'subj01'  # subj01 through subj08
USE_RELIABILITY_MASK = False  # set to True to apply the reliability mask
TIME_AGG = 'mean'  # 'mean' | 'median' | 'first' | 'last'
SAMPLE_INDEX = 0  # index of the sample to extract (source priority: test -> val -> train)

# Per-subject data directory.
ROOT = BASE_DIR / SUBJECT
print('ROOT:', ROOT)

## 1. パス解決 (左右別ファイル)

In [None]:
def path_for(split: str, hemi: str):
    """Find the fMRI ``.npy`` file for a split/hemisphere pair below ``ROOT``.

    The nested layout ``<split>_split/<split>_fmri/`` is tried first, then the
    flat layout ``<split>_fmri/``; the file name is always
    ``<hemi>_<split>_fmri.npy``
    (e.g. ``training_split/training_fmri/lh_training_fmri.npy``).

    Args:
        split: Split name as used in directory and file names (e.g. 'training').
        hemi: Hemisphere prefix of the file name (e.g. 'lh' or 'rh').

    Returns:
        The first matching path that exists, or None when neither layout has it.
    """
    fmri_dir = f'{split}_fmri'
    file_name = f'{hemi}_{split}_fmri.npy'
    for parent in (ROOT / f'{split}_split' / fmri_dir, ROOT / fmri_dir):
        hit = parent / file_name
        if hit.exists():
            return hit
    return None

# Resolve every input file once. The training split is stored under the short
# 'train_*' keys although its files use the 'training' name on disk.
# fMRI entries are None when no file was found; mask entries are plain paths.
paths = {
    **{
        f'{key}_{hemi}': path_for(split, hemi)
        for key, split in (('train', 'training'), ('val', 'val'), ('test', 'test'))
        for hemi in ('lh', 'rh')
    },
    'mask_lh': ROOT / 'reliability_masks' / 'lh_reliability_mask.npy',
    'mask_rh': ROOT / 'reliability_masks' / 'rh_reliability_mask.npy',
    'mask_all': ROOT / 'reliability_masks' / 'reliability_mask.npy',
}

# Show each entry, printing None for anything that is not present on disk.
for k, v in paths.items():
    print(f"{k}: {v if v is not None and v.exists() else None}")

# Both hemispheres of the training split are mandatory.
if not paths['train_lh'] or not paths['train_rh']:
    raise FileNotFoundError('training の左右 fMRI が見つかりません。パスを確認してください。')

## 2. ロード + 時間集約 + 左右結合

In [None]:
def reduce_time(x: np.ndarray, mode: str):
    """Reduce a 3-D array over axis 1; pass a 2-D array through as is.

    Args:
        x: Array with 2 or 3 dimensions. For 3-D input, axis 1 is the one
            reduced (presumably the time axis -- confirm against the data).
        mode: One of 'mean', 'median', 'first', 'last'. Ignored for 2-D input.

    Returns:
        ``x`` itself when it is 2-D, otherwise ``x`` reduced over axis 1.

    Raises:
        ValueError: For any other number of dimensions, or for an unknown
            ``mode`` with 3-D input.
    """
    if x.ndim == 2:  # already (n, voxels)
        return x
    if x.ndim != 3:
        raise ValueError(f'Unexpected ndim {x.ndim}, expected 2 or 3')

    if mode == 'mean':
        reduced = x.mean(axis=1)
    elif mode == 'median':
        reduced = np.median(x, axis=1)
    elif mode == 'first':
        reduced = x[:, 0]
    elif mode == 'last':
        reduced = x[:, -1]
    else:
        raise ValueError(f'Unknown TIME_AGG: {mode}')
    return reduced

def load_split(split: str):
    """Load both hemispheres of a split, reduce them and join them column-wise.

    The file locations come from the module-level ``paths`` dict (keys
    ``'<split>_lh'`` and ``'<split>_rh'``); each loaded array goes through
    ``reduce_time`` with the module-level ``TIME_AGG``.

    Args:
        split: Prefix of the ``paths`` keys to read.

    Returns:
        The left and right arrays concatenated along the last axis, or None
        when either entry is None or the split has no entry in ``paths``.

    Raises:
        ValueError: If the two hemispheres differ in their first dimension.
    """
    # .get() treats a split name without a `paths` entry like a missing file.
    # The notebook retries with 'training' while `paths` only holds 'train_*'
    # keys; plain indexing would raise KeyError there.
    lh_path = paths.get(f'{split}_lh')
    rh_path = paths.get(f'{split}_rh')
    if lh_path is None or rh_path is None:
        return None
    lh = np.load(lh_path)
    rh = np.load(rh_path)
    lh = reduce_time(lh, TIME_AGG)
    rh = reduce_time(rh, TIME_AGG)
    if lh.shape[0] != rh.shape[0]:
        raise ValueError(f'LH/RH sample count mismatch: {lh.shape} vs {rh.shape}')
    return np.concatenate([lh, rh], axis=-1)

# Load the training split under whichever key prefix `paths` uses.
train = load_split('train') if 'train_lh' in paths else load_split('training')
# Retry with the long name only when `paths` actually has those keys. Without
# this guard the lookup inside load_split raises KeyError and hides the
# explicit "train split" RuntimeError raised by the z-score cell.
if train is None and 'training_lh' in paths and 'training_rh' in paths:
    train = load_split('training')
val = load_split('val')
test = load_split('test')
print('train shape:', None if train is None else train.shape)
print('val shape:', None if val is None else val.shape)
print('test shape:', None if test is None else test.shape)

## 3. 信頼度マスク (任意) 適用

In [None]:
def apply_mask(arr, mask):
    """Select the columns of ``arr`` given by ``mask``; None stays None.

    Args:
        arr: Array indexed as ``arr[:, mask]``, or None.
        mask: Column selector passed straight to the indexing expression.

    Returns:
        ``arr[:, mask]``, or None when ``arr`` is None.
    """
    if arr is None:
        return None
    return arr[:, mask]

if not USE_RELIABILITY_MASK:
    print('mask not applied')
else:
    if paths['mask_all'].exists():
        # A single combined mask file takes precedence.
        mask = np.load(paths['mask_all']).astype(bool)
    else:
        # Otherwise build the mask from the per-hemisphere files (left first, then right).
        mask_l = None
        if paths['mask_lh'].exists():
            mask_l = np.load(paths['mask_lh']).astype(bool)
        mask_r = None
        if paths['mask_rh'].exists():
            mask_r = np.load(paths['mask_rh']).astype(bool)
        if mask_l is None or mask_r is None:
            raise FileNotFoundError('信頼度マスクが見つかりません。USE_RELIABILITY_MASK=False にするかパスを確認してください。')
        mask = np.concatenate([mask_l, mask_r])

    # The same column selection is applied to every split that was loaded.
    train = apply_mask(train, mask)
    val = apply_mask(val, mask)
    test = apply_mask(test, mask)
    print('mask applied, new shapes:', None if train is None else train.shape)

## 4. z-score (train 統計) と無分散ボクセル除外

In [None]:
if train is None:
    raise RuntimeError('train split が読み込めません。パス設定を確認してください。')

# Column statistics are taken from the training split.
mu = np.mean(train, axis=0, keepdims=True)
sigma = np.std(train, axis=0, keepdims=True)

# Keep only the columns whose training std is above the 1e-8 threshold.
nonzero = sigma.ravel() > 1e-8
train = train[:, nonzero]

In [None]:
from pathlib import Path
import numpy as np
from tqdm import tqdm

# Notebook-wide configuration; every later cell reads these names.
BASE_DIR = Path('/content/drive/MyDrive/algonauts_2023_tutorial_data')  # official tutorial data on Colab Drive
SUBJECT = 'subj01'  # subj01 through subj08
USE_RELIABILITY_MASK = False  # set to True to apply the reliability mask
TIME_AGG = 'mean'  # 'mean' | 'median' | 'first' | 'last'
SAMPLE_INDEX = 0  # index of the sample to extract (priority: test -> val -> train)

# Per-subject data directory.
ROOT = BASE_DIR / SUBJECT
print('ROOT:', ROOT)

## 1. パス解決 (左右別ファイル)

In [None]:
def path_for(split: str, hemi: str):
    """Return the on-disk fMRI ``.npy`` path for a split and hemisphere, if any.

    Candidates below ``ROOT``, checked in order:
      1. ``<split>_split/<split>_fmri/<hemi>_<split>_fmri.npy``
         (e.g. ``training_split/training_fmri/lh_training_fmri.npy``)
      2. ``<split>_fmri/<hemi>_<split>_fmri.npy``

    Args:
        split: Split name as used in directory and file names (e.g. 'training').
        hemi: Hemisphere prefix of the file name (e.g. 'lh' or 'rh').

    Returns:
        The first candidate that exists, or None when none of them does.
    """
    leaf_name = f'{hemi}_{split}_fmri.npy'
    nested = ROOT / f'{split}_split' / f'{split}_fmri' / leaf_name
    flat = ROOT / f'{split}_fmri' / leaf_name
    return next((option for option in (nested, flat) if option.exists()), None)

# Resolve every input file once. The training split is stored under the short
# 'train_*' keys although its files use the 'training' name on disk.
# fMRI entries are None when no file was found; mask entries are plain paths.
paths = {
    'train_lh': path_for('training', 'lh'),
    'train_rh': path_for('training', 'rh'),
    'val_lh': path_for('val', 'lh'),
    'val_rh': path_for('val', 'rh'),
    'test_lh': path_for('test', 'lh'),
    'test_rh': path_for('test', 'rh'),
    'mask_lh': ROOT / 'reliability_masks' / 'lh_reliability_mask.npy',
    'mask_rh': ROOT / 'reliability_masks' / 'rh_reliability_mask.npy',
    'mask_all': ROOT / 'reliability_masks' / 'reliability_mask.npy',
}
# Show each entry, printing None for anything that is not present on disk.
# (The print call was cut off mid-statement, which made the whole cell a
# SyntaxError; it is restored to the complete form.)
for k, v in paths.items():
    print(f"{k}: {v if v and v.exists() else None}")

# Both hemispheres of the training split are mandatory.
if not (paths['train_lh'] and paths['train_rh']):
    raise FileNotFoundError('training の左右 fMRI が見つかりません。パスを確認してください。')

## 2. ロード + 時間集約 + 左右結合

In [None]:
def reduce_time(x: np.ndarray, mode: str):
    if x.ndim == 2:  # (n, voxels)
        return x
    if x.ndim != 3:
        raise ValueError(f'Unexpected ndim {x.ndim}, expected 2 or 3')
    if mode == 'mean':
        return x.mean(axis=1)
    if mode == 'median':
        return np.median(x, axis=1)
    if mode == 'first':
        return x[:, 0]