In [1]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np

In [16]:
def sigzi(x, axis=None):
    """
    Robust spread estimate: the interquartile range (IQR) of x scaled by 0.741.

    The returned value is ``0.741 * (q75 - q25)``, i.e. a rescaled IQR and not
    the raw IQR itself.
    NOTE(review): 0.741 is approximately 1/1.349, the factor that converts the
    IQR of a normal distribution into its standard deviation - presumably that
    is the intent of the constant; confirm.

    Args:
        x: array-like input accepted by ``np.percentile``.
        axis: axis along which to compute the percentiles.
              If None, computes over the flattened array.

    Returns:
        A scalar when ``axis`` is None, otherwise an array with the reduced
        axis removed.
    """
    # A single percentile call yields both quartiles, so the data is only
    # partitioned once instead of twice.
    q25, q75 = np.percentile(x, [25, 75], axis=axis)
    return 0.741 * (q75 - q25)

def split_stack(arr, nrows, ncols):
    """
    Cut every panel of a 3-D stack into non-overlapping (nrows x ncols) tiles.

    When a panel's height or width is not a multiple of the tile size, the
    stack is zero-padded on the bottom / right edge first, so partial tiles
    at the border are kept (filled with zeros) rather than dropped.

    arr: ndarray, shape (P, H, W)
    Returns: ndarray, shape (P * ceil(H/nrows) * ceil(W/ncols), nrows, ncols),
             ordered panel by panel, then by tile row, then by tile column.
    """
    n_panels, height, width = arr.shape

    # Rows / columns missing to reach the next multiple of the tile size.
    extra_rows = (-height) % nrows
    extra_cols = (-width) % ncols
    if extra_rows or extra_cols:
        pad_widths = ((0, 0), (0, extra_rows), (0, extra_cols))
        arr = np.pad(arr, pad_widths, mode='constant', constant_values=0)

    tiles_down = arr.shape[1] // nrows
    tiles_across = arr.shape[2] // ncols

    # (P, tiles_down, nrows, tiles_across, ncols)
    #   -> (P, tiles_down, tiles_across, nrows, ncols)
    grid = arr.reshape(n_panels, tiles_down, nrows, tiles_across, ncols)
    grid = grid.swapaxes(2, 3)

    # Collapse the three leading axes into one flat tile index.
    n_tiles = grid.shape[0] * grid.shape[1] * grid.shape[2]
    return grid.reshape(n_tiles, grid.shape[3], grid.shape[4])

def build_datasets(npz_file, tile_size=128):
    """
    Load an image/target stack from a .npz archive and return tiled PyTorch tensors.

    Steps:
      - Reads the arrays stored under keys 'x' and 'y' (each is unpacked as
        (P, H, W) by split_stack).
      - Normalises x by its robust spread ``sigzi(x)`` (0.741 * IQR computed
        over the whole array). y is not rescaled.
      - Clips the normalised x to [-5, 5].
      - Splits x and y into (tile_size x tile_size) patches.
      - Converts to float tensors and adds a channel dimension
        (-> shape (N, 1, tile_size, tile_size)).

    Args:
        npz_file: path or file object accepted by ``np.load``.
        tile_size: edge length of the square tiles.

    Returns:
        (x_tiles, y_tiles): two float tensors of shape
        (N, 1, tile_size, tile_size).

    Raises:
        KeyError: if the archive has no 'x' or 'y' entry.
        ValueError: if the spread of x is zero or not finite, in which case
            the normalisation would only produce inf/NaN values.
    """
    # np.load returns a lazily-read NpzFile that keeps the archive open;
    # the context manager closes it as soon as both arrays are in memory.
    with np.load(npz_file) as data:
        x = data['x']  # shape (P, H, W)
        y = data['y']

    scale = sigzi(x)
    if not np.isfinite(scale) or scale == 0:
        raise ValueError(
            f"cannot normalise x: robust spread sigzi(x) is {scale!r}"
        )

    x = x / scale            # normalise by the scaled interquartile range
    x = np.clip(x, -5, 5)    # clip to [-5, 5]

    # Split into tiles (tile_size x tile_size)
    x_tiles = split_stack(x, tile_size, tile_size)  # (N_tiles, tile_size, tile_size)
    y_tiles = split_stack(y, tile_size, tile_size)

    # Convert to FloatTensor and add channel dimension
    x_tiles = torch.from_numpy(x_tiles).float().unsqueeze(1)  # (N, 1, tile_size, tile_size)
    y_tiles = torch.from_numpy(y_tiles).float().unsqueeze(1)  # (N, 1, tile_size, tile_size)

    return x_tiles, y_tiles

def split_train_val(x_tiles, y_tiles, train_frac=0.8, seed=42):
    """
    Randomly split paired tiles into a training and a validation TensorDataset.

    A permutation seeded with ``seed`` decides which samples go where: the
    first ``int(train_frac * n)`` permuted indices form the training set, the
    remainder the validation set. Inside each set the samples keep their
    original relative order (the chosen indices are sorted before use).

    Args:
        x_tiles: tensor whose first dimension indexes the samples.
        y_tiles: tensor with the same number of samples as x_tiles.
        train_frac: fraction of samples assigned to the training set,
            in [0, 1]. With 1 the validation dataset is empty.
        seed: seed for the permutation, making the split reproducible.

    Returns:
        (train_ds, val_ds): two TensorDataset objects of (x, y) pairs.

    Raises:
        ValueError: if x_tiles and y_tiles have different sample counts, or
            if train_frac lies outside [0, 1].
    """
    n = x_tiles.shape[0]
    if y_tiles.shape[0] != n:
        raise ValueError(
            f"x_tiles and y_tiles must have the same number of samples, "
            f"got {n} and {y_tiles.shape[0]}"
        )
    # A negative fraction would turn into a negative slice bound and silently
    # drop samples; reject anything outside [0, 1] (NaN included).
    if not 0 <= train_frac <= 1:
        raise ValueError(f"train_frac must be in [0, 1], got {train_frac!r}")

    rng = torch.Generator().manual_seed(seed)
    perm = torch.randperm(n, generator=rng)
    n_train = int(train_frac * n)

    # sort() returns a (values, indices) pair; only .values holds valid sample
    # indices, so it must not be used directly as an index.
    train_idx = perm[:n_train].sort().values
    val_idx = perm[n_train:].sort().values

    train_ds = TensorDataset(x_tiles[train_idx], y_tiles[train_idx])
    val_ds = TensorDataset(x_tiles[val_idx], y_tiles[val_idx])
    return train_ds, val_ds

In [4]:
# Load the raw test arrays by key: unpacking .values() would depend on the
# order in which the arrays were saved and could silently swap x and y.
# The context manager closes the archive once both arrays are read.
with np.load("../DATA/test.npz") as test_npz:
    x_test, y_test = test_npz["x"], test_npz["y"]

In [5]:
# Normalise, clip and tile the test archive into (N, 1, 128, 128) tensors.
x_tiles, y_tiles = build_datasets(npz_file="../DATA/test.npz", tile_size=128)
# Wrap all tiles in one dataset: with train_frac=1 every tile lands in the
# first returned dataset and the second (empty) one is discarded.
test_ds, _ = split_train_val(x_tiles=x_tiles, y_tiles=y_tiles, train_frac=1, seed=42)

IndexError: index 18613 is out of bounds for dimension 1 with size 1

In [17]:
# With train_frac=1 every tile lands in test_ds; the second (empty) dataset
# returned by split_train_val is discarded.
test_ds, _ = split_train_val(x_tiles=x_tiles, y_tiles=y_tiles, train_frac=1, seed=42)