In [1]:
import numpy as np
import nibabel as nib
from dipy.io import read_bvals_bvecs
from dipy.core.gradients import gradient_table

In [2]:
subject_id = "100206"
subject_path = f"diffusion_data/{subject_id}/T1w/Diffusion"
dwi_img = nib.load(f'{subject_path}/data.nii.gz')
mask_img = nib.load(f'{subject_path}/nodif_brain_mask.nii.gz')

In [3]:
# Convert to numpy arrays for processing
dwi_data = dwi_img.get_fdata()
mask = mask_img.get_fdata()

In [4]:
print(f"DWI data shape: {dwi_data.shape}")  # Should be (X, Y, Z, num_volumes)
print(f"Mask shape: {mask.shape}")          # Should be (X, Y, Z)

DWI data shape: (145, 174, 145, 288)
Mask shape: (145, 174, 145)


In [5]:
# Load gradient information (bvals and bvecs)
print("\nLoading gradient information...")
bvals, bvecs = read_bvals_bvecs(f'{subject_path}/bvals', 
                               f'{subject_path}/bvecs')
print(f"Number of gradient directions: {len(bvals)}")
print(f"bvals shape: {bvals.shape}")     # Should match number of volumes
print(f"bvecs shape: {bvecs.shape}")     # Should be (num_volumes, 3)


Loading gradient information...
Number of gradient directions: 288
bvals shape: (288,)
bvecs shape: (288, 3)


In [6]:
# Create gradient table for DIPY
gtab = gradient_table(bvals, bvecs)

In [8]:
# Identify and extract B0 (non-diffusion weighted) volumes
b0_mask = gtab.b0s_mask
b0_data = dwi_data[..., b0_mask]
print(f"\nNumber of B0 volumes: {np.sum(b0_mask)}")
print(f"B0 data shape: {b0_data.shape}")
# Average B0 volumes to get a single reference image
b0_avg = np.mean(b0_data, axis=-1)
print(f"Average B0 shape: {b0_avg.shape}")


Number of B0 volumes: 18
B0 data shape: (145, 174, 145, 18)
Average B0 shape: (145, 174, 145)


In [9]:
# Extract and normalize diffusion weighted volumes
dwi_mask = ~b0_mask  # Mask for diffusion weighted volumes
dwi_vols = dwi_data[..., dwi_mask]
print(f"\nNumber of DWI volumes: {np.sum(dwi_mask)}")
print(f"DWI volumes shape: {dwi_vols.shape}")


Number of DWI volumes: 270
DWI volumes shape: (145, 174, 145, 270)


In [10]:
# Normalize DWI volumes by B0 (avoid division by zero with small epsilon)
dwi_norm = dwi_vols / (b0_avg[..., None] + 1e-6)
print(f"Normalized DWI shape: {dwi_norm.shape}")

Normalized DWI shape: (145, 174, 145, 270)


In [15]:
# Find valid voxels using the brain mask
valid_idx = np.where(mask > 0)
print(f"\nNumber of valid voxels in mask: {len(valid_idx[0])}")


Number of valid voxels in mask: 936256


In [22]:
# Sample random voxels for training
n_samples = 1000  # Adjust this number based on your needs
sample_idx = np.random.choice(len(valid_idx[0]), 
                            min(n_samples, len(valid_idx[0])), 
                            replace=False)

In [25]:
# Extract features (signal intensities) from sampled voxels
features = []
for idx in sample_idx:
    x, y, z = valid_idx[0][idx], valid_idx[1][idx], valid_idx[2][idx]
    features.append(dwi_norm[x, y, z, :])

In [37]:
features = np.array(features)
gradient_directions = bvecs[dwi_mask]  # Only keep directions for DWI volumes

In [40]:
print("\nFinal data shapes:")
print(f"Features shape: {features.shape}")           # Should be (n_samples, n_directions)
print(f"Gradient directions shape: {gradient_directions.shape}")  # Should be (n_directions, 3)

# Basic sanity checks
print("\nSanity checks:")
print(f"Max normalized value: {np.max(features)}")
print(f"Min normalized value: {np.min(features)}")
print(f"Gradient directions magnitude close to 1: {np.allclose(np.linalg.norm(gradient_directions, axis=1), 1, atol=1e-3)}")


Final data shapes:
Features shape: (1000, 270)
Gradient directions shape: (270, 3)

Sanity checks:
Max normalized value: 2.5465984403372413
Min normalized value: 0.0
Gradient directions magnitude close to 1: True
