In [12]:
# Import required libraries
import numpy as np
from sklearn.model_selection import train_test_split
from datetime import datetime
import matplotlib.pyplot as plt
import time
import tensorflow as tf
from pathlib import Path
import multiprocessing
import concurrent.futures

# Set random seeds for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Global configuration
INPUT_SHAPE = (128, 128, 128, 1)
BATCH_SIZE = 8
LEARNING_RATE = 0.001
EPOCHS = 200
VALIDATION_SPLIT = 0.2

# Basic GPU configuration (handles CUDA compatibility issues)
try:
    gpus = tf.config.list_physical_devices("GPU")
    if gpus:
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print(f"✅ GPU configured: {len(gpus)} device(s) found")
    else:
        print("⚠️  No GPU found - using CPU")
except Exception as e:
    print(f"⚠️  GPU setup failed: {e} - using CPU fallback")

print("✅ Environment setup complete")

✅ GPU configured: 1 device(s) found
✅ Environment setup complete


In [21]:
def load_volume(file_path, target_shape=(128, 128, 128)):
    try:
        with np.load(file_path) as data:
            # Get volume data
            volume = data['img']
            
            volume = (volume - volume.min()) / (volume.max() - volume.min() + 1e-8)
            return volume.astype(np.float32)
    except Exception as e:
        print(f"❌ Failed: {file_path.name}")
        return None

def load_dataset(num_samples=None):
    # Paths
    
    raw_dir = Path('/mnt/home/dchhantyal/centroid_model_blastospim/data/raw/')
    labels_dir = Path('/mnt/home/dchhantyal/centroid_model_blastospim/data/labels/Blast')
    
    # Get data files from all subdirectories
    data_files = sorted(list(raw_dir.rglob('*.npz')))
    if num_samples:
        data_files = data_files[:num_samples]
    
    print(f"📦 Processing {len(data_files)} files...")
    
    volumes, centroids = [], []
    
    # Process with threading for I/O
    def process_file(file_path):
        volume = load_volume(file_path)
        if volume is None:
            return None, None
            
        # Load corresponding label
        label_file = labels_dir / f"label_{file_path.stem}" / "data" / "label.npz"
        try:
            with np.load(label_file) as label_data:
                centroid = label_data['centroid']
                # Normalize centroid [0,1]
                norm_centroid = np.array([
                    centroid[0], 
                    centroid[1], 
                    centroid[2]
                ], dtype=np.float32)
                print("volumne shape ",volume.shape, " file name:", file_path.stem)
                return volume, norm_centroid
        except:
            print(file_path, " loading failed")
            return None, None
    
    # Parallel processing
    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
        results = list(executor.map(process_file, data_files))
    
    # Collect successful results
    for volume, centroid in results:
        if volume is not None:
            volumes.append(volume)
            centroids.append(centroid)
    
    print(f"✅ Loaded {len(volumes)} samples successfully")
    return np.array(volumes), np.array(centroids)

In [22]:
# Load Dataset
print("🚀 Loading dataset...")
start_time = time.time()

# Load data 
X, y = load_dataset(num_samples=20)

# Add channel dimension for CNN
X = X[..., np.newaxis]

# Train/validation split
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=VALIDATION_SPLIT, random_state=RANDOM_SEED
)

loading_time = time.time() - start_time

print(f"✅ Dataset loaded successfully!")
print(f"📊 Training set: {X_train.shape}, {y_train.shape}")
print(f"📊 Validation set: {X_val.shape}, {y_val.shape}")
print(f"⏱️  Loading time: {loading_time:.2f} seconds")
print(f"💾 Memory usage: {(X.nbytes + y.nbytes) / (1024**3):.2f} GB")

🚀 Loading dataset...
📦 Processing 20 files...
volumne shape  (91, 2048, 2048)  file name: Blast_001
volumne shape  (96, 2048, 2048)  file name: Blast_002
volumne shape  (96, 2048, 2048)  file name: Blast_003
volumne shape  (101, 2048, 2048)  file name: Blast_005
volumne shape  (101, 2048, 2048)  file name: Blast_008
volumne shape  (101, 2048, 2048)  file name: Blast_007
volumne shape  (101, 2048, 2048)  file name: Blast_006
volumne shape  (101, 2048, 2048)  file name: Blast_004
volumne shape  (101, 2048, 2048)  file name: Blast_009
volumne shape  (101, 2048, 2048)  file name: Blast_010
volumne shape  (101, 2048, 2048)  file name: Blast_011
volumne shape  (101, 2048, 2048)  file name: Blast_016
volumne shape  (101, 2048, 2048)  file name: Blast_013
volumne shape  (101, 2048, 2048)  file name: Blast_015
volumne shape  (101, 2048, 2048)  file name: Blast_012
volumne shape  (101, 2048, 2048)  file name: Blast_014
volumne shape  (101, 2048, 2048)  file name: Blast_019
volumne shape  (101, 2

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (20,) + inhomogeneous part.