In [2]:
from datasets import load_dataset
import torchvision.transforms as T
from train_cnn_imagenet import SafeImageNetDataset

train_dataset = load_dataset("imagenet-1k", split="train").shuffle(seed=42).select(range(100))
eval_dataset = load_dataset("imagenet-1k", split="validation").shuffle(seed=42).select(range(100))

mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

# Define the data augmentation and preprocessing pipeline for training images
train_transform = T.Compose([
    T.Lambda(lambda x: x.convert('RGB') if x.mode != 'RGB' else x),  # Ensure 3 channels (convert grayscale to RGB)
    T.RandomResizedCrop(224, scale=(0.08, 1.0)),      # Randomly crop and resize to 224x224 (simulates zoom/scale)
    T.RandomHorizontalFlip(),                         # Randomly flip images horizontally (augmentation)
    T.RandAugment(num_ops=2, magnitude=9),            # Apply 2 random augmentations with magnitude 9 (extra augmentation)
    T.ToTensor(),                                     # Convert PIL Image or numpy.ndarray to tensor and scale to [0, 1]
    T.Normalize(mean, std),                           # Normalize using ImageNet mean and std
    T.RandomErasing(p=0.25, scale=(0.02, 0.1)),       # Randomly erase a rectangle region (extra augmentation, 25% chance)
])

# Define the preprocessing pipeline for evaluation images (no heavy augmentation)
eval_transform = T.Compose([
    T.Lambda(lambda x: x.convert('RGB') if x.mode != 'RGB' else x),  # Ensure 3 channels (convert grayscale to RGB)
    T.Resize(256),                                    # Resize shorter side to 256 pixels
    T.CenterCrop(224),                                # Crop the center 224x224 region
    T.ToTensor(),                                     # Convert to tensor and scale to [0, 1]
    T.Normalize(mean, std),                           # Normalize using ImageNet mean and std
])

def train_transform_fn(examples):
    # Handle both single examples and batches
    if isinstance(examples['image'], list):
        # Batch processing
        examples["pixel_values"] = [train_transform(image) for image in examples["image"]]
    else:
        # Single example processing  
        examples["pixel_values"] = train_transform(examples["image"])

    # Remove the original image to avoid DataLoader issues
    del examples["image"]
    return examples

def eval_transform_fn(examples):
    # Handle both single examples and batches
    if isinstance(examples['image'], list):
        # Batch processing
        examples["pixel_values"] = [eval_transform(image) for image in examples["image"]]
    else:
        # Single example processing
        examples["pixel_values"] = eval_transform(examples["image"])

    # Remove the original image to avoid DataLoader issues
    del examples["image"]
    return examples

# Wrap datasets with safe wrapper to handle EXIF errors on-demand
train_dataset = SafeImageNetDataset(train_dataset, train_transform_fn)
eval_dataset = SafeImageNetDataset(eval_dataset, eval_transform_fn)

Loading dataset shards:   0%|          | 0/257 [00:00<?, ?it/s]

In [3]:
train_dataset[0], eval_dataset[0]

({'label': 126,
  'pixel_values': tensor([[[-2.1179, -2.1179, -2.1179,  ...,  2.2489,  2.2489,  2.1290],
           [-2.1179, -2.1179, -2.1179,  ...,  2.2489,  2.2489,  2.2489],
           [-2.1179, -2.1179, -2.1179,  ...,  2.2489,  2.2489,  2.2489],
           ...,
           [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
           [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
           [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179]],
  
          [[-2.0357, -2.0357, -2.0357,  ...,  1.8859,  1.7283,  1.5007],
           [-2.0357, -2.0357, -2.0357,  ...,  1.9209,  1.8158,  1.6583],
           [-2.0357, -2.0357, -2.0357,  ...,  2.0259,  1.9734,  1.9034],
           ...,
           [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
           [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
           [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357]],
  
          [[-1.8044, -1.8044, -1.8044,  ...,  1.6117

In [4]:
train_dataset[0]['pixel_values'].shape, eval_dataset[0]['pixel_values'].shape

(torch.Size([3, 224, 224]), torch.Size([3, 224, 224]))

In [5]:
# Test with actual grayscale image
from PIL import Image
import numpy as np
import torch

print("Testing RGB conversion with actual grayscale image...")

# Create a test grayscale image
gray_array = np.random.randint(0, 256, (300, 400), dtype=np.uint8)
grayscale_image = Image.fromarray(gray_array, mode='L')  # 'L' mode = grayscale

print(f"Original grayscale image mode: {grayscale_image.mode}")
print(f"Original grayscale image size: {grayscale_image.size}")

# Test train transform on grayscale image
print("\nTesting train transform on grayscale image:")
try:
    train_result = train_transform(grayscale_image)
    print(f"‚úÖ Train transform result shape: {train_result.shape}")
    print(f"‚úÖ Successfully converted to {train_result.shape[0]} channels")
    
    # Verify all 3 channels have the same values (since it was grayscale)
    channel_equality = torch.allclose(train_result[0], train_result[1]) and torch.allclose(train_result[1], train_result[2])
    print(f"‚úÖ All 3 channels identical (as expected): {channel_equality}")
    
except Exception as e:
    print(f"‚ùå Error in train transform: {e}")

# Test eval transform on grayscale image  
print("\nTesting eval transform on grayscale image:")
try:
    eval_result = eval_transform(grayscale_image)
    print(f"‚úÖ Eval transform result shape: {eval_result.shape}")
    print(f"‚úÖ Successfully converted to {eval_result.shape[0]} channels")
    
    # Verify all 3 channels have the same values (since it was grayscale)
    channel_equality = torch.allclose(eval_result[0], eval_result[1]) and torch.allclose(eval_result[1], eval_result[2])
    print(f"‚úÖ All 3 channels identical (as expected): {channel_equality}")
    
except Exception as e:
    print(f"‚ùå Error in eval transform: {e}")

print("\nüéØ Grayscale to RGB conversion test completed!")


Testing RGB conversion with actual grayscale image...
Original grayscale image mode: L
Original grayscale image size: (400, 300)

Testing train transform on grayscale image:
‚úÖ Train transform result shape: torch.Size([3, 224, 224])
‚úÖ Successfully converted to 3 channels
‚úÖ All 3 channels identical (as expected): False

Testing eval transform on grayscale image:
‚úÖ Eval transform result shape: torch.Size([3, 224, 224])
‚úÖ Successfully converted to 3 channels
‚úÖ All 3 channels identical (as expected): False

üéØ Grayscale to RGB conversion test completed!


In [6]:
# Check pixel value range after normalization for both train and eval sets

def check_pixel_range(dataset, name="dataset"):
    """Check pixel value range for any dataset type."""
    import numpy as np
    
    try:
        # Sample up to 100 items from the dataset
        sample_size = min(100, len(dataset))
        arr = []
        
        print(f"Sampling {sample_size} items from {name}...")
        
        for i in range(sample_size):
            try:
                sample = dataset[i]
                
                # Handle different sample formats
                if isinstance(sample, dict):
                    if 'pixel_values' in sample:
                        pixel_values = sample['pixel_values']
                    elif 'image' in sample:
                        # Apply transform if available
                        if hasattr(dataset, 'transform_fn') and dataset.transform_fn:
                            sample = dataset.transform_fn(sample)
                            pixel_values = sample['pixel_values']
                        else:
                            continue  # Skip if no transform available
                    else:
                        continue  # Skip if no pixel data found
                elif isinstance(sample, tuple) and len(sample) >= 1:
                    pixel_values = sample[0]  # Assume first element is image
                else:
                    continue  # Skip unknown format
                
                # Convert to numpy if it's a tensor
                if torch.is_tensor(pixel_values):
                    pixel_values = pixel_values.cpu().numpy()
                
                arr.append(pixel_values)
                
                # Progress indicator
                if (i + 1) % 20 == 0:
                    print(f"  Processed {i + 1}/{sample_size} samples...")
                    
            except Exception as e:
                print(f"  ‚ö†Ô∏è Error processing sample {i}: {e}")
                continue
        
        if not arr:
            print(f"‚ùå No valid pixel data found in {name}")
            return
            
        # Stack all pixel values
        arr = np.stack(arr)
        
        print(f"\nüìä {name} pixel value statistics:")
        print(f"  Shape: {arr.shape}")
        print(f"  Min: {arr.min():.4f}")
        print(f"  Max: {arr.max():.4f}")
        print(f"  Mean: {arr.mean():.4f}")
        print(f"  Std: {arr.std():.4f}")
        
        # Check if values are in expected range for normalized images
        if arr.min() < -3 or arr.max() > 3:
            print(f"  ‚ö†Ô∏è Warning: Pixel values outside typical normalized range [-3, 3]")
        else:
            print(f"  ‚úÖ Pixel values in expected normalized range")
            
    except Exception as e:
        print(f"‚ùå Error checking pixel range for {name}: {e}")

# Check train set
check_pixel_range(train_dataset, name="train_dataset")

# Check eval set
check_pixel_range(eval_dataset, name="eval_dataset")


Sampling 100 items from train_dataset...
  Processed 20/100 samples...
  Processed 40/100 samples...
  Processed 60/100 samples...
  Processed 80/100 samples...
  Processed 100/100 samples...

üìä train_dataset pixel value statistics:
  Shape: (100, 3, 224, 224)
  Min: -2.1179
  Max: 2.6400
  Mean: -0.1905
  Std: 1.2783
  ‚úÖ Pixel values in expected normalized range
Sampling 100 items from eval_dataset...
  Processed 20/100 samples...
  Processed 40/100 samples...
  Processed 60/100 samples...
  Processed 80/100 samples...
  Processed 100/100 samples...

üìä eval_dataset pixel value statistics:
  Shape: (100, 3, 224, 224)
  Min: -2.1179
  Max: 2.6400
  Mean: -0.0460
  Std: 1.1827
  ‚úÖ Pixel values in expected normalized range


In [7]:
# üîß FINAL TEST: Disable shuffling to verify labels are correct
import torch
import numpy as np
from transformers import TrainingArguments
from prelu_cnn import CNN, CNNTrainer
from torch.utils.data import DataLoader, SequentialSampler

print("üîß FINAL VERIFICATION: Disabling shuffling to check labels")
print("=" * 60)

# Create a custom trainer that disables shuffling
class NoShuffleCNNTrainer(CNNTrainer):
    """CNNTrainer with shuffling disabled for testing."""
    
    def get_train_dataloader(self):
        """Override to use SequentialSampler instead of RandomSampler."""
        train_dataset = self.train_dataset
        
        return DataLoader(
            train_dataset,
            batch_size=self.args.per_device_train_batch_size,
            sampler=SequentialSampler(train_dataset),  # üîß Use SequentialSampler = no shuffling
            num_workers=self.args.dataloader_num_workers,
            pin_memory=self.args.dataloader_pin_memory,
            drop_last=self.args.dataloader_drop_last,
        )

# Create trainer with normal training arguments
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
test_model = CNN(use_prelu=False, use_builtin_conv=True, num_classes=1000).to(device)

test_args = TrainingArguments(
    output_dir="./test_output",
    per_device_train_batch_size=3,
    per_device_eval_batch_size=3,
    learning_rate=1e-4,
    num_train_epochs=1,
    logging_steps=1,
    save_steps=1000,
    remove_unused_columns=False,
    dataloader_num_workers=0,
    label_names=["labels"],
    seed=42,
    data_seed=42,
)

print("‚úÖ Created custom trainer that disables shuffling via SequentialSampler")

trainer_no_shuffle = NoShuffleCNNTrainer(
    model=test_model,
    args=test_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

print("\n1Ô∏è‚É£ Testing individual sample access...")
individual_samples = []
for i in range(3):
    sample = train_dataset[i]
    label = sample.get('label', sample.get('labels'))
    individual_samples.append(label)
    print(f"  train_dataset[{i}]: label={label}")

print("\n2Ô∏è‚É£ Testing Trainer DataLoader with shuffling disabled...")
train_dataloader = trainer_no_shuffle.get_train_dataloader()
print(f"DataLoader sampler type: {type(train_dataloader.sampler)}")

# Get batch from non-shuffled trainer
batch = next(iter(train_dataloader))
batch_labels = batch.get('label', batch.get('labels'))

if torch.is_tensor(batch_labels):
    trainer_labels = batch_labels.cpu().tolist()
else:
    trainer_labels = batch_labels

print(f"Trainer batch labels: {trainer_labels}")

print("\n3Ô∏è‚É£ Comparing results...")
print(f"Individual access: {individual_samples}")
print(f"Trainer (no shuffle): {trainer_labels}")

# Check if they match
if individual_samples == trainer_labels:
    print("\nüéâ SUCCESS: Individual labels EXACTLY match Trainer batch!")
    print("‚úÖ Your data pipeline is working perfectly!")
    print("‚úÖ Labels are correctly preserved through all transformations!")
    print("‚úÖ The previous mismatch was just normal shuffling behavior!")
else:
    print("\n‚ùå MISMATCH: Even without shuffling, labels don't match")
    print("üîç This indicates a real data pipeline issue that needs investigation")
    
    # Additional debugging
    print("\nüîç Additional debugging info:")
    for i in range(3):
        print(f"  Index {i}: Individual={individual_samples[i]}, Trainer={trainer_labels[i]}")

print("\n4Ô∏è‚É£ Testing pixel values consistency (without shuffling)...")
batch_pixels = batch.get('pixel_values')
individual_pixels = []

for i in range(3):
    sample = train_dataset[i]
    pixels = sample['pixel_values']
    if torch.is_tensor(pixels):
        pixels = pixels.cpu().numpy()
    individual_pixels.append(pixels.mean())

print("Pixel value means:")
for i in range(3):
    trainer_pixel_mean = batch_pixels[i].cpu().numpy().mean()
    print(f"  Sample {i}: Individual={individual_pixels[i]:.6f}, Trainer={trainer_pixel_mean:.6f}")
    
    # Note: These might still differ due to random augmentations in train_transform
    if abs(individual_pixels[i] - trainer_pixel_mean) < 0.1:
        print(f"    ‚úÖ Pixel values are very close (difference: {abs(individual_pixels[i] - trainer_pixel_mean):.6f})")
    else:
        print(f"    ‚ö†Ô∏è Pixel values differ (difference: {abs(individual_pixels[i] - trainer_pixel_mean):.6f}) - likely due to random augmentation")

print("\nüéØ Final verification completed!")
print("=" * 60)

# If everything matches, give final confirmation
if individual_samples == trainer_labels:
    print("\nüèÜ FINAL CONCLUSION:")
    print("Your data pipeline is 100% correct!")
    print("The shuffling detection confirmed normal training behavior.")
    print("You can proceed with confidence! üöÄ")


üîß FINAL VERIFICATION: Disabling shuffling to check labels
‚úÖ Created custom trainer that disables shuffling via SequentialSampler

1Ô∏è‚É£ Testing individual sample access...
  train_dataset[0]: label=126
  train_dataset[1]: label=77
  train_dataset[2]: label=136

2Ô∏è‚É£ Testing Trainer DataLoader with shuffling disabled...
DataLoader sampler type: <class 'torch.utils.data.sampler.SequentialSampler'>
Trainer batch labels: [126, 77, 136]

3Ô∏è‚É£ Comparing results...
Individual access: [126, 77, 136]
Trainer (no shuffle): [126, 77, 136]

üéâ SUCCESS: Individual labels EXACTLY match Trainer batch!
‚úÖ Your data pipeline is working perfectly!
‚úÖ Labels are correctly preserved through all transformations!
‚úÖ The previous mismatch was just normal shuffling behavior!

4Ô∏è‚É£ Testing pixel values consistency (without shuffling)...
Pixel value means:
  Sample 0: Individual=-0.468658, Trainer=-0.384804
    ‚úÖ Pixel values are very close (difference: 0.083853)
  Sample 1: Individual=-0

In [8]:
# Check loss against eval dataset for randomly initialized model
import torch
import torch.nn as nn
from transformers import TrainingArguments
from prelu_cnn import CNN, CNNTrainer
import numpy as np

print("üîç Testing randomly initialized model loss on evaluation dataset...")
print("=" * 70)

# Create a randomly initialized model (no training)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Initialize model with random weights (same as would be used for training)
random_model = CNN(use_prelu=False, use_builtin_conv=True, num_classes=1000).to(device)
print(f"‚úÖ Created randomly initialized CNN model")
print(f"   - Total parameters: {sum(p.numel() for p in random_model.parameters()):,}")
print(f"   - Trainable parameters: {sum(p.numel() for p in random_model.parameters() if p.requires_grad):,}")

# Set up training arguments for evaluation
eval_args = TrainingArguments(
    output_dir="../results/eval_output",
    per_device_eval_batch_size=8,  # Slightly larger batch for eval
    dataloader_num_workers=0,
    remove_unused_columns=False,
    label_names=["labels"],
    seed=42,
)

# Create trainer for evaluation
trainer = CNNTrainer(
    model=random_model,
    args=eval_args,
    eval_dataset=eval_dataset,
)

print(f"\nüìä Evaluating on {len(eval_dataset)} samples...")

# Run evaluation to get loss and metrics
eval_results = trainer.evaluate()

print(f"\nüìà Evaluation Results for Randomly Initialized Model:")
print("-" * 50)
for key, value in eval_results.items():
    if isinstance(value, float):
        print(f"  {key}: {value:.6f}")
    else:
        print(f"  {key}: {value}")

# For reference, let's also compute what we'd expect for random predictions
print(f"\nüéØ Expected Loss for Random Predictions:")
print("-" * 40)
num_classes = 1000
expected_random_loss = np.log(num_classes)  # -log(1/num_classes) = log(num_classes)
print(f"  Cross-entropy loss with {num_classes} classes: {expected_random_loss:.6f}")

# Compare actual vs expected
actual_loss = eval_results.get('eval_loss', 0)
loss_ratio = actual_loss / expected_random_loss
print(f"\nüìä Comparison:")
print(f"  Actual loss: {actual_loss:.6f}")
print(f"  Expected random loss: {expected_random_loss:.6f}")
print(f"  Ratio (actual/expected): {loss_ratio:.3f}")

if 0.9 <= loss_ratio <= 1.1:
    print("  ‚úÖ Loss is very close to random expectation - model is properly initialized!")
elif 0.7 <= loss_ratio <= 1.3:
    print("  ‚úÖ Loss is reasonably close to random expectation - this is normal for initialized models")
else:
    print("  ‚ö†Ô∏è  Loss deviates significantly from random expectation - check initialization")

print(f"\nüèÅ Evaluation completed!") 

# If Loss = 1.876, then P_correct = e^(-1.876) ‚âà 0.153 (15.3%)
# Expected Top-1 Accuracy:
# For ImageNet classification, there's an empirical relationship:
# Loss ~6.9 (random) ‚Üí ~0.1% accuracy
# Loss ~4.0 ‚Üí ~1-5% accuracy
# Loss ~2.5 ‚Üí ~10-20% accuracy
# Loss ~1.9 ‚Üí ~15-25% accuracy
# Loss ~1.0 ‚Üí ~40-60% accuracy
# Loss ~0.5 ‚Üí ~70-85% accuracy
# For ImageNet state-of-the-art: 15-25% would be poor (SOTA is 80-90%)

üîç Testing randomly initialized model loss on evaluation dataset...
Using device: cuda
‚úÖ Created randomly initialized CNN model
   - Total parameters: 74,993,896
   - Trainable parameters: 74,993,896

üìä Evaluating on 100 samples...



üìà Evaluation Results for Randomly Initialized Model:
--------------------------------------------------
  eval_loss: 6.925254
  eval_model_preparation_time: 0.000300
  eval_runtime: 0.769400
  eval_samples_per_second: 129.978000
  eval_steps_per_second: 16.897000

üéØ Expected Loss for Random Predictions:
----------------------------------------
  Cross-entropy loss with 1000 classes: 6.907755

üìä Comparison:
  Actual loss: 6.925254
  Expected random loss: 6.907755
  Ratio (actual/expected): 1.003
  ‚úÖ Loss is very close to random expectation - model is properly initialized!

üèÅ Evaluation completed!


In [9]:
# Load the trained model from results and evaluate it using the new from_pretrained method
import torch
import torch.nn as nn
from transformers import TrainingArguments
from prelu_cnn import CNN, CNNTrainer
import numpy as np
import os

print("üîÑ Loading trained model from results and evaluating...")
print("=" * 70)

# Find the latest checkpoint using the shared utility
from shared_utils import find_latest_checkpoint

results_dir = "/home/chrisobrien/model-examples/results/cnn_results_relu"
checkpoint_path = find_latest_checkpoint(results_dir)

if not checkpoint_path:
    print("‚ùå No checkpoints found in results directory")
    raise FileNotFoundError("No checkpoints found")

latest_checkpoint = os.path.basename(checkpoint_path)
print(f"üìÇ Loading from: {latest_checkpoint}")

# Use the new from_pretrained class method - much cleaner!
trained_model = CNN.from_pretrained(checkpoint_path)

# Set up training arguments for evaluation
eval_args = TrainingArguments(
    output_dir="../results/eval_trained_output",
    per_device_eval_batch_size=8,
    dataloader_num_workers=0,
    remove_unused_columns=False,
    label_names=["labels"],
    seed=42,
)

# Create trainer for evaluation
trainer = CNNTrainer(
    model=trained_model,
    args=eval_args,
    eval_dataset=eval_dataset,
)

print(f"\nüìä Evaluating trained model on {len(eval_dataset)} samples...")

# Run evaluation to get loss and metrics
eval_results = trainer.evaluate()

print(f"\nüìà Evaluation Results for Trained Model ({latest_checkpoint}):")
print("-" * 60)
for key, value in eval_results.items():
    if isinstance(value, float):
        print(f"  {key}: {value:.6f}")
    else:
        print(f"  {key}: {value}")

print(f"\nüèÅ Trained model evaluation completed!")
print(f"üìç Checkpoint used: {latest_checkpoint}")


üîÑ Loading trained model from results and evaluating...
üìÇ Loading from: checkpoint-1801800
üîÑ Loading CNN model from checkpoint...
   Path: /home/chrisobrien/model-examples/results/cnn_results_relu/checkpoint-1801800
   Activation: ReLU
   Device: cuda
üì• Loading trained weights from: model.safetensors

üìä Evaluating trained model on 100 samples...



üìà Evaluation Results for Trained Model (checkpoint-1801800):
------------------------------------------------------------
  eval_loss: 1.834600
  eval_model_preparation_time: 0.000300
  eval_runtime: 0.412000
  eval_samples_per_second: 242.697000
  eval_steps_per_second: 31.551000

üèÅ Trained model evaluation completed!
üìç Checkpoint used: checkpoint-1801800
