In [None]:
"""
JupyterHub Stress Test - AI/ML Workload Simulation
This notebook runs NumPy number crunching that targets roughly 1-2 GB of memory, similar to AI workloads
"""

import numpy as np
import time
from datetime import datetime

# ============================================================================
# CONFIGURATION
# ============================================================================
# How many times the complete set of workloads is repeated.
NUM_ITERATIONS = 3
# Multiplier applied to the scaled array dimensions
# (1.0 = ~750MB-1GB, 2.0 = ~1.5-2GB).
MEMORY_SCALE = 2.0

# Startup banner: emitted with a single print call, one argument per line.
print(
    "=" * 60,
    "JupyterHub Stress Test - AI/ML Workload Simulation",
    "=" * 60,
    f"Number of iterations: {NUM_ITERATIONS}",
    f"Memory scale: {MEMORY_SCALE}x (~{MEMORY_SCALE * 0.75:.1f}-{MEMORY_SCALE * 1.0:.1f} GB target)",
    f"Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
    sep="\n",
)

# Wall-clock reference point for the total-runtime figure reported at the end.
overall_start = time.time()

for iteration in range(NUM_ITERATIONS):
    # Each iteration runs five synthetic workloads back to back and prints the
    # wall-clock time of every workload plus the iteration total.
    # Tests 1 and 2 size their arrays from MEMORY_SCALE; Tests 3-5 use fixed sizes.
    print("\n" + "=" * 60)
    print(f"ITERATION {iteration + 1} of {NUM_ITERATIONS}")
    print("=" * 60 + "\n")
    iteration_start = time.time()

    # ========================================================================
    # Test 1: Large Matrix Operations (Neural Network Forward Pass Simulation)
    # ========================================================================
    print("Test 1: Large Matrix Operations (Simulating Neural Network)")
    print("-" * 60)

    # Layer widths of the simulated fully connected network, scaled by MEMORY_SCALE.
    layer_sizes = [int(width * MEMORY_SCALE) for width in (3000, 2500, 2000, 1500, 1000)]
    batch_size = int(500 * MEMORY_SCALE)

    print(f"Creating neural network layers: {layer_sizes}")
    print(f"Batch size: {batch_size}")

    start = time.time()

    # Random input batch, shape (batch_size, first layer width).
    X = np.random.randn(batch_size, layer_sizes[0]).astype(np.float32)
    print(f"Input shape: {X.shape}, Size: {X.nbytes / 1024**2:.2f} MB")

    # Forward pass: every activation is kept in the list, so all of them stay
    # in memory until the list is rebound.
    activations = [X]
    for layer_idx, (fan_in, fan_out) in enumerate(zip(layer_sizes, layer_sizes[1:]), start=1):
        W = np.random.randn(fan_in, fan_out).astype(np.float32)
        b = np.random.randn(fan_out).astype(np.float32)

        print(f"Layer {layer_idx}: W shape {W.shape}, Size: {W.nbytes / 1024**2:.2f} MB")

        # Affine transform followed by ReLU.
        Z = np.dot(activations[-1], W) + b
        A = np.maximum(0, Z)
        activations.append(A)

        # Weights and pre-activations are not needed once A is stored.
        del W, b, Z

    elapsed = time.time() - start
    print(f"✓ Completed in {elapsed:.2f} seconds\n")

    # ========================================================================
    # Test 2: Convolutional Operations (CNN Simulation)
    # ========================================================================
    print("Test 2: Convolutional Operations (Simulating CNN)")
    print("-" * 60)

    start = time.time()

    # Random image batch, shape (n_images, height, width, channels).
    n_images = int(250 * MEMORY_SCALE)
    img_height, img_width = 224, 224
    n_channels = 3

    images = np.random.randn(n_images, img_height, img_width, n_channels).astype(np.float32)
    print(f"Image batch shape: {images.shape}, Size: {images.nbytes / 1024**2:.2f} MB")

    n_filters = 64
    kernel_size = 3
    # Only the first few filters are evaluated to keep the runtime bounded.
    filters_used = min(10, n_filters)

    for layer in range(3):
        filters = np.random.randn(n_filters, kernel_size, kernel_size, n_channels).astype(np.float32)
        print(f"Conv Layer {layer+1}: {n_filters} filters, Size: {filters.nbytes / 1024**2:.2f} MB")

        # Output size of an unpadded, stride-1 convolution.
        out_height = img_height - kernel_size + 1
        out_width = img_width - kernel_size + 1

        # float32 is requested explicitly: np.zeros defaults to float64, which
        # would double this buffer (the largest array in the script; e.g. with
        # MEMORY_SCALE = 2.0 the first layer has 500 x 222 x 222 x 64 elements)
        # and turn every later layer's arithmetic into float64.
        output = np.zeros((n_images, out_height, out_width, n_filters), dtype=np.float32)

        # Simplified convolution: only every 10th row/column is computed; all
        # other output positions stay zero.
        for f in range(filters_used):
            for i in range(0, out_height, 10):
                for j in range(0, out_width, 10):
                    patch = images[:, i:i+kernel_size, j:j+kernel_size, :]
                    output[:, i, j, f] = np.sum(patch * filters[f], axis=(1, 2, 3))

        # Stride-2 subsampling standing in for pooling (no max is taken).
        # Basic slicing returns a view, so `images` keeps the whole `output`
        # buffer alive even after the `del` below removes the name.
        images = output[:, ::2, ::2, :]
        n_channels = n_filters
        img_height, img_width = images.shape[1], images.shape[2]

        del filters, output

    elapsed = time.time() - start
    print(f"✓ Completed in {elapsed:.2f} seconds\n")

    # ========================================================================
    # Test 3: Gradient Computation (Backpropagation Simulation)
    # ========================================================================
    print("Test 3: Gradient Computation (Simulating Backpropagation)")
    print("-" * 60)

    start = time.time()

    n_params = 10000000  # 10 million parameters
    learning_rate = 0.001

    print(f"Total parameters: {n_params:,}")

    # Flat float32 vectors standing in for model parameters and their gradients.
    weights = np.random.randn(n_params).astype(np.float32)
    gradients = np.random.randn(n_params).astype(np.float32)

    print(f"Weights size: {weights.nbytes / 1024**2:.2f} MB")
    print(f"Gradients size: {gradients.nbytes / 1024**2:.2f} MB")

    for iter_gd in range(5):
        # Plain gradient step, applied in place.
        weights -= learning_rate * gradients

        # Second step with a noisy blend of the gradients; allocates one more
        # n_params-sized temporary per iteration.
        momentum = 0.9 * gradients + 0.1 * np.random.randn(n_params).astype(np.float32)
        weights -= learning_rate * momentum

        # "Loss" is simply the mean squared weight, reported every iteration.
        loss = np.mean(weights ** 2)
        print(f"GD Iteration {iter_gd+1}: Loss = {loss:.6f}")

    elapsed = time.time() - start
    print(f"✓ Completed in {elapsed:.2f} seconds\n")

    # ========================================================================
    # Test 4: Embedding Operations (NLP Simulation)
    # ========================================================================
    print("Test 4: Embedding Operations (Simulating NLP Model)")
    print("-" * 60)

    start = time.time()

    vocab_size = 30000
    embedding_dim = 512
    sequence_length = 256
    batch_size = 64
    n_heads = 8
    head_dim = embedding_dim // n_heads

    print(f"Vocabulary size: {vocab_size:,}")
    print(f"Embedding dimension: {embedding_dim}")
    print(f"Sequence length: {sequence_length}")

    # Embedding table, shape (vocab_size, embedding_dim).
    embeddings = np.random.randn(vocab_size, embedding_dim).astype(np.float32)
    print(f"Embedding matrix size: {embeddings.nbytes / 1024**2:.2f} MB")

    # Random token ids, shape (batch_size, sequence_length).
    tokens = np.random.randint(0, vocab_size, size=(batch_size, sequence_length))
    print(f"Token batch shape: {tokens.shape}")

    # Integer-array indexing gathers one embedding row per token.
    embedded = embeddings[tokens]
    print(f"Embedded batch size: {embedded.nbytes / 1024**2:.2f} MB")

    # A plain Python float keeps the scaled scores in float32; dividing by the
    # NumPy float64 scalar that np.sqrt returns can promote them to float64
    # under NumPy 2 promotion rules.
    attention_scale = head_dim ** 0.5

    for head in range(n_heads):
        # Q, K and V are random here; they are not derived from `embedded`.
        Q = np.random.randn(batch_size, sequence_length, head_dim).astype(np.float32)
        K = np.random.randn(batch_size, sequence_length, head_dim).astype(np.float32)
        V = np.random.randn(batch_size, sequence_length, head_dim).astype(np.float32)

        # Scaled dot-product scores, shape (batch, seq, seq).
        scores = np.matmul(Q, K.transpose(0, 2, 1)) / attention_scale

        # Softmax over the last axis. Subtracting the row maximum leaves the
        # result mathematically unchanged but guards exp() against overflow;
        # the exponentials are computed once and reused.
        scores -= scores.max(axis=-1, keepdims=True)
        exp_scores = np.exp(scores)
        attention = exp_scores / exp_scores.sum(axis=-1, keepdims=True)
        output = np.matmul(attention, V)

        del Q, K, V, scores, exp_scores, attention, output

    elapsed = time.time() - start
    print(f"✓ Completed in {elapsed:.2f} seconds\n")

    # ========================================================================
    # Test 5: Data Augmentation (Image Processing Simulation)
    # ========================================================================
    print("Test 5: Data Augmentation (Image Processing)")
    print("-" * 60)

    start = time.time()

    n_images = 500
    img_size = 256
    augmented_images = []

    # Random uint8 images, shape (n_images, img_size, img_size, 3).
    base_images = np.random.randint(0, 256, size=(n_images, img_size, img_size, 3), dtype=np.uint8)
    print(f"Base images size: {base_images.nbytes / 1024**2:.2f} MB")

    for aug_idx in range(5):
        # Rotate by aug_idx quarter turns in the height/width plane; the images
        # are square, so every variant keeps the same shape.
        rotated = np.rot90(base_images, k=aug_idx % 4, axes=(1, 2))

        # Mirror along the width axis.
        flipped = np.flip(rotated, axis=2)

        # Random brightness factor around 1.0, applied in float32 and clipped
        # back into the uint8 range.
        adjusted = np.clip(flipped.astype(np.float32) * (1 + np.random.randn() * 0.2), 0, 255).astype(np.uint8)

        augmented_images.append(adjusted)
        print(f"Augmentation {aug_idx+1}: {adjusted.nbytes / 1024**2:.2f} MB")

    # Concatenate along the image axis into one array; the individual variants
    # remain referenced by the list as well.
    all_images = np.concatenate(augmented_images, axis=0)
    print(f"Total augmented dataset size: {all_images.nbytes / 1024**2:.2f} MB")

    elapsed = time.time() - start
    print(f"✓ Completed in {elapsed:.2f} seconds\n")

    iteration_elapsed = time.time() - iteration_start
    print(f"--- Iteration {iteration + 1} total time: {iteration_elapsed:.2f} seconds ---\n")

# ============================================================================
# Summary
# ============================================================================
# Wall-clock time elapsed since overall_start was recorded before the loop.
overall_elapsed = time.time() - overall_start

print("\n" + "=" * 60)
print("STRESS TEST COMPLETE")
print("=" * 60)
print(f"End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Total iterations: {NUM_ITERATIONS}")
print(f"Total runtime: {overall_elapsed:.2f} seconds ({overall_elapsed/60:.2f} minutes)")
# Guard the average: with NUM_ITERATIONS set to 0 the division would raise
# ZeroDivisionError after an otherwise clean (empty) run.
if NUM_ITERATIONS > 0:
    print(f"Average time per iteration: {overall_elapsed/NUM_ITERATIONS:.2f} seconds")
else:
    print("Average time per iteration: n/a (no iterations were run)")

# This figure is an estimate derived from MEMORY_SCALE (0.75-1.0 GB per unit of
# scale, the same estimate the startup banner prints); it is not measured.
print(f"\nApproximate peak memory usage: ~{MEMORY_SCALE * 0.75:.1f}-{MEMORY_SCALE * 1.0:.1f} GB")
print("All tests completed successfully!")
print("=" * 60)