# SageMaker Smoke Test

This notebook validates that your SageMaker environment is properly configured for running Contemplative Constitutional AI experiments.

It runs the following checks, in order:
- System requirements (Python version, memory, disk space)
- PyTorch and CUDA setup
- Project model loader (`ModelLoader`)
- Model loading and text generation (GPT-2)
- S3 connectivity
- SageMaker utilities


In [None]:
import sys
import os
from pathlib import Path
import torch
import psutil
import yaml

# Navigate to repo root.
# Assumes the kernel starts one level below the repo root, so step up once --
# but only when the current directory does not already contain `src`. Without
# this guard, re-running the cell would climb one directory higher every time.
if not (Path.cwd() / 'src').is_dir():
    os.chdir('..')

# Make the packages under `src` importable. Skip the insert when the path is
# already registered so repeated runs do not pile up duplicate entries.
src_dir = str(Path.cwd() / 'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

print(f"Working directory: {os.getcwd()}")


## 1. System Requirements Check


In [None]:
# Report interpreter version, RAM and disk headroom, then flag low memory.
BYTES_PER_GIB = 1024**3
MIN_AVAILABLE_GIB = 4  # 4GB minimum

print("=== System Requirements Check ===")
print(f"Python version: {sys.version}")

# RAM snapshot as reported by psutil
ram = psutil.virtual_memory()
total_gib = ram.total / BYTES_PER_GIB
available_gib = ram.available / BYTES_PER_GIB
print(f"Total memory: {total_gib:.1f} GB")
print(f"Available memory: {available_gib:.1f} GB")
print(f"Memory usage: {ram.percent}%")

# Free space on the root filesystem
root_disk = psutil.disk_usage('/')
free_disk_gib = root_disk.free / BYTES_PER_GIB
print(f"Free disk space: {free_disk_gib:.1f} GB")

# Pass/warn verdict on available memory
if ram.available >= MIN_AVAILABLE_GIB * BYTES_PER_GIB:
    print("✅ Sufficient memory available")
else:
    print("⚠️ Warning: Less than 4GB available memory.")


## 2. PyTorch and CUDA Check


In [None]:
print("=== PyTorch Installation Check ===")
print(f"PyTorch version: {torch.__version__}")

# Query CUDA once and reuse the answer for both the report and the tensor test.
cuda_ready = torch.cuda.is_available()

if not cuda_ready:
    print("❌ CUDA is not available")
else:
    gpu_count = torch.cuda.device_count()
    print(f"✅ CUDA is available with {gpu_count} GPU(s)")
    # One name line and one memory line per visible device.
    for gpu_index in range(gpu_count):
        gpu_name = torch.cuda.get_device_name(gpu_index)
        gpu_props = torch.cuda.get_device_properties(gpu_index)
        print(f"   GPU {gpu_index}: {gpu_name}")
        print(f"   Memory: {gpu_props.total_memory / 1e9:.2f} GB")

# Exercise basic tensor creation on CPU and, when possible, arithmetic on GPU.
try:
    x = torch.randn(3, 3)
    print(f"✅ CPU tensor creation successful: {x.shape}")

    if cuda_ready:
        lhs_gpu = x.to('cuda')
        rhs_gpu = torch.randn(3, 3, device='cuda')
        sum_gpu = lhs_gpu + rhs_gpu
        print(f"✅ CUDA tensor operations successful: {sum_gpu.shape}")
except Exception as err:
    print(f"❌ Error in tensor operations: {err}")


## 3. Model Loader Test


In [None]:
print("=== Model Loader Test ===")

try:
    # Import inside the try block, like the other test cells, so a missing or
    # broken project package is reported as a failed check instead of aborting
    # the cell with a raw traceback.
    from models.model_loader import ModelLoader

    loader = ModelLoader()
    print("✅ ModelLoader initialization successful")

    # Test device detection
    device = loader.detect_device()
    print(f"✅ Device detection successful: {device}")

    # Test model info retrieval for the 'qwen2_0_5b' entry; the three keys read
    # below must be present in the returned mapping or the check fails.
    model_info = loader.get_model_info('qwen2_0_5b')
    print("✅ Model info retrieval successful")
    print(f"   Model: {model_info['model_name']}")
    print(f"   Size: {model_info['model_size']}")
    print(f"   Estimated memory: {model_info['estimated_memory_gb']}GB")

    # Test loading config lookup for the detected device
    loading_config = loader.get_loading_config(device)
    print(f"✅ Loading config retrieved for {device}")

except Exception as e:
    # Smoke-test convention: report the failure and let later cells run.
    print(f"❌ Error in ModelLoader test: {e}")


## 4. Simple Model Loading Test

This test loads a small, reliable model (GPT-2) and generates a short completion from it to verify that basic model loading and text generation work.


In [None]:
print("=== Simple Model Loading Test ===")

# Pre-bind every name released in `finally` so the cleanup works no matter
# where the try block fails (including before any of them is assigned).
model = tokenizer = inputs = outputs = None

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM

    print("Loading GPT-2 (small, reliable model)...")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    # Reuse the EOS token for padding, since no pad token is set otherwise here.
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    print("✅ GPT-2 model loaded successfully")
    print("   Model parameters: ~124M")
    print(f"   Tokenizer vocab size: {len(tokenizer)}")

    # Test generation
    prompt = "The meaning of life is"
    inputs = tokenizer(prompt, return_tensors="pt")

    # Move to GPU if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only: no gradients are needed for generation.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=20,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("✅ Generation successful")
    print(f"   Prompt: {prompt}")
    print(f"   Response: {response}")

except Exception as e:
    print(f"❌ Simple model loading failed: {e}")
finally:
    # Clean up on success AND on failure: a failed generation would otherwise
    # leave the model and its input tensors on the GPU for the rest of the
    # kernel session.
    del model, tokenizer, inputs, outputs
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


## 5. S3 Connectivity Test


In [None]:
print("=== S3 Connectivity Test ===")

# Stage 1: imports. These are kept out of the connectivity try/except below
# because its `except NoCredentialsError` / `except ClientError` clauses
# reference names that only exist once these imports have succeeded. If an
# import failed inside that same try block, evaluating the except clauses
# would raise NameError and crash the cell instead of reporting the failure.
try:
    import boto3  # not used directly; confirms boto3 itself is importable
    from botocore.exceptions import NoCredentialsError, ClientError
    from utils.sagemaker_utils import get_s3_client
except Exception as e:
    print(f"❌ Unexpected error: {e}")
else:
    # Stage 2: configuration lookup and the actual S3 calls.
    try:
        # Load config
        with open('configs/sagemaker_configs.yaml', 'r') as f:
            sagemaker_config = yaml.safe_load(f)

        # Kept at notebook scope: the SageMaker utilities cell reads S3_BUCKET.
        S3_BUCKET = sagemaker_config['s3']['bucket']
        print(f"Testing S3 bucket: {S3_BUCKET}")

        s3_client = get_s3_client()

        # Test bucket access
        s3_client.head_bucket(Bucket=S3_BUCKET)
        print(f"✅ Successfully accessed S3 bucket: {S3_BUCKET}")

        # Test list operation (a single key is enough to prove list access)
        response = s3_client.list_objects_v2(Bucket=S3_BUCKET, MaxKeys=1)
        print("✅ S3 list operation successful")

    except NoCredentialsError:
        print("❌ AWS credentials not found")
    except ClientError as e:
        # A '404' error code is reported as a missing bucket; any other client
        # error is printed verbatim.
        if e.response['Error']['Code'] == '404':
            print(f"❌ Bucket '{S3_BUCKET}' not found")
        else:
            print(f"❌ S3 error: {e}")
    except Exception as e:
        print(f"❌ Unexpected error: {e}")


## 6. SageMaker Utilities Test


In [None]:
print("=== SageMaker Utilities Test ===")

try:
    from utils.sagemaker_utils import (
        is_sagemaker_environment,
        detect_sagemaker_device,
        get_sagemaker_paths,
        S3PathManager
    )

    print(f"Is SageMaker environment: {is_sagemaker_environment()}")
    print(f"Detected device: {detect_sagemaker_device()}")

    paths = get_sagemaker_paths()
    print(f"✅ SageMaker paths retrieved: {len(paths)} paths")

    # Test S3PathManager.
    # S3_BUCKET is set by the S3 connectivity cell. Look it up defensively so
    # that a failed or skipped S3 cell produces a clear skip notice here rather
    # than a confusing "name 'S3_BUCKET' is not defined" error.
    bucket = globals().get('S3_BUCKET')
    if bucket is None:
        print("⚠️ S3_BUCKET is not defined (run the S3 connectivity test first); skipping S3PathManager test")
    elif bucket == "your-bucket-contemplative-ai":
        # Placeholder bucket name: nothing meaningful to test against.
        print("⚠️ S3 bucket is still the placeholder value; skipping S3PathManager test")
    else:
        manager = S3PathManager(bucket)
        print("✅ S3PathManager initialized")
        test_local = manager.get_local_path("test/file.txt")
        test_s3 = manager.get_s3_path("test/file.txt")
        print(f"   Local path example: {test_local}")
        print(f"   S3 path example: {test_s3}")

    print("✅ All SageMaker utilities working")

except Exception as e:
    # Smoke-test convention: report the failure and let later cells run.
    print(f"❌ Error testing utilities: {e}")


## 7. Summary

All tests complete! Check the results above.


In [None]:
# Closing banner, followed by pointers to the follow-up notebooks.
banner_rule = "=" * 50
follow_up_steps = [
    "  1. Run 00_quickstart.ipynb for end-to-end test",
    "  2. Use 01_data_generation.ipynb to create training data",
    "  3. Use 02_training.ipynb to train models",
    "  4. Use 03_evaluation.ipynb to evaluate results",
]

for banner_line in (banner_rule, "Smoke Test Complete", banner_rule):
    print(banner_line)

print("\nIf all tests show ✅, your environment is fully configured!")
print("\nNext steps:")
print("\n".join(follow_up_steps))
