# Ray Parallel Computing Demo

This notebook demonstrates how to use Ray for parallel computing within Jupyter notebooks.

## Features:
- Parallel function execution
- Performance benchmarking
- Visualization of results
- Matrix operations
- Data processing

In [None]:
# Import required libraries
import sys
import os
sys.path.append('../ray-jobs')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ray_notebook_utils import RayNotebookHelper, ray_parallel, ParallelComputing, demo_parallel_computing

# Initialize Ray helper
ray_helper = RayNotebookHelper()
print("Ray initialized successfully!")
# `is_initialized` is a status indicator, not a CPU count, so label it as such.
# NOTE(review): presumably a boolean attribute on the helper; confirm in ray_notebook_utils.
print(f"Ray helper initialized: {ray_helper.is_initialized}")
# Host-level core count from the standard library; Ray may be configured to use fewer.
print(f"Host CPU cores: {os.cpu_count()}")

## 1. Basic Parallel Function Execution

In [None]:
# Define a computation-heavy function
def heavy_computation(n):
    """Return the sum of squares of every integer in ``range(n * 1000)``.

    The work is deliberately CPU-bound so it can serve as a benchmark payload.
    A non-positive ``n`` yields an empty range and therefore ``0``.
    """
    upper_bound = n * 1000
    return sum(value ** 2 for value in range(upper_bound))

# Workload sizes handed to heavy_computation: 100, 200, ..., 1000
test_data = list(range(100, 1001, 100))

# Let the helper time the same workload sequentially and in parallel
performance_result = ray_helper.benchmark_parallel_vs_sequential(
    heavy_computation, test_data, "Heavy Computation Benchmark"
)

# (template, result key) pairs; each line is formatted only when it is printed
summary_rows = (
    ("Sequential time: {:.3f}s", "sequential_time"),
    ("Parallel time: {:.3f}s", "parallel_time"),
    ("Speedup: {:.2f}x", "speedup"),
)

print("\nPerformance Summary:")
for template, result_key in summary_rows:
    print(template.format(performance_result[result_key]))

## 2. Parallel Matrix Operations

In [None]:
import time  # imported once for the cell instead of on every loop iteration

# Create test matrices from a seeded generator so reruns use identical inputs
print("Creating test matrices...")
matrix_rng = np.random.default_rng(42)
matrices = [matrix_rng.random((200, 200)) for _ in range(8)]

# Operation names forwarded to the helper
operations = ['multiply', 'add', 'eigenvals']

for op in operations:
    print(f"\nTesting {op} operation...")

    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    results = ray_helper.parallel_compute_matrix(matrices, op)
    elapsed = time.perf_counter() - start_time

    print(f"Completed {len(results)} {op} operations in {elapsed:.3f}s")

    if op == 'eigenvals':
        # Visualize the distribution of the real parts of the eigenvalues
        plt.figure(figsize=(10, 4))

        for i, eigenvals in enumerate(results[:3]):  # Show first 3
            plt.subplot(1, 3, i + 1)
            plt.hist(np.real(eigenvals), bins=20, alpha=0.7)
            plt.title(f'Matrix {i+1} Eigenvalues')
            plt.xlabel('Real Part')
            plt.ylabel('Frequency')

        plt.tight_layout()
        plt.show()

## 3. Parallel Data Processing

In [None]:
# Create test datasets
print("Creating test datasets...")

# A seeded generator makes the synthetic data, and every plot derived from it,
# identical across Restart & Run All.
dataset_rng = np.random.default_rng(42)

# Five frames of 1000 rows: three standard-normal numeric columns plus a
# categorical column drawn uniformly from X / Y / Z.
datasets = [
    pd.DataFrame({
        'A': dataset_rng.standard_normal(1000),
        'B': dataset_rng.standard_normal(1000),
        'C': dataset_rng.standard_normal(1000),
        'category': dataset_rng.choice(['X', 'Y', 'Z'], 1000),
    })
    for _ in range(5)
]

# Define data processing function
def analyze_dataframe(df):
    """Summarize one DataFrame as a plain dictionary.

    Returns a dict with:
      - 'mean' / 'std': per-column mean and standard deviation of the numeric columns
      - 'correlation': the row-0, column-1 entry of the numeric correlation matrix,
        i.e. the correlation between the first two numeric columns
      - 'category_counts': occurrence count of each value in the 'category' column
    """
    # Select the numeric columns once and reuse them for every statistic
    numeric = df.select_dtypes(include=[np.number])
    return {
        'mean': numeric.mean().to_dict(),
        'std': numeric.std().to_dict(),
        'correlation': numeric.corr().iloc[0, 1],
        'category_counts': df['category'].value_counts().to_dict(),
    }

# Process datasets in parallel
print("Processing datasets in parallel...")
analysis_results = ray_helper.parallel_data_processing(datasets, analyze_dataframe)

# One figure with three panels: per-column means, A-B correlation, category share
fig, (ax_means, ax_corr, ax_cat) = plt.subplots(1, 3, figsize=(15, 5))

x = range(len(datasets))

# Mean values across datasets. The marker is given only through the format
# string; also passing marker= makes matplotlib warn about a redundant definition.
for column, fmt in (('A', 'o-'), ('B', 's-'), ('C', '^-')):
    column_means = [result['mean'][column] for result in analysis_results]
    ax_means.plot(x, column_means, fmt, label=f'Column {column}')
ax_means.set_xlabel('Dataset')
ax_means.set_ylabel('Mean Value')
ax_means.set_title('Mean Values Across Datasets')
ax_means.legend()
ax_means.grid(True, alpha=0.3)

# Correlation values
correlations = [result['correlation'] for result in analysis_results]
ax_corr.bar(x, correlations, color='skyblue', alpha=0.7)
ax_corr.set_xlabel('Dataset')
ax_corr.set_ylabel('Correlation (A-B)')
ax_corr.set_title('A-B Correlation Across Datasets')
ax_corr.grid(True, alpha=0.3)

# Category distribution for first dataset. Dict views are materialized as lists
# because pie() converts its input with NumPy, which cannot turn a dict view
# into a 1-D array.
categories = analysis_results[0]['category_counts']
ax_cat.pie(
    list(categories.values()),
    labels=list(categories.keys()),
    autopct='%1.1f%%',
)
ax_cat.set_title('Category Distribution (Dataset 1)')

fig.tight_layout()
plt.show()

print(f"Processed {len(analysis_results)} datasets successfully!")

## 4. Monte Carlo Simulation

In [None]:
# Estimate π by Monte Carlo sampling, one Ray task per sample size
import ray

# Sample counts to evaluate: 100k through 500k in steps of 100k
sample_sizes = list(range(100000, 500001, 100000))

print("Running Monte Carlo π estimation...")

# Launch every task first, then block once for all of the results
futures = [ParallelComputing.monte_carlo_pi.remote(count) for count in sample_sizes]
pi_estimates = ray.get(futures)

# Absolute deviation of each estimate from numpy's π
errors = [abs(estimate - np.pi) for estimate in pi_estimates]

fig, (ax_estimate, ax_error) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: estimates against the true value
ax_estimate.plot(sample_sizes, pi_estimates, 'bo-', label='Estimated π')
ax_estimate.axhline(y=np.pi, color='r', linestyle='--', label='True π')
ax_estimate.set_xlabel('Sample Size')
ax_estimate.set_ylabel('π Estimate')
ax_estimate.set_title('Monte Carlo π Estimation')
ax_estimate.legend()
ax_estimate.grid(True, alpha=0.3)

# Right panel: absolute error on a logarithmic y-axis
ax_error.semilogy(sample_sizes, errors, 'ro-')
ax_error.set_xlabel('Sample Size')
ax_error.set_ylabel('Absolute Error')
ax_error.set_title('Estimation Error vs Sample Size')
ax_error.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Tabulate each estimate with its error
for size, estimate, error in zip(sample_sizes, pi_estimates, errors):
    print(f"Sample size: {size:6d}, π estimate: {estimate:.6f}, Error: {error:.6f}")

## 5. Using the @ray_parallel Decorator

In [None]:
# Decorate a per-item function with @ray_parallel, requesting four workers
@ray_parallel(num_workers=4)
def process_number(x, multiplier=2, offset=0):
    """Return ``x * multiplier + offset`` after a 0.1 s pause standing in for real work."""
    import time
    time.sleep(0.1)  # artificial delay
    return x * multiplier + offset

# Inputs 0..19
numbers = [*range(20)]

print("Using @ray_parallel decorator...")

# The decorated function receives the whole list; the keyword arguments are passed through
results = process_number(numbers, multiplier=3, offset=10)

# Plot outputs against inputs
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(numbers, results, 'o-', linewidth=2, markersize=6)
ax.set_xlabel('Input')
ax.set_ylabel('Output (3x + 10)')
ax.set_title('Parallel Processing Results')
ax.grid(True, alpha=0.3)
plt.show()

print(f"Processed {len(results)} items: {results[:10]}...")

## 6. Performance Dashboard

In [None]:
# Dashboard input assembled from hard-coded literal values (nothing here is
# measured by the cells in this notebook).
execution_times = {'sequential': 2.5, 'parallel': 0.8}
resource_usage = {'CPU': 75, 'Memory': 45, 'Idle': 25}
throughput = {
    'time': list(range(6)),
    'tasks_completed': [0, 15, 35, 60, 85, 100],
}
error_rate = {'success': 95, 'failed': 5}

performance_data = {
    'execution_times': execution_times,
    'resource_usage': resource_usage,
    'throughput': throughput,
    'error_rate': error_rate,
}

# Hand the assembled dictionary to the helper for rendering
ray_helper.create_performance_dashboard(performance_data)

print("Performance dashboard generated!")

## 7. Clean Up

In [None]:
# Ask the helper to shut Ray down, then confirm to the reader
shutdown_message = "Ray shutdown completed. Resources cleaned up."
ray_helper.shutdown()
print(shutdown_message)

## Summary

This notebook demonstrated:

1. **Basic parallel execution** with performance benchmarking
2. **Parallel matrix operations** (multiply, add, and eigenvalue computation)
3. **Parallel data processing** with pandas DataFrames
4. **Monte Carlo simulation** for π estimation
5. **Decorator-based parallel functions** for easy usage
6. **Performance visualization** and monitoring

### Key Benefits:
- Automatic parallelization of compute-intensive tasks
- Built-in performance monitoring and visualization
- Easy integration with existing notebook workflows
- Scalable from local development to cluster deployment

### Next Steps:
- Try with your own computation-heavy functions
- Experiment with different numbers of workers
- Scale up to larger datasets
- Deploy on Ray clusters for even better performance