# GPU-Accelerated Data Science with RAPIDS

This notebook demonstrates how to accelerate common data science workflows using NVIDIA's RAPIDS ecosystem. We'll cover:

1. Loading and processing large datasets with cuDF
2. Machine learning with cuML
3. Graph analytics with cuGraph
4. Performance comparisons between CPU and GPU implementations

First, let's set up our environment and import the required libraries.

# Accelerating Pandas with NVIDIA cuDF

This notebook demonstrates how to accelerate pandas operations using NVIDIA's cuDF library. With just one line of code, you can run your existing pandas code on the GPU for significant performance improvements.

In [None]:
# Enable GPU acceleration for pandas operations
%load_ext cudf.pandas

# Import required libraries
import pandas as pd
import numpy as np
import cudf
import cuml
import cugraph
import time

# For visualization
import matplotlib.pyplot as plt
import seaborn as sns

## 1. Loading and Processing Data with GPU Acceleration

Let's create a sample dataset to demonstrate the performance difference between CPU and GPU processing. We'll generate a large dataset of stock market data with the following columns:
- Date
- Symbol
- Open
- High
- Low
- Close
- Volume

In [None]:
# Generate sample stock market data
def generate_stock_data(n_symbols=100, n_days=1000):
    """Build a synthetic daily OHLCV table for several ticker symbols.

    Every symbol gets one random base price; each day's open is that base
    price perturbed by a small random percentage, and high/low/close are
    derived from the open.

    Args:
        n_symbols: Number of distinct symbols (named ``STOCK_0`` ...).
        n_days: Number of consecutive calendar days, ending on 2025-08-04.

    Returns:
        A DataFrame with ``n_symbols * n_days`` rows and the columns
        ``date, symbol, open, high, low, close, volume``, ordered by date
        and then by symbol.
    """
    symbols = [f'STOCK_{i}' for i in range(n_symbols)]
    dates = pd.date_range(end='2025-08-04', periods=n_days)

    # Create all combinations of dates and symbols. The product is
    # date-major: the symbol varies fastest from one row to the next.
    index = pd.MultiIndex.from_product([dates, symbols], names=['date', 'symbol'])
    n_rows = len(index)

    # A private, fixed-seed generator keeps the output reproducible without
    # resetting NumPy's global random state for the rest of the session.
    rng = np.random.RandomState(42)
    base_price = rng.uniform(10, 1000, n_symbols)

    # Rows cycle through the symbols within each date, so the per-symbol base
    # prices must be tiled (s0, s1, ..., s0, s1, ...). Repeating each price
    # n_days times in a row would pair prices with the wrong symbols.
    base_expanded = np.tile(base_price, n_days)

    # Generate daily variations
    daily_change = rng.normal(0, 0.02, n_rows)

    # Calculate OHLC prices; by construction low <= open <= high.
    open_prices = base_expanded * (1 + daily_change)
    high_prices = open_prices * (1 + np.abs(rng.normal(0, 0.01, n_rows)))
    low_prices = open_prices * (1 - np.abs(rng.normal(0, 0.01, n_rows)))

    # The close is the midpoint plus absolute noise; clip it so the noise can
    # never push the close outside the day's [low, high] range.
    close_prices = np.clip(
        (high_prices + low_prices) / 2 + rng.normal(0, 0.005, n_rows),
        low_prices,
        high_prices,
    )

    # Generate volume data
    volume = rng.lognormal(mean=11, sigma=1, size=n_rows).astype(int)

    # Create DataFrame
    df = pd.DataFrame({
        'open': open_prices,
        'high': high_prices,
        'low': low_prices,
        'close': close_prices,
        'volume': volume,
    }, index=index)

    # Turn the (date, symbol) index levels into ordinary columns.
    return df.reset_index()

# Generate a large dataset
N_SYMBOLS = 1000
N_DAYS = 1000  # N_SYMBOLS * N_DAYS = 1 million rows

print("Generating sample dataset...")
df = generate_stock_data(n_symbols=N_SYMBOLS, n_days=N_DAYS)

# Report the size of the table and preview its first rows.
print(f"Dataset shape: {df.shape}")
print("\nSample data:")
print(df.head())

## Performance Comparison: CPU vs GPU

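Let's compare the performance of some common operations between pandas (CPU) and cuDF (GPU).

**Note:** the setup cell runs `%load_ext cudf.pandas`, which makes `pandas` calls execute on the GPU where possible. For a true CPU baseline, run the pandas timings in a session where that extension is not loaded.

The operations compared are: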
1. Grouping and aggregation
2. Sorting
3. Complex calculations

We'll measure the time taken for each operation on both CPU and GPU.

In [None]:
# Helper function for timing operations
def time_operation(func, name, *args, **kwargs):
    """Call ``func``, print how long the call took, and return its result.

    Args:
        func: Callable to time.
        name: Label printed in front of the elapsed time.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available, whereas time.time() follows the system clock and can jump
    # if that clock is adjusted while the operation is running.
    start = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed = time.perf_counter() - start
    print(f"{name}: {elapsed:.4f} seconds")
    return result

# 1. Grouping and Aggregation
# The same aggregation spec and label are used for both runs so that the two
# timings measure the same work.
agg_spec = {
    'open': 'mean',
    'high': 'max',
    'low': 'min',
    'volume': 'sum'
}
groupby_label = "Group by symbol and aggregate"

print("Testing groupby and aggregation performance...")

print("\nCPU (pandas):")
time_operation(lambda: df.groupby('symbol').agg(agg_spec), groupby_label)

print("\nGPU (cuDF):")
# Convert to a cuDF DataFrame up front so the conversion is not timed
gdf = cudf.DataFrame(df)
time_operation(lambda: gdf.groupby('symbol').agg(agg_spec), groupby_label)

In [None]:
# 2. Complex Calculations: Moving Averages and Returns
# Print a section banner before the per-backend timings that follow.
print("\nCalculating moving averages and returns...")

def calculate_metrics(df, gpu=False):
    """Compute per-symbol rolling metrics on the ``close`` column.

    Native grouped operations are used instead of ``transform(lambda ...)``
    so that no Python callable has to run once per group.
    NOTE(review): per the cuDF GroupBy documentation, ``transform`` supports
    only the built-in aggregations, so the lambda form is not expected to run
    on a cuDF frame; confirm on the cuDF version in use.

    Args:
        df: Frame with at least ``symbol``, ``date`` and ``close`` columns.
        gpu: Unused; kept so existing callers can keep passing it.

    Returns:
        A tuple ``(ma20, daily_returns, volatility)`` of Series, each ordered
        like ``df`` sorted by symbol and then date:
        the 20-row moving average of close, the row-over-row percentage change
        of close, and the 20-row rolling standard deviation of that change.
    """
    window = 20

    # Sort by symbol, then date, so each symbol's rows are contiguous and in
    # chronological order before any windowed calculation.
    df = df.sort_values(['symbol', 'date'])

    def _rolling_close(values):
        # Pair the values with their symbol and open a per-symbol window.
        frame = df[['symbol']].assign(close=values)
        return frame.groupby('symbol').rolling(window=window)

    def _realign(rolled):
        # Grouped rolling results carry the symbol as an extra outer index
        # level; drop it so the result is indexed like the sorted frame.
        return rolled['close'].reset_index(level=0, drop=True)

    # Calculate daily returns once; the volatility below reuses them.
    daily_returns = df.groupby('symbol')['close'].pct_change()

    # Calculate 20-day moving average
    ma20 = _realign(_rolling_close(df['close']).mean())

    # Calculate 20-day volatility
    volatility = _realign(_rolling_close(daily_returns).std())

    return ma20, daily_returns, volatility

# Time the same metric calculation on the pandas frame and on the cuDF frame.
metrics_label = "Calculate metrics"

print("\nCPU (pandas):")
time_operation(calculate_metrics, metrics_label, df, gpu=False)

print("\nGPU (cuDF):")
time_operation(calculate_metrics, metrics_label, gdf, gpu=True)

## Visualizing Performance Differences

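Let's create a bar chart comparing the execution times between CPU and GPU operations to visualize the performance gains. The example call below uses placeholder timings; replace them with the values printed by the benchmark cells above.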

In [None]:
# Create performance comparison visualization
def plot_performance_comparison(cpu_times, gpu_times, operations):
    """Draw a grouped bar chart of CPU vs GPU timings with speedup labels.

    Args:
        cpu_times: Sequence of CPU timings in seconds, one per operation.
        gpu_times: Sequence of GPU timings in seconds, one per operation.
        operations: Sequence of operation names used as x-axis labels.

    Raises:
        ValueError: If the three sequences do not have the same length.
    """
    # Validate up front: a length-1 sequence would otherwise be broadcast
    # across all bars, and zip() below would silently drop annotations.
    if not (len(cpu_times) == len(gpu_times) == len(operations)):
        raise ValueError(
            "cpu_times, gpu_times and operations must have the same length"
        )

    x = np.arange(len(operations))
    width = 0.35

    _, ax = plt.subplots(figsize=(10, 6))
    # Offset the two bars by half a bar width either side of each tick.
    ax.bar(x - width/2, cpu_times, width, label='CPU (pandas)', color='blue', alpha=0.6)
    ax.bar(x + width/2, gpu_times, width, label='GPU (cuDF)', color='green', alpha=0.6)

    ax.set_ylabel('Time (seconds)')
    ax.set_title('Performance Comparison: CPU vs GPU')
    ax.set_xticks(x)
    # Rotate the labels on this Axes directly instead of via pyplot state.
    ax.set_xticklabels(operations, rotation=45)
    ax.legend()

    # Add speedup annotations above the taller bar of each pair
    for i, (cpu_time, gpu_time) in enumerate(zip(cpu_times, gpu_times)):
        if gpu_time > 0:
            label = f'{cpu_time / gpu_time:.1f}x speedup'
        else:
            # A zero GPU time has no meaningful ratio; avoid dividing by zero.
            label = 'n/a'
        ax.text(i, max(cpu_time, gpu_time), label,
                ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

# Example usage with dummy data
# Each entry maps an operation name to (CPU seconds, GPU seconds).
# Replace these placeholder pairs with actual measured times.
placeholder_timings = {
    'Groupby + Agg': (0.8, 0.1),
    'Moving Averages': (1.2, 0.15),
    'Daily Returns': (0.9, 0.12),
}

operations = list(placeholder_timings)
cpu_times = [cpu for cpu, _ in placeholder_timings.values()]
gpu_times = [gpu for _, gpu in placeholder_timings.values()]

plot_performance_comparison(cpu_times, gpu_times, operations)