<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/multi_GPU_computing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0


In [None]:
# Notebook setup cell: export CUDA-related environment variables for this
# process, then reinstall numba through an IPython shell escape.
import os
os.environ['CUDA_HOME'] = '/usr/local/cuda-12.5'  # Replace with your actual CUDA installation path
# Append the toolkit's bin directory to PATH for this process and its children.
os.environ['PATH'] += os.pathsep + os.path.join(os.environ['CUDA_HOME'], 'bin')
# NOTE(review): presumably intended to select the GPU compute capability;
# confirm that Numba actually reads this variable and that sm_84 matches the GPU in use.
os.environ['NUMBA_CUDA_TARGETS'] = 'sm_84' # Sets the variable to the 'sm_84' target string
# IPython shell escape (not plain Python): quietly upgrade and force-reinstall numba.
!pip install --upgrade --force-reinstall numba -q

In [None]:
import math  # Import the math module
import time
from typing import List

import numpy as np
import scipy.stats as si
from numba import cuda, float64
from numba.cuda.random import (
    create_xoroshiro128p_states,
    xoroshiro128p_next,
    xoroshiro128p_normal_float64,
)
from tqdm import tqdm  # Import tqdm


# 1. Serial Implementation (CPU)
def european_call_payoff(spot_price: float, strike_price: float) -> float:
    """
    Return the exercise value of a European call at expiration.

    Args:
        spot_price: Price of the underlying asset at expiration.
        strike_price: Strike price of the option.

    Returns:
        The amount by which the spot exceeds the strike, floored at 0.0.
    """
    intrinsic_value = spot_price - strike_price
    # Out-of-the-money options expire worthless.
    if 0.0 > intrinsic_value:
        return 0.0
    return intrinsic_value

def simulate_stock_price_path(
    start_price: float,
    mu: float,  # Drift
    sigma: float,  # Volatility
    time_to_maturity: float,
    num_time_steps: int,
    random_numbers: List[float]
) -> List[float]:
    """
    Build one Geometric Brownian Motion price path from pre-drawn shocks.

    Args:
        start_price: Price at the start of the path.
        mu: Drift (expected return) of the process.
        sigma: Volatility of the process.
        time_to_maturity: Horizon of the path in years.
        num_time_steps: Number of equal steps the horizon is divided into.
        random_numbers: Standard normal draws; one is consumed per step.

    Returns:
        A list of ``num_time_steps + 1`` prices, beginning with ``start_price``.
    """
    step_size = time_to_maturity / num_time_steps

    # The deterministic part of the log-return and the shock scale are the
    # same for every step, so name them once up front.
    drift_term = (mu - 0.5 * sigma**2) * step_size
    shock_scale = sigma * np.sqrt(step_size)

    path = [start_price]
    current_price = start_price
    for step in range(num_time_steps):
        current_price = current_price * np.exp(drift_term + shock_scale * random_numbers[step])
        path.append(current_price)
    return path

def monte_carlo_option_price_cpu(
    start_price: float,
    strike_price: float,
    mu: float,
    sigma: float,
    time_to_maturity: float,
    num_time_steps: int,
    num_simulations: int,
    discount_rate: float
) -> float:
    """
    Calculates the price of a European call option using Monte Carlo simulation on the CPU.

    Each simulation draws ``num_time_steps`` standard normal shocks, builds a
    price path with ``simulate_stock_price_path`` and evaluates the call payoff
    on the terminal price. The mean payoff is discounted at ``discount_rate``.

    Note that paths drift at ``mu`` while discounting uses ``discount_rate``;
    the estimate is only comparable to a Black-Scholes price when the two are
    equal (risk-neutral drift).

    Args:
        start_price: The initial price of the stock.
        strike_price: The strike price of the option.
        mu: The expected return (drift).
        sigma: The volatility of the stock price.
        time_to_maturity: The time to maturity in years.
        num_time_steps: The number of time steps in each simulation (>= 1).
        num_simulations: The number of simulations to run (>= 1).
        discount_rate: The risk-free interest rate.

    Returns:
        The estimated price of the option.

    Raises:
        ValueError: If ``num_simulations`` or ``num_time_steps`` is below 1.
    """
    # Fail loudly instead of returning NaN (mean of nothing) or dividing by zero.
    if num_simulations < 1:
        raise ValueError("num_simulations must be at least 1")
    if num_time_steps < 1:
        raise ValueError("num_time_steps must be at least 1")

    # The number of payoffs is known up front, so fill a pre-sized buffer.
    payoffs = np.empty(num_simulations, dtype=np.float64)
    for sim_index in tqdm(range(num_simulations), desc="CPU Simulation"):  # Progress bar
        # Generate random numbers for the entire path
        random_numbers = np.random.standard_normal(num_time_steps)
        prices = simulate_stock_price_path(
            start_price, mu, sigma, time_to_maturity, num_time_steps, random_numbers.tolist()
        )
        # Only the terminal price matters for a European payoff.
        payoffs[sim_index] = european_call_payoff(prices[-1], strike_price)

    discount_factor = np.exp(-discount_rate * time_to_maturity)
    return discount_factor * np.mean(payoffs)

# 2. Parallel Implementation (GPU with CUDA)
@cuda.jit
def monte_carlo_kernel(
    start_price,
    strike_price,
    mu,
    sigma,
    time_to_maturity,
    num_time_steps,
    num_simulations,
    discount_rate,
    rng_states,  # Random number generator states
    output_payoffs  # Array to store results
):
    """
    CUDA kernel that simulates one Geometric Brownian Motion path per thread
    and stores the undiscounted European call payoff of that path.

    Args:
        start_price: Initial price of the underlying.
        strike_price: Strike price of the option.
        mu: Drift used for the simulated paths.
        sigma: Volatility used for the simulated paths.
        time_to_maturity: Horizon of each path in years.
        num_time_steps: Number of steps each path is divided into.
        num_simulations: Number of paths; threads with an index at or beyond
            this value do nothing.
        discount_rate: Not used inside the kernel; kept so the launch
            signature stays unchanged. Payoffs are written undiscounted.
        rng_states: xoroshiro128p state array, indexed by thread id.
        output_payoffs: Output array; element ``thread_id`` receives the payoff.
    """
    # Compute 1D thread index
    thread_id = cuda.grid(1)

    # Make sure we are within bounds
    if thread_id < num_simulations:
        dt = time_to_maturity / num_time_steps
        # Both terms are constant across steps, so compute them once per thread.
        drift_term = (mu - 0.5 * sigma**2) * dt
        shock_scale = sigma * math.sqrt(dt)

        price = start_price
        for _ in range(num_time_steps):
            # Draw a genuine standard normal variate. The raw output of
            # xoroshiro128p_next is an unsigned 64-bit integer, and linearly
            # rescaling it does not yield a normal distribution.
            z = xoroshiro128p_normal_float64(rng_states, thread_id)
            price = price * math.exp(drift_term + shock_scale * z)

        # Calculate the payoff
        output_payoffs[thread_id] = max(price - strike_price, 0.0)

def monte_carlo_option_price_gpu(
    start_price: float,
    strike_price: float,
    mu: float,
    sigma: float,
    time_to_maturity: float,
    num_time_steps: int,
    num_simulations: int,
    discount_rate: float,
    threads_per_block: int = 256,
    seed: int = 123,
) -> float:
    """
    Calculates the price of a European call option using Monte Carlo simulation on the GPU.

    Launches ``monte_carlo_kernel`` with one thread per simulation, copies the
    per-path payoffs back to the host, averages them and discounts the mean
    at ``discount_rate``.

    Args:
        start_price: The initial price of the stock.
        strike_price: The strike price of the option.
        mu: The expected return (drift).
        sigma: The volatility of the stock price.
        time_to_maturity: The time to maturity in years.
        num_time_steps: The number of time steps in each simulation (>= 1).
        num_simulations: The number of simulations to run (>= 1).
        discount_rate: The risk-free interest rate.
        threads_per_block: CUDA block size used for the launch (>= 1).
            Defaults to 256, the value previously hard-coded.
        seed: Seed for the per-thread random number generator states.
            Defaults to 123, the value previously hard-coded.

    Returns:
        The estimated price of the option.

    Raises:
        ValueError: If ``num_simulations``, ``num_time_steps`` or
            ``threads_per_block`` is below 1.
    """
    # Reject sizes that would produce an empty launch or a mean over nothing.
    if num_simulations < 1:
        raise ValueError("num_simulations must be at least 1")
    if num_time_steps < 1:
        raise ValueError("num_time_steps must be at least 1")
    if threads_per_block < 1:
        raise ValueError("threads_per_block must be at least 1")

    # Ceiling division: enough blocks to give every simulation its own thread.
    blocks_per_grid = (num_simulations + (threads_per_block - 1)) // threads_per_block

    # Allocate memory on the GPU for the output payoffs
    output_payoffs = cuda.device_array(num_simulations, dtype=np.float64)

    # One generator state per simulation; a fixed seed keeps runs reproducible.
    rng_states = create_xoroshiro128p_states(num_simulations, seed=seed)

    # Launch the kernel
    monte_carlo_kernel[blocks_per_grid, threads_per_block](
        start_price,
        strike_price,
        mu,
        sigma,
        time_to_maturity,
        num_time_steps,
        num_simulations,
        discount_rate,
        rng_states,
        output_payoffs,
    )

    # Copy the results back to the CPU
    payoffs = output_payoffs.copy_to_host()

    # Discount the average payoff back to today.
    discount_factor = np.exp(-discount_rate * time_to_maturity)
    return discount_factor * np.mean(payoffs)

# 3. Black-Scholes-Merton Formula (for comparison)
def black_scholes_call(S, K, T, r, sigma):
    """
    Closed-form Black-Scholes-Merton price of a European call option.

    Args:
        S: Current price of the underlying asset.
        K: Strike price of the option.
        T: Time to maturity in years.
        r: Risk-free interest rate.
        sigma: Volatility of the underlying asset.

    Returns:
        The price of the call option.
    """
    log_moneyness = np.log(S / K)
    total_volatility = sigma * np.sqrt(T)
    half_variance = 0.5 * sigma ** 2

    d1 = (log_moneyness + (r + half_variance) * T) / total_volatility
    d2 = (log_moneyness + (r - half_variance) * T) / total_volatility

    # Standard normal CDF evaluated at d1 and d2.
    prob_d1 = si.norm.cdf(d1, loc=0.0, scale=1.0)
    prob_d2 = si.norm.cdf(d2, loc=0.0, scale=1.0)

    discounted_strike = K * np.exp(-r * T)
    return S * prob_d1 - discounted_strike * prob_d2

if __name__ == "__main__":
    # Demo driver: price one European call with the CPU Monte Carlo, the GPU
    # Monte Carlo (when a CUDA device is usable) and the Black-Scholes formula,
    # then print prices and timings side by side.

    # Parameters
    start_price = 100.0  # Current stock price
    strike_price = 100.0  # Strike price
    mu = 0.05  # Expected return
    sigma = 0.2  # Volatility
    time_to_maturity = 1.0  # Time to maturity in years
    num_time_steps = 100  # Number of time steps per simulation
    # Kept small so the CPU loop finishes quickly; raise it (e.g. 1_000_000)
    # for a statistically meaningful estimate.
    num_simulations = 100  # Number of simulations
    discount_rate = 0.02  # Risk-free interest rate
    # NOTE: the simulated paths drift at mu (0.05) while discounting and the
    # Black-Scholes reference use discount_rate (0.02), so the Monte Carlo
    # estimates are not expected to converge to the Black-Scholes price.
    # Set mu equal to discount_rate for a like-for-like comparison.

    # Get the number of available CUDA devices
    num_gpus = 0
    cuda_available = cuda.is_available()
    if cuda_available:
        try:
            # Try to get the device count using cuda.list_devices
            num_gpus = len(cuda.list_devices())
        except Exception as list_error:
            print(f"Error getting CUDA device count using cuda.list_devices(): {list_error}")
            try:
                # Fallback to cuda.gpus.get_num_gpus() if cuda.list_devices() fails
                # NOTE(review): confirm that this attribute exists in the installed Numba version.
                num_gpus = cuda.gpus.get_num_gpus()
                print("Using cuda.gpus.get_num_gpus() as a fallback.")
            except Exception as fallback_error:
                print(f"Error getting CUDA device count using cuda.gpus.get_num_gpus(): {fallback_error}")
                print("CUDA is available, but there was an error accessing device information.")
    else:
        print("CUDA is not available. GPU calculations will be skipped.")

    # Single source of truth for "can we actually run on a GPU?".
    gpu_usable = cuda_available and num_gpus > 0

    # Run the CPU version (perf_counter is the monotonic clock meant for intervals)
    start_time_cpu = time.perf_counter()
    option_price_cpu = monte_carlo_option_price_cpu(
        start_price, strike_price, mu, sigma, time_to_maturity, num_time_steps, num_simulations, discount_rate
    )
    end_time_cpu = time.perf_counter()
    execution_time_cpu = end_time_cpu - start_time_cpu

    # Run the GPU version
    if gpu_usable:
        # The first kernel launch also pays for JIT compilation, so this
        # timing overstates the steady-state GPU cost.
        start_time_gpu = time.perf_counter()
        option_price_gpu = monte_carlo_option_price_gpu(
            start_price, strike_price, mu, sigma, time_to_maturity, num_time_steps, num_simulations, discount_rate
        )
        end_time_gpu = time.perf_counter()
        execution_time_gpu = end_time_gpu - start_time_gpu
    else:
        option_price_gpu = 0.0
        execution_time_gpu = 0.0

    # Calculate Black-Scholes price
    black_scholes_price = black_scholes_call(start_price, strike_price, time_to_maturity, discount_rate, sigma)

    # Print the results
    print("------------------------------------------------------------")
    print("European Call Option Pricing using Monte Carlo Simulation")
    print("------------------------------------------------------------")
    print(f"Number of GPUs: {num_gpus}")  # Print the number of GPUs
    print(f"Start Price: {start_price:.2f}")
    print(f"Strike Price: {strike_price:.2f}")
    print(f"Expected Return (mu): {mu:.2f}")
    print(f"Volatility (sigma): {sigma:.2f}")
    print(f"Time to Maturity (T): {time_to_maturity:.2f} years")
    print(f"Number of Time Steps: {num_time_steps}")
    print(f"Number of Simulations: {num_simulations:,}")
    print(f"Risk-free Interest Rate (r): {discount_rate:.2f}")
    print("------------------------------------------------------------")
    print(f"Option Price (CPU): {option_price_cpu:.4f}")
    print(f"Execution Time (CPU): {execution_time_cpu:.4f} seconds")
    if gpu_usable:
        print(f"Option Price (GPU): {option_price_gpu:.4f}")
        print(f"Execution Time (GPU): {execution_time_gpu:.4f} seconds")
        # Guard the ratio against a zero elapsed time on coarse clocks.
        if execution_time_gpu > 0:
            speedup = execution_time_cpu / execution_time_gpu
            print(f"Speedup (CPU vs. GPU): {speedup:.2f}x")
    else:
        print("GPU not available. GPU calculations skipped.")
    print(f"Option Price (Black-Scholes): {black_scholes_price:.4f}")
    print("------------------------------------------------------------")

    # Report a readable device label. This lives inside the guard because it
    # depends on variables that only exist when the script section has run.
    device = "GPU (CUDA)" if gpu_usable else "CPU"
    print(f"Device used for calculations: {device}")