In [11]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
import random
import math
import numpy as np
# Create (or reuse) the Spark session used by the simulations in this notebook.
spark = (
    SparkSession.builder
    .appName("Monte Carlo Simulation")
    .getOrCreate()
)
# Total number of random points requested per simulation run.
num_samples = 1_000_000
def monte_carlo_pi(samples, seed=None):
    """Count uniformly sampled points that fall inside the unit quarter-circle.

    Draws ``samples`` points whose two coordinates are each uniform on
    [0, 1) and counts those satisfying ``x**2 + y**2 <= 1``. The hit
    fraction approximates pi / 4, so a caller recovers pi as
    ``4 * hits / samples``.

    Args:
        samples: Number of points to draw. Values <= 0 yield a count of 0.
        seed: Optional seed for a private ``random.Random`` generator. Supply
            a distinct value per task to make a run reproducible; ``None``
            (the default) lets the generator seed itself from the OS.

    Returns:
        int: Number of sampled points inside the quarter-circle.
    """
    # A private generator keeps each call independent of the module-level RNG
    # state and makes the count reproducible when a seed is supplied.
    rng = random.Random(seed)
    uniform = rng.random
    return sum(1 for _ in range(samples) if uniform() ** 2 + uniform() ** 2 <= 1)
num_partitions = 10
# Integer division: every Spark task draws the same whole number of points.
samples_per_partition = num_samples // num_partitions
# Points actually drawn. This is smaller than num_samples whenever
# num_samples is not a multiple of num_partitions, so it (not num_samples)
# must be the denominator of the hit fraction.
total_samples_drawn = samples_per_partition * num_partitions
# One list element per task; numSlices pins exactly one element to each
# partition instead of relying on the cluster's default parallelism.
results_rdd = spark.sparkContext.parallelize(
    [samples_per_partition] * num_partitions, numSlices=num_partitions
)
inside_circle_counts = results_rdd.map(monte_carlo_pi)
total_inside_circle = inside_circle_counts.sum()
# The hit fraction approximates the quarter-circle area pi / 4.
pi_estimate = (total_inside_circle / total_samples_drawn) * 4
print(f"Estimated value of π: {pi_estimate}")



Estimated value of π: 3.145888


In [12]:
def monte_carlo_pi_normal(samples, mean=0.0, stddev=1.0, seed=None):
    """Count normally distributed points that fall inside the unit circle.

    Draws ``samples`` points whose two coordinates are independent
    ``Normal(mean, stddev)`` variates and counts those satisfying
    ``x**2 + y**2 <= 1``.

    Note:
        Unlike uniform sampling, the hit fraction here is NOT pi / 4. For
        ``mean == 0`` it converges to ``1 - exp(-1 / (2 * stddev**2))``
        (about 0.393 for ``stddev == 1``), so ``4 * hits / samples`` does not
        estimate pi.

    Args:
        samples: Number of points to draw. Values <= 0 yield a count of 0.
        mean: Mean of each coordinate's normal distribution.
        stddev: Standard deviation of each coordinate (must be >= 0).
        seed: Optional seed for a private NumPy ``Generator``. Supply a
            distinct value per task for reproducible runs; ``None`` (the
            default) seeds from OS entropy.

    Returns:
        int: Number of sampled points with ``x**2 + y**2 <= 1``.
    """
    # A private generator avoids NumPy's global RNG state, which can end up
    # duplicated across forked worker processes and cannot be seeded per task.
    rng = np.random.default_rng(seed)
    # Draw in bounded batches so very large sample counts do not allocate
    # one huge array.
    chunk_size = 1_000_000
    inside_circle = 0
    remaining = samples
    while remaining > 0:
        batch = min(remaining, chunk_size)
        points = rng.normal(mean, stddev, size=(batch, 2))
        squared_radius = (points ** 2).sum(axis=1)
        # Convert to a plain Python int so the return type matches a simple count.
        inside_circle += int(np.count_nonzero(squared_radius <= 1))
        remaining -= batch
    return inside_circle
# One list element per task; numSlices pins exactly one element to each
# partition instead of relying on the cluster's default parallelism.
results_rdd_normal = spark.sparkContext.parallelize(
    [samples_per_partition] * num_partitions, numSlices=num_partitions
)
inside_circle_counts_normal = results_rdd_normal.map(lambda x: monte_carlo_pi_normal(x, mean=0, stddev=1))
total_inside_circle_normal = inside_circle_counts_normal.sum()
# Divide by the number of points actually drawn; this is smaller than
# num_samples when num_samples is not a multiple of num_partitions.
total_samples_normal = samples_per_partition * num_partitions
# NOTE(review): with N(0, 1) coordinates the hit probability is
# 1 - exp(-1/2) (about 0.393), so this value converges to roughly 1.574,
# not pi. The label below is kept unchanged so recorded output stays
# comparable, but the number should not be read as an estimate of pi.
pi_estimate_normal = (total_inside_circle_normal / total_samples_normal) * 4
print(f"Estimated value of π with Normal distribution: {pi_estimate_normal}")
# Release the Spark session now that both simulations have finished.
spark.stop()

Estimated value of π with Normal distribution: 1.5652
