Pure Spark :)

In [0]:
from pyspark.sql import SparkSession

# Build (or reuse) the Spark session and keep a handle on its SparkContext
spark = (
    SparkSession.builder
    .appName("IntegralApproximation")
    .getOrCreate()
)
sc = spark.sparkContext

# Integration bounds and resolution
a, b = 0, 4    # lower / upper limit of the interval
n = 100_000    # number of sub-intervals (higher = more accurate)

# Width of a single sub-interval
dx = (b - a) / n

# One RDD element per sub-interval index: 0, 1, ..., n-1
rdd = sc.parallelize(range(n))

# Use midpoint Riemann sum: x_i = a + (i + 0.5) * dx
def f(x):
    """Integrand: return the square of ``x``."""
    return pow(x, 2)

def _strip_area(i):
    # Area of the i-th rectangle: height is f evaluated at the midpoint of
    # sub-interval i, width is dx.
    midpoint = a + (i + 0.5) * dx
    return f(midpoint) * dx


# Sum the rectangle areas over all sub-interval indices
area = rdd.map(_strip_area).sum()

print(f"Estimated integral of x^2 from {a} to {b} is: {area:.5f}")

# NOTE(review): this stops the session created above; any later cell that
# relies on an existing `spark` without rebuilding it will fail — confirm
# that stopping here is intended.
spark.stop()


In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, expr

# Obtain a Spark session (unnecessary in a Databricks notebook)
spark = SparkSession.builder.getOrCreate()

# Integration bounds, number of sub-intervals and sub-interval width
a, b = 0, 4
n = 100000
dx = (b - a) / n

# Build everything in one chain, starting from ids 0 .. n-1:
#   x    -> midpoint of sub-interval `id`: a + (id + 0.5) * dx
#   fx   -> f(x) = x^2
#   area -> f(x) * dx
df = (
    spark.range(n)
    .withColumn("x", expr(f"{a} + (id + 0.5) * {dx}"))
    .withColumn("fx", col("x") ** 2)
    .withColumn("area", col("fx") * dx)
)

# Aggregate to a single row holding the sum of all areas, then pull the
# scalar out of that row
summed = df.agg({"area": "sum"})
total_area = summed.collect()[0][0]

print(f"Estimated integral: {total_area:.5f}")


This part doesn't use the session builder; it relies on the already-existing `spark` session.

In [0]:
from pyspark.sql.functions import col, expr

# Integration parameters: approximate the integral of x^2 over [a, b]
# with n equal-width sub-intervals.
a = 0
b = 4
n = 100000
dx = (b - a) / n

# Use existing 'spark' session to create a DataFrame with values 0 to n-1
df = spark.range(n)

# Apply midpoint rule: x = a + (i + 0.5) * dx
df = df.withColumn("x", expr(f"{a} + (id + 0.5) * {dx}"))

# Compute f(x) = x^2
df = df.withColumn("fx", col("x") ** 2)

# Area = f(x) * dx
df = df.withColumn("area", col("fx") * dx)

# Sum all areas to approximate the integral; the aggregation yields a single
# row with a single column, hence the [0][0]
total_area = df.agg({"area": "sum"}).collect()[0][0]

# Report the bounds from the variables rather than hard-coding them, so the
# message stays correct when a or b is edited above.
print(f"Estimated integral of x^2 from {a} to {b}: {total_area:.5f}")
