In [1]:
from pyspark.sql import SparkSession
from pyspark import SparkConf, SparkContext
import random
import sys
import json

In [2]:
# ## Note: spark config already embedded in Domino Compute Environment, for seamless use...
 
# Obtain the Spark context: reuse the one already running, or start a new one.
sc = SparkContext.getOrCreate()

In [3]:
# ## Spark computing example: Approximate Pi
# * Pi can be approximated to a certain precision by computing the geometric probability that a random point chosen from a square falls within the circle inscribed in that square.
# 
# #### How is 𝜋 approximated here? 
# * from: https://blog.codecentric.de/en/2016/04/calculating-pi-apache-spark/
# 
# This computation is based on the following heuristic: By definition, $\pi$ is the area $A_{Circle}$ of a circle with radius $r = 1$ (generally, $\pi \cdot r^2$ is the area of a circle of radius $r$).
# 
# One then circumscribes this unit circle with a square (side length 2) whose area equals $A_{Square} = 4$.
# 
# *The ratio of these two areas thus equals $A_{Circle} / A_{Square} = \pi / 4$ and gives the geometric probability that a point inside the square also lies inside the circle.*
# 
# Now let us assume that we pick a huge number 𝑛 of points randomly inside the circumscribed square, for example, by throwing darts or dropping rain drops onto it. 
# 
# A certain number $n_{in}$ of these points will end up inside the area described by the circle, while the remaining $n_{out}$ points will lie outside of it (but inside the square). Thus $n_{in} + n_{out} = n$, and the probability of a point lying inside the circle is estimated by $n_{in} / n$.
# 
# Heuristically, one has $A_{Circle} / A_{Square} \approx n_{in} / n$ and hence $\pi \approx 4 \cdot n_{in} / n$.
 
# In[6]:
 
def parse_num_samples(argv, default=50000000):
    """Return the number of random points to draw for the Pi estimate.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first).
        default: Sample count used unless ``argv`` holds exactly one
            argument after the program name.

    Returns:
        ``int(argv[1])`` when ``len(argv) == 2``, otherwise ``default``.

    Raises:
        ValueError: If the single argument is not an integer literal, or
            if it is not strictly positive.
    """
    # Any other argument count (none, or several) selects the default.
    if len(argv) != 2:
        return default
    num_samples = int(argv[1])
    # The estimate later divides by the sample count, so reject 0 (and
    # negatives) here instead of failing after the Spark job has run.
    if num_samples <= 0:
        raise ValueError(
            "NUM_SAMPLES must be a positive integer, got %r" % (argv[1],)
        )
    return num_samples


# Note: the optional command-line argument must be a positive integer
# (the number of random points to sample).
NUM_SAMPLES = parse_num_samples(sys.argv)
 
def inside(p):
    """Draw one random point in the unit square and test it against the circle.

    Args:
        p: Ignored. Present only so the function can be used as a
            one-argument predicate (it is passed to ``filter``).

    Returns:
        True when the drawn point (x, y), with both coordinates taken from
        ``random.random()``, satisfies ``x*x + y*y < 1``; False otherwise.
    """
    x_coord = random.random()
    y_coord = random.random()
    squared_distance = x_coord * x_coord + y_coord * y_coord
    return squared_distance < 1
# Run one trial per element of the distributed range and count how many
# of the randomly drawn points fall inside the circle.
count = (
    sc.parallelize(range(NUM_SAMPLES))
    .filter(inside)
    .count()
)
# The hit fraction approximates pi / 4, so scale it by 4.
pi = 4.0 * count / NUM_SAMPLES
print('Pi is roughly', pi)
 
### write to Domino Stats
 
#with open('dominostats.json', 'w') as f:
#    f.write(json.dumps({"Number of Samples": NUM_SAMPLES, "Pi approximation": pi}))

Pi is roughly 3.1416952
