## Monte Carlo Method using Apache Spark
Reference: https://cloud.google.com/solutions/monte-carlo-methods-with-hadoop-spark

In [1]:
# Connect to Spark by creating a SparkContext that runs on a local master
from pyspark.sql import SparkSession
from pyspark import SparkContext, SparkConf
    
# Configure the application name and run against a local master.
conf = SparkConf().setAppName("MonteCarloSpark").setMaster("local")
# getOrCreate reuses an already-running context, so re-executing this cell does
# not fail with "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate(conf=conf)

In [2]:
import random
import time
from operator import add

In [3]:
def grow(seed, investment_init=None, investment_ann=None, term=None,
         mkt_avg_return=None, mkt_std_dev=None):
    """Simulate one random portfolio trajectory and return its final value.

    For each simulated year a growth rate is drawn from a normal distribution,
    applied to the current portfolio value, and the yearly contribution is
    then added.

    Args:
        seed: Seed for this trial's random number generator; the same seed
            always produces the same trajectory.
        investment_init: Starting portfolio value. Defaults to the
            module-level INVESTMENT_INIT.
        investment_ann: Amount added after each year's growth. Defaults to
            the module-level INVESTMENT_ANN.
        term: Number of years to simulate. Defaults to the module-level TERM.
        mkt_avg_return: Mean of the yearly growth rate. Defaults to the
            module-level MKT_AVG_RETURN.
        mkt_std_dev: Standard deviation of the yearly growth rate. Defaults
            to the module-level MKT_STD_DEV.

    Returns:
        The portfolio value after ``term`` simulated years.
    """
    # Defaults are resolved at call time (not at definition time) so the
    # module-level constants may be defined or changed after this function.
    if investment_init is None:
        investment_init = INVESTMENT_INIT
    if investment_ann is None:
        investment_ann = INVESTMENT_ANN
    if term is None:
        term = TERM
    if mkt_avg_return is None:
        mkt_avg_return = MKT_AVG_RETURN
    if mkt_std_dev is None:
        mkt_std_dev = MKT_STD_DEV

    # A private generator yields the same draws as seeding the module-level
    # one, but leaves the process-wide random state untouched.
    rng = random.Random(seed)
    portfolio_value = investment_init
    for _ in range(term):
        growth = rng.normalvariate(mkt_avg_return, mkt_std_dev)
        portfolio_value += portfolio_value * growth + investment_ann
    return portfolio_value

In [4]:
# One seed per trial: the current wall-clock time offset by the trial index,
# so each of the 10000 trials is seeded with a different value.
trial_seeds = [time.time() + i for i in range(10000)]
seeds = sc.parallelize(trial_seeds)

In [5]:
# map() is a lazy Spark transformation: no trial is simulated here. grow runs
# only once an action (such as reduce) is invoked on `results`, and it reads the
# module-level constants INVESTMENT_INIT, INVESTMENT_ANN, TERM, MKT_AVG_RETURN
# and MKT_STD_DEV, which must therefore be defined before that action runs.
results = seeds.map(grow)

In [6]:
# Simulation parameters read by grow().
INVESTMENT_INIT = 100_000  # initial portfolio value
INVESTMENT_ANN = 10_000    # contribution added after each simulated year
TERM = 30                  # number of years simulated per trial
MKT_AVG_RETURN = 0.11      # mean yearly growth rate, as a fraction (0.11 = 11%)
MKT_STD_DEV = 0.18         # standard deviation of the yearly growth rate

In [7]:
sum = results.reduce(add)

In [8]:
print(sum / 10000.)

4236851.214690303


In [9]:
# Scenario change: lower the mean yearly growth rate to 0.07 (7%). grow reads
# this module-level name, so the freshly built map(grow) in the next cell
# simulates with the new value.
MKT_AVG_RETURN = 0.07

In [10]:
# Re-run the simulation with one million trials and print the average final
# portfolio value; the trial count is named once and used for both the seed
# list and the divisor.
n_trials = 1000000
scenario_seeds = sc.parallelize([time.time() + i for i in range(n_trials)])
scenario_total = scenario_seeds.map(grow).reduce(add)
print(scenario_total / n_trials)

1706074.9982074979
