In [8]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import year, avg, col

# Configure Spark for this machine.
# "local[*]" runs Spark in-process with one worker thread per logical core.
# NOTE(review): with a local master the executor.* settings below probably
# have no effect, since work runs inside the driver JVM -- confirm before
# relying on them for tuning.
conf = (
    SparkConf()
    .setAppName("StockDataMapReduce")
    .setMaster("local[*]")
    .set("spark.executor.memory", "6g")
    .set("spark.executor.cores", "4")
    .set("spark.driver.memory", "6g")
    .set("spark.driver.cores", "4")
    .set("spark.default.parallelism", "8")
    .set("spark.sql.shuffle.partitions", "8")
)

# getOrCreate() reuses a context that is still alive (e.g. when this notebook
# cell is re-run after an earlier failure) instead of raising
# "ValueError: Cannot run multiple SparkContexts at once".
# When an existing context is reused, the settings above are not re-applied.
sc = SparkContext.getOrCreate(conf=conf)

# Read the effective configuration back from the running context
conf = sc.getConf()

# Dump every setting as "key = value", one per line
print("Spark Configuration:")
for key, value in conf.getAll():
    print(f"{key} = {value}")

# Compute the average stock value per date from the CSV using an RDD
# map/reduce pipeline, print the results, and always stop the SparkContext.

# Column positions inside each CSV row.
# Assuming 'USD BASED ISE' is in the third column (index 2).
DATE_COL = 0
VALUE_COL = 2


def _to_date_value(fields):
    """Return ``[(date, value)]`` for a usable row, or ``[]`` to drop it.

    Used with ``flatMap`` so that blank lines, rows with too few columns and
    non-numeric cells are skipped instead of aborting the whole Spark job.
    """
    try:
        return [(fields[DATE_COL], float(fields[VALUE_COL]))]
    except (IndexError, ValueError):
        return []


try:
    lines = sc.textFile("Stock Data-New.csv")

    # Split each line into fields, filtering out the header line
    header = lines.first()
    data = lines.filter(lambda line: line != header).map(lambda line: line.split(","))

    # Convert rows to (date, stock_value) tuples, dropping malformed rows
    stock_data = data.flatMap(_to_date_value)

    # Map step: transform (date, stock_value) to (date, (stock_value, 1))
    mapped = stock_data.mapValues(lambda value: (value, 1))

    # Reduce step: aggregate by date into (sum, count)
    reduced = mapped.reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1]))

    # Average stock value per date; count is >= 1 for every key produced above
    average_per_date = reduced.mapValues(lambda x: x[0] / x[1])

    # Collect the results to the driver
    results = average_per_date.collect()

    # Print results
    for date, average_value in results:
        print(f"Date: {date}, Average Stock Value: {average_value}")
finally:
    # Stop the SparkContext even if the job failed, so the next run of this
    # cell is not blocked by a leaked context.
    sc.stop()

24/07/21 06:01:48 WARN Utils: Your hostname, blue-nbjupyterhub8 resolves to a loopback address: 127.0.0.1; using 10.0.0.91 instead (on interface ens5)
24/07/21 06:01:48 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/07/21 06:01:49 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Spark Configuration:
spark.driver.extraJavaOptions = -Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false
spark.driver.cores = 4
spark.app.startTime = 1721541708980
spark.default.parallelism = 8
sp

                                                                                

Date: 6-Jan-09, Average Stock Value: 0.031812743
Date: 29-Jan-09, Average Stock Value: -0.020271848
Date: 2-Feb-09, Average Stock Value: -0.035607494
Date: 3-Feb-09, Average Stock Value: 0.022402818
Date: 12-Feb-09, Average Stock Value: -0.020825038
Date: 20-Feb-09, Average Stock Value: -0.04977568
Date: 23-Feb-09, Average Stock Value: 0.029566248
Date: 25-Feb-09, Average Stock Value: 0.010197804
Date: 29-Apr-09, Average Stock Value: 0.073005406
Date: 5-May-09, Average Stock Value: 0.042492156
Date: 12-May-09, Average Stock Value: 0.020467942
Date: 18-Jun-09, Average Stock Value: 0.024440981
Date: 17-Jul-09, Average Stock Value: -0.009281959
Date: 24-Jul-09, Average Stock Value: 0.01469961
Date: 30-Jul-09, Average Stock Value: 0.04563733
Date: 3-Aug-09, Average Stock Value: 0.050339385
Date: 5-Aug-09, Average Stock Value: -0.016477754
Date: 10-Aug-09, Average Stock Value: 0.004289841
Date: 27-Aug-09, Average Stock Value: 0.006307214
Date: 3-Sep-09, Average Stock Value: 0.012507327
Date