In [23]:
from pyspark.sql.types import *

# Explicit schema for the Apple daily price file, so no inference pass over the
# data is needed and every column arrives with a fixed type.
# NOTE(review): with a user-supplied schema the CSV reader binds fields to file
# columns by position, so this field order must match the file - confirm.
# NOTE(review): IntegerType is a 32-bit signed integer; if any Volume value can
# exceed 2,147,483,647 switch that field to LongType().
orderSchema = StructType([
    StructField("Date", DateType()),
    StructField("Open", FloatType()),
    StructField("High", FloatType()),
    StructField("Low", FloatType()),
    StructField("Close", FloatType()),
    StructField("Volume", IntegerType()),
    StructField("AdjClose", FloatType())
    ])

# The CSV reader defaults to header=false. A header line is then treated as a
# data record whose text cannot be cast to the typed columns, which yields a
# row with a null Date that later cells have to filter away. Declaring the
# header makes Spark skip that line at read time.
# NOTE(review): assumes the first line of the file is a header row - confirm
# against the file; if it is not, set this option to "false".
df = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(orderSchema)
    .load("Files/apple_stock_data.csv")
)
display(df)

StatementMeta(, fd154a6a-8fe9-4c2f-b4e3-25e68a0a3d78, 25, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, e9399219-a47d-44de-bb54-b8a5a10e9c07)

In [24]:
# Total traded volume per date. Only Date and Volume are projected, so the
# aggregate produces a single `sum(Volume)` column next to the grouping key.
columns = (
    df.select("Date", "Volume")
    .groupBy("Date")
    .sum("Volume")
)
display(columns)

StatementMeta(, fd154a6a-8fe9-4c2f-b4e3-25e68a0a3d78, 26, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, c65feb85-f902-4635-9d68-d20a83b59900)

In [37]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, avg, max, min, stddev, round, to_date, year, month
from pyspark.sql.window import Window

# Normalise the trading date into a column named "date" and drop rows without
# a usable date BEFORE any window is evaluated. In an ascending sort null dates
# come first, so unfiltered they would occupy slots in the earliest 5-row
# frames; the Spark SQL version of these metrics excludes them in its WHERE
# clause, and this keeps both implementations in agreement.
df = (
    df.withColumn("date", to_date(col("date")))
    .filter(col("date").isNotNull())
)

# Calendar parts derived from the trading date.
df = (
    df.withColumn("year", year(col("date")))
    .withColumn("month", month(col("date")))
)

# Open-to-close change of each row, expressed as a percentage of the open.
df = df.withColumn(
    "daily_return_pct",
    round(((col("close") - col("open")) / col("open")) * 100, 2),
)

# Trailing frame: the current row plus the four rows before it in date order.
# The first four rows therefore aggregate over fewer than five observations.
# NOTE(review): there is no partitionBy, so Spark evaluates this window in a
# single partition - fine for one ticker, revisit if more symbols are added.
window_spec = Window.orderBy("date").rowsBetween(-4, 0)

# 5-row moving average and rolling standard deviation of the close.
df = (
    df.withColumn("ma_5", round(avg(col("close")).over(window_spec), 2))
    .withColumn("volatility_5", round(stddev(col("close")).over(window_spec), 2))
)

# Preview the first 20 rows in date order.
(
    df.select("date", "open", "close", "daily_return_pct", "ma_5", "volatility_5")
    .orderBy("date")
    .show(20)
)


StatementMeta(, fd154a6a-8fe9-4c2f-b4e3-25e68a0a3d78, 39, Finished, Available, Finished)

+----------+-----+-----+----------------+-----+------------+
|      date| open|close|daily_return_pct| ma_5|volatility_5|
+----------+-----+-----+----------------+-----+------------+
|1984-09-07| 26.5| 26.5|             0.0| 26.5|        NULL|
|1984-09-10| 26.5|26.37|           -0.49|26.44|        0.09|
|1984-09-11|26.62|26.87|            0.94|26.58|        0.26|
|1984-09-12|26.87|26.12|           -2.79|26.47|        0.31|
|1984-09-13| 27.5| 27.5|             0.0|26.67|        0.54|
|1984-09-14|27.62|27.87|            0.91|26.95|        0.74|
|1984-09-17|28.62|28.62|             0.0| 27.4|        0.95|
|1984-09-18|28.62|27.62|           -3.49|27.55|        0.91|
|1984-09-19|27.62| 27.0|           -2.24|27.72|        0.59|
|1984-09-20|27.12|27.12|             0.0|27.65|        0.65|
|1984-09-21|27.12|26.87|           -0.92|27.45|        0.72|
|1984-09-24|26.87|26.62|           -0.93|27.05|        0.37|
|1984-09-25| 26.5|26.12|           -1.43|26.75|         0.4|
|1984-09-26|26.12|25.75|

In [27]:
%%sql
-- Per-row open-to-close return (percent) plus the average and standard
-- deviation of close over the current row and the four rows before it.
SELECT
    date,
    ROUND(((close - open) / open) * 100, 2) AS daily_return_pct,
    ROUND(AVG(close) OVER trailing_5, 2)    AS ma_5,
    ROUND(STDDEV(close) OVER trailing_5, 2) AS volatility_5
FROM apple_stock_data
WHERE date IS NOT NULL
WINDOW trailing_5 AS (ORDER BY date ROWS BETWEEN 4 PRECEDING AND CURRENT ROW)
ORDER BY date;

StatementMeta(, fd154a6a-8fe9-4c2f-b4e3-25e68a0a3d78, 29, Finished, Available, Finished)

<Spark SQL result set with 1000 rows and 4 fields>

In [38]:
%%sql
-- Materialise the metrics as a table: open, close, open-to-close return
-- (percent), and the average / standard deviation of close over the current
-- row and the four rows before it. Rows without a date are excluded.
CREATE OR REPLACE TABLE apple_stock_metrics AS
SELECT
    date,
    open,
    close,
    ROUND(((close - open) / open) * 100, 2) AS daily_return_pct,
    ROUND(AVG(close) OVER trailing_5, 2)    AS ma_5,
    ROUND(STDDEV(close) OVER trailing_5, 2) AS volatility_5
FROM apple_stock_data
WHERE date IS NOT NULL
WINDOW trailing_5 AS (ORDER BY date ROWS BETWEEN 4 PRECEDING AND CURRENT ROW);


StatementMeta(, fd154a6a-8fe9-4c2f-b4e3-25e68a0a3d78, 40, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [39]:
%%sql
-- Return every row and column of the apple_stock_metrics table.
SELECT *
FROM apple_stock_metrics;

StatementMeta(, fd154a6a-8fe9-4c2f-b4e3-25e68a0a3d78, 41, Finished, Available, Finished)

<Spark SQL result set with 1000 rows and 6 fields>