### Example Exploratory Notebook

Use this notebook to explore the data generated by the pipeline in your preferred programming language.

**Note**: This notebook is not executed as part of the pipeline.

In [0]:
from pyspark.sql import SparkSession

# Obtain the Spark session used by the rest of the notebook.
# getOrCreate() hands back the active session when one already exists,
# otherwise it starts a new one with the settings below.
spark = (
    SparkSession.builder
    .appName("NSE_Daily_Data")
    # Number of partitions used for shuffles (joins / aggregations).
    .config("spark.sql.shuffle.partitions", "200")
    # Use Arrow for Spark <-> pandas conversions.
    .config("spark.sql.execution.arrow.pyspark.enabled", "true")
    .getOrCreate()
)

In [0]:
from pyspark.sql.functions import (
    col, sum, count, when, lit
)
spark_df = spark.read.table("workspace.NSE_SET.NIFTY50_DAILY")

# Thresholds for the per-stock flags computed in step 2.
HIGH_VOLUME_THRESHOLD = 1_000_000  # totalTradedVolume above this => high_volume_flag "Y"
NEAR_52W_THRESHOLD = 2             # nearWKH / nearWKL at or below this => near-52w flag "Y"


def _count_where(condition):
    """Aggregate expression: number of rows for which `condition` is true.

    `sum` is pyspark.sql.functions.sum (imported at the top of this cell),
    not the Python builtin. Rows where `condition` is NULL contribute 0.
    """
    return sum(when(condition, 1).otherwise(0))


def _yn_flag(condition):
    """Column expression: "Y" where `condition` is true, otherwise "N".

    A NULL condition falls through to "N".
    """
    return when(condition, "Y").otherwise("N")


# -----------------------------
# 1. MARKET SUMMARY (1 ROW)
# -----------------------------
# Global aggregate over the whole table; always yields exactly one row.
# NOTE(review): total_stocks is a row count, so it equals the number of stocks
# only if the table holds one row per symbol (e.g. a single trade_date) - confirm.
market_summary = spark_df.agg(
    count("*").alias("total_stocks"),
    _count_where(col("change") > 0).alias("advancers"),
    _count_where(col("change") < 0).alias("decliners"),
    _count_where(col("change") == 0).alias("unchanged"),
    sum("totalTradedVolume").alias("market_total_volume"),
    sum("totalTradedValue").alias("market_total_value"),
)

# -----------------------------
# 2. ENRICH STOCK DATA
# -----------------------------
# Attach the market-wide totals to every stock row by cross-joining the
# one-row summary. Compared with collect() + lit(), this keeps the column
# types produced by the aggregation itself (instead of types re-inferred from
# Python values, which become NullType when the source table is empty) and
# avoids a round trip through the driver.
# Assumes the source table has no columns named like the summary aliases;
# otherwise the join would produce duplicate column names.
final_df = (
    spark_df
    .crossJoin(market_summary)
    .withColumn("is_gainer", _yn_flag(col("pChange") > 0))
    .withColumn("is_loser", _yn_flag(col("pChange") < 0))
    .withColumn("high_volume_flag", _yn_flag(col("totalTradedVolume") > HIGH_VOLUME_THRESHOLD))
    .withColumn("near_52w_high_flag", _yn_flag(col("nearWKH") <= NEAR_52W_THRESHOLD))
    # NOTE(review): if nearWKL is stored as a signed value that is negative
    # whenever the price is above the 52-week low, this condition is true for
    # almost every row - confirm the sign convention of nearWKL / nearWKH.
    .withColumn("near_52w_low_flag", _yn_flag(col("nearWKL") <= NEAR_52W_THRESHOLD))
)

# -----------------------------
# 3. FINAL COLUMN SELECTION
# -----------------------------
# The report keeps a fixed column order: fields taken as-is from the source
# rows, then the per-stock Y/N flags, then the market-wide totals.
source_columns = [
    "symbol", "open", "dayHigh", "dayLow", "lastPrice", "previousClose",
    "change", "pChange", "totalTradedVolume", "totalTradedValue",
    "yearHigh", "yearLow", "nearWKH", "nearWKL", "trade_date", "load_date",
]
flag_columns = [
    "is_gainer", "is_loser", "high_volume_flag",
    "near_52w_high_flag", "near_52w_low_flag",
]
market_columns = [
    "total_stocks", "advancers", "decliners", "unchanged",
    "market_total_volume", "market_total_value",
]

report_df = final_df.select(*source_columns, *flag_columns, *market_columns)

# -----------------------------
# 4. WRITE REPORT TABLE
# -----------------------------
# Persist the report as a managed table (not a CSV file). Mode "overwrite"
# replaces any existing contents; the overwriteSchema option presumably lets
# the stored schema be replaced as well (Delta table option - confirm the
# table format).
report_table = "workspace.NSE_SET.NIFTY50_DAILY_REPORT"
report_writer = report_df.write.mode("overwrite").option("overwriteSchema", "true")
report_writer.saveAsTable(report_table)

# Render the report in the notebook output.
display(report_df)