In [0]:
from pyspark.sql.functions import sum, avg, stddev

In [0]:
# Silver-layer inputs, loaded once and reused by the gold aggregations below.

# Portfolio transactions: the later cells read its stock, quantity and price columns.
silver_portfolio_df = spark.table("silver_portfolio_transactions")

# Stock price records: the later cells read its company, sector and daily_return columns.
silver_stock_df = spark.table("silver_stock_prices")

In [0]:
# Gold table: total amount invested per stock.
# Each transaction contributes quantity * price; the products are summed per stock.
# `sum` here is the Spark aggregate imported at the top of the notebook, not the builtin.
gold_portfolio_metrics_df = silver_portfolio_df.groupBy("stock").agg(
    sum(
        silver_portfolio_df["quantity"] * silver_portfolio_df["price"]
    ).alias("total_investment")
)

# Persist as a Delta table, replacing whatever an earlier run stored.
(
    gold_portfolio_metrics_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_portfolio_metrics")
)

In [0]:
# Gold table: portfolio-wide KPIs.
# Reads the persisted per-stock totals back from gold_portfolio_metrics and reduces them
# to one row holding the grand total and the mean investment per stock.
gold_portfolio_summary_df = spark.table("gold_portfolio_metrics").agg(
    sum("total_investment").alias("total_portfolio_value"),
    avg("total_investment").alias("avg_investment_per_stock"),
)

# Persist as a Delta table, replacing whatever an earlier run stored.
(
    gold_portfolio_summary_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_portfolio_summary")
)

In [0]:
# Gold table: invested amount per sector.

# Distinct (company, sector) pairs, used to tag each transaction with its sector.
# NOTE(review): assumes every company carries exactly one sector value; a company with
# several distinct sector values would match once per value and be counted repeatedly
# -- confirm against the silver data.
company_sector_df = silver_stock_df.select("company", "sector").distinct()

# Inner join: transactions whose stock has no matching company are left out of the totals.
gold_sector_allocation_df = (
    silver_portfolio_df
    .join(
        company_sector_df,
        on=silver_portfolio_df["stock"] == silver_stock_df["company"],
        how="inner",
    )
    .groupBy("sector")
    .agg(
        sum(
            silver_portfolio_df["quantity"] * silver_portfolio_df["price"]
        ).alias("sector_investment")
    )
)

# Persist as a Delta table, replacing whatever an earlier run stored.
(
    gold_sector_allocation_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_sector_allocation")
)


In [0]:
# Gold table: return and risk indicators per company.
# avg_daily_return is the mean of daily_return; volatility is its standard deviation
# (Spark's `stddev` computes the sample standard deviation).
gold_stock_volatility_df = silver_stock_df.groupBy("company").agg(
    avg("daily_return").alias("avg_daily_return"),
    stddev("daily_return").alias("volatility"),
)

# Persist as a Delta table, replacing whatever an earlier run stored.
(
    gold_stock_volatility_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_stock_volatility")
)


In [0]:
# Preview each gold table written above, reading it back from the catalog so the
# output reflects what was actually persisted.
print("DataFrames in the session:")
print("---------------------------------------------------------------------")
for heading, table_name in (
    ("Gold Portfolio Metrics:", "gold_portfolio_metrics"),
    ("Gold Portfolio Summary:", "gold_portfolio_summary"),
    ("Gold Sector Allocation:", "gold_sector_allocation"),
    ("Gold Stock Volatility:", "gold_stock_volatility"),
):
    print(heading)
    spark.table(table_name).show()

DataFrames in the session:
---------------------------------------------------------------------
Gold Portfolio Metrics:
+-----+-----------------+
|stock| total_investment|
+-----+-----------------+
| MSFT|       1232850.63|
|GOOGL|1256606.009999999|
|  JPM|1183444.610000001|
| AAPL|       1055555.72|
+-----+-----------------+

Gold Portfolio Summary:
+---------------------+------------------------+
|total_portfolio_value|avg_investment_per_stock|
+---------------------+------------------------+
|           4728456.97|            1182114.2425|
+---------------------+------------------------+

Gold Sector Allocation:
+----------+------------------+
|    sector| sector_investment|
+----------+------------------+
|Technology|3545012.3600000036|
|   Finance| 1183444.610000001|
+----------+------------------+

Gold Stock Volatility:
+-------+--------------------+--------------------+
|company|    avg_daily_return|          volatility|
+-------+--------------------+--------------------+
|   