In [None]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window

from pyspark.sql.functions import col, year, month, dayofmonth, to_date

In [None]:
# Reuse the active Spark session if there is one; otherwise start a new
# session registered under the application name "Latest_Transactions".
spark = (
    SparkSession.builder
    .appName("Latest_Transactions")
    .getOrCreate()
)

In [None]:
# Raise Spark's log threshold to ERROR so lower-severity messages do not
# flood the notebook output.
spark.sparkContext.setLogLevel(logLevel="ERROR")

In [None]:
# Silver-layer locations used by this notebook:
#   - silver_transactions_path: the transactions dataset that is read
#   - silver_latest_transactions_path: where the per-member latest rows are written
silver_latest_transactions_path = "/app/datamart/silver/latest_transactions"
silver_transactions_path = "/app/datamart/silver/transactions"

In [None]:
# Read every Parquet file under the Silver transactions path into one
# DataFrame, then print its schema as a quick sanity check.
df_silver = spark.read.format("parquet").load(silver_transactions_path)

print("Silver Transactions schema:")
df_silver.printSchema()

In [None]:
# Window over each member's (msno) transactions, newest transaction_date first.
#
# transaction_date alone is not a total order: when a member has several rows
# on the same date, row_number() would number them arbitrarily and the row
# selected as "latest" could differ from run to run. To make the selection
# reproducible, every remaining column is appended as a descending tie-breaker.
# Rows that still tie after that are exact duplicates, so either one yields the
# same output.
#
# NOTE(review): this tie-break order is stable but arbitrary. If the data has a
# business-meaningful tie-breaker (e.g. an expiry date or ingestion timestamp),
# order on that column explicitly instead.
# Assumes every column of df_silver has an orderable type (no map columns) —
# TODO confirm against the df_silver schema.
tie_break_cols = [
    # Backticks keep column names containing dots from being parsed as
    # struct-field access.
    F.col(f"`{name}`").desc()
    for name in df_silver.columns
    if name not in ("msno", "transaction_date")
]

window_spec = Window.partitionBy("msno").orderBy(
    F.col("transaction_date").desc(),
    *tie_break_cols,
)

In [None]:
# Keep one row per member: the row ranked first within that member's window
# (window_spec orders each member's rows by transaction_date, newest first).
latest_transactions_df = (
    df_silver
    # Number each member's rows according to the window ordering
    .withColumn("row_num", F.row_number().over(window_spec))
    # Retain only the top-ranked row, then discard the helper column
    .where(F.col("row_num") == 1)
    .drop("row_num")
    # Add snapshot_date: the date on which this snapshot is computed
    .withColumn("snapshot_date", F.current_date())
)

In [None]:
# Persist the result to the Silver layer as Parquet, without partitioning.
# mode="overwrite" replaces whatever already exists at the target path.
latest_transactions_df.write.parquet(
    silver_latest_transactions_path,
    mode="overwrite",
)

print(f"✅ Silver layer (Latest_transactions) successfully written to: {silver_latest_transactions_path}")