In [4]:
# Import libraries

from pyspark.sql import SparkSession
from pyspark.sql.functions import col

StatementMeta(, ef1df5e1-10c0-4c3e-ba9d-8cfbb53bf977, 6, Finished, Cancelled, Cancelled)

In [2]:
# Functions that implement Delta time travel (snapshot comparison) and rollback for the data pipeline

# Comparison Layer (Debugging)
def compare_versions_or_timestamps(table_name, version1=None, version2=None, timestamp1=None, timestamp2=None):
    """
    Compare two versions or timestamps of a Delta table in Microsoft Fabric.

    Both snapshots are read with Delta time travel, full-outer-joined on
    ``Submission_Fct_id``, and the rows that exist in only one snapshot or
    whose shared column values differ are printed with ``show``.

    Args:
        table_name: Name of the Delta table to read.
        version1: First table version; used only when ``version2`` is also given.
        version2: Second table version; used only when ``version1`` is also given.
        timestamp1: First timestamp string; used only when the versions are not
            both given and ``timestamp2`` is also given. A space between date
            and time is replaced by "T".
        timestamp2: Second timestamp string; same handling as ``timestamp1``.

    Returns:
        The DataFrame of differing rows (the same rows that are printed).

    Raises:
        ValueError: If neither both versions nor both timestamps are provided.
    """
    if version1 is not None and version2 is not None:
        print(f"Comparing version {version1} with version {version2} for table {table_name}")
        data_v1 = spark.read.option("versionAsOf", version1).format("delta").table(table_name)
        data_v2 = spark.read.option("versionAsOf", version2).format("delta").table(table_name)
    elif timestamp1 is not None and timestamp2 is not None:
        timestamp1_iso = timestamp1.replace(" ", "T")
        timestamp2_iso = timestamp2.replace(" ", "T")
        print(f"Comparing timestamp {timestamp1_iso} with {timestamp2_iso} for table {table_name}")
        data_v1 = spark.read.option("timestampAsOf", timestamp1_iso).format("delta").table(table_name)
        data_v2 = spark.read.option("timestampAsOf", timestamp2_iso).format("delta").table(table_name)
    else:
        raise ValueError("Provide either versions (version1, version2) or timestamps (timestamp1, timestamp2).")

    key = "Submission_Fct_id"
    v1_key = col(f"v1.{key}")
    v2_key = col(f"v2.{key}")

    # Join with an explicit equality (instead of a shared-column-name join) so
    # that both sides keep their own key column and the null checks below can
    # tell which snapshot a row is missing from.
    joined = data_v1.alias("v1").join(
        data_v2.alias("v2"),
        on=(v1_key == v2_key),
        how="full_outer"
    )

    # "v1"/"v2" are DataFrame aliases, not columns, so they cannot be compared
    # directly; compare every non-key column that exists in both snapshots.
    # Columns present in only one snapshot are not compared.
    v2_columns = set(data_v2.columns)
    shared_columns = [name for name in data_v1.columns if name != key and name in v2_columns]

    row_differs = v1_key.isNull() | v2_key.isNull()
    for name in shared_columns:
        # eqNullSafe treats NULL == NULL as equal, so unchanged NULLs are not
        # reported and NULL <-> value transitions are.
        row_differs = row_differs | ~col(f"v1.`{name}`").eqNullSafe(col(f"v2.`{name}`"))

    changes = joined.filter(row_differs)

    print("Differences between snapshots:")
    changes.show(truncate=False)
    return changes

# Rollback Layer
def rollback_table(table_name, version=None, timestamp=None):
    """
    Rollback a Delta table to a specific version or timestamp in Microsoft Fabric.

    The requested snapshot is read with Delta time travel and written back
    over the same table in overwrite mode with ``overwriteSchema`` enabled.

    Args:
        table_name: Name of the Delta table to roll back.
        version: Table version to restore. Takes precedence over ``timestamp``
            when both are supplied.
        timestamp: Timestamp string to restore to; a space between date and
            time is replaced by "T". Used only when ``version`` is None.

    Raises:
        ValueError: If neither ``version`` nor ``timestamp`` is provided.
    """
    # Guard clause: nothing to roll back to.
    if version is None and timestamp is None:
        raise ValueError("Provide either version or timestamp for rollback.")

    # Pick the time-travel option once, then issue a single read below.
    if version is not None:
        travel_option = "versionAsOf"
        travel_value = version
        print(f"Rolling back table {table_name} to version {version}")
    else:
        timestamp_iso = timestamp.replace(" ", "T")
        travel_option = "timestampAsOf"
        travel_value = timestamp_iso
        print(f"Rolling back table {table_name} to timestamp {timestamp_iso}")

    snapshot_reader = spark.read.option(travel_option, travel_value).format("delta")
    snapshot_df = snapshot_reader.table(table_name)

    # Replace the current table contents (and schema) with the snapshot.
    overwrite_writer = (
        snapshot_df.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
    )
    overwrite_writer.saveAsTable(table_name)
    print(f"✅ Table rolled back to specified version/timestamp: {table_name}")

StatementMeta(, ef1df5e1-10c0-4c3e-ba9d-8cfbb53bf977, 4, Finished, Available, Finished)

In [5]:
# --- Simulated user input (hardcoded values standing in for prompts) ---
compare_decision = "yes"        # "yes" runs the comparison step
compare_type = "version"        # "version" or "timestamp"
version1 = "0"
version2 = "1"
rollback_decision = "no"        # "yes" runs the rollback step
rollback_type = "version"       # "version" or "timestamp"
version_to_rollback = "0"

# Table that both steps operate on.
fact_table_path = "gold_dimensional_modeling.fact_submissions"

# --- Step 1: optionally compare two snapshots of the table ---
if compare_decision == "yes":
    if compare_type == "timestamp":
        timestamp1 = "2024-12-21T15:23:06"
        timestamp2 = "2024-12-21T15:58:51"
        compare_versions_or_timestamps(
            table_name=fact_table_path,
            timestamp1=timestamp1,
            timestamp2=timestamp2,
        )
    elif compare_type == "version":
        compare_versions_or_timestamps(
            table_name=fact_table_path,
            version1=version1,
            version2=version2,
        )
    else:
        print("Invalid input! Please choose either 'version' or 'timestamp'.")

# --- Step 2: optionally roll the table back to an earlier snapshot ---
if rollback_decision == "yes":
    if rollback_type == "timestamp":
        timestamp_to_rollback = "2024-12-21T15:23:06"
        rollback_table(fact_table_path, timestamp=timestamp_to_rollback)
    elif rollback_type == "version":
        rollback_table(fact_table_path, version=version_to_rollback)
    else:
        print("Invalid input! Please choose either 'version' or 'timestamp'.")


StatementMeta(, ef1df5e1-10c0-4c3e-ba9d-8cfbb53bf977, 7, Finished, Cancelled, Cancelled)