In [None]:
# Read the Spark application name from the session configuration; the result
# is not stored anywhere.
# NOTE(review): `spark` is not defined in the visible cells — presumably the
# notebook environment provides it; confirm.
spark.conf.get("spark.app.name")
from pyspark.sql import SparkSession, Row
from pyspark.sql.types import StructType, StructField, StringType, TimestampType
import pyspark.sql.functions as F
from delta.tables import DeltaTable
    
def migration_v001(spark: SparkSession):
    """Apply migration v0.0.1: set up the ``cdo`` schema and its migration log.

    Steps, in order:

    1. List schemas, create the ``cdo`` schema if missing, and list its
       tables. Each statement is best-effort: an error is printed and the
       next statement still runs.
    2. Create the ``cdo.opssig_migration_log_fact`` Delta table if it does
       not exist (columns: ``version``, ``details``, ``updated_timestamp``).
    3. Upsert one log row for this migration keyed on ``version``; the
       ``updated_timestamp`` column is set to the current timestamp on both
       insert and update.

    Args:
        spark: Spark session used for the SQL statements and Delta operations.

    Returns:
        None.
    """
    migration_version = 'v0.0.1'
    # NOTE(review): this text names cdo.migration_log_fact, while the table
    # created below is cdo.opssig_migration_log_fact — confirm which is meant.
    migration_details = 'Create cdo schema and cdo.migration_log_fact table'

    schema_name = "cdo"
    migration_log_name = f'{schema_name}.opssig_migration_log_fact'

    # Run each statement in its own try block so that a failure in a purely
    # diagnostic listing cannot skip the schema creation that follows it.
    setup_statements = (
        ("SHOW SCHEMAS", True),
        (f"CREATE SCHEMA IF NOT EXISTS {schema_name}", False),
        (f"SHOW TABLES IN {schema_name}", True),
    )
    for statement, display_result in setup_statements:
        try:
            result = spark.sql(statement)
            if display_result:
                result.show()
        except Exception as err:
            # Best-effort, as before: report the problem and carry on. A
            # genuinely missing schema will surface when the table is created.
            print(err)

    migration_log_schema = StructType([
        StructField('version', StringType(), False),
        StructField('details', StringType(), True),
        StructField('updated_timestamp', TimestampType(), True),
    ])
    DeltaTable.createIfNotExists(spark) \
        .tableName(migration_log_name) \
        .addColumns(migration_log_schema) \
        .execute()

    # Single-row source for the upsert; the timestamp is supplied by the
    # merge clauses rather than by the source row.
    insert_data = [Row(version=migration_version, details=migration_details)]
    insert_df = spark.createDataFrame(insert_data)
    delta_table = DeltaTable.forName(spark, migration_log_name)

    # Upsert keyed on version: re-running the migration refreshes the
    # existing row instead of adding a duplicate.
    delta_table.alias("target").merge(
        insert_df.alias("source"),
        "target.version = source.version") \
        .whenMatchedUpdate(
            set={
                'details': 'source.details',
                'updated_timestamp': F.current_timestamp()
            }
        ) \
        .whenNotMatchedInsert(
            values={
                'version': 'source.version',
                'details': 'source.details',
                'updated_timestamp': F.current_timestamp()
            }
        ).execute()
        
# Apply the v0.0.1 migration immediately, using the global `spark` session.
migration_v001(spark)

In [None]:
# Execute ./migration_v001.py through the %run magic.
# NOTE(review): how names defined by that script become visible here depends
# on the notebook environment's %run semantics — confirm.
%run ./migration_v001.py

In [None]:
# NOTE(review): initialize_database is not defined in any visible cell;
# presumably it is provided by a script or notebook run earlier — confirm.
initialize_database(spark)

In [None]:
# Inspect the migration log: load the Delta table by name and print its
# DataFrame type, row count, and every row.
migration_log_name = 'cdo.opssig_migration_log_fact'
delta_table = DeltaTable.forName(spark, migration_log_name)
delta_df = delta_table.toDF()
print(type(delta_df))
print(delta_df.count())
# Iterating a DataFrame directly goes through integer indexing and yields one
# Column per column, not the data. collect() brings the rows to the driver as
# Row objects, which is what we want to print here.
for row in delta_df.collect():
    print(row)