In [0]:
import os
import shutil
from pathlib import Path

# Unity Catalog coordinates used throughout this script.
CATALOG_NAME = "gk_demo"
SCHEMA_NAME = "default"
VOLUME_NAME = "raw_data"
# Fully qualified (catalog.schema.table) name of the table that gets written.
TABLE_NAME = ".".join((CATALOG_NAME, SCHEMA_NAME, "all_events"))

# Provision catalog -> schema -> volume, parents first. Every statement uses
# IF NOT EXISTS, so an object that is already present is left alone.
# NOTE(review): `spark` is not defined in this file; presumably it is the
# session object injected by the notebook runtime -- confirm.
for _ddl_statement in (
    f"CREATE CATALOG IF NOT EXISTS {CATALOG_NAME}",
    f"CREATE SCHEMA IF NOT EXISTS {CATALOG_NAME}.{SCHEMA_NAME}",
    f"CREATE VOLUME IF NOT EXISTS {CATALOG_NAME}.{SCHEMA_NAME}.{VOLUME_NAME}",
):
    spark.sql(_ddl_statement)
print("✅ Unity Catalog objects ready")

# Local copy of the gzipped events archive. The path is relative, so it is
# resolved against whatever the current working directory is at run time.
DATA_DIR = Path("..") / "data"
SOURCE_GZ_FILE = str(DATA_DIR / "raw_events.json.gz")

# Where the archive lives once staged inside the Unity Catalog volume
# (/Volumes/<catalog>/<schema>/<volume>/...).
VOLUME_PATH = "/Volumes/" + "/".join((CATALOG_NAME, SCHEMA_NAME, VOLUME_NAME))
VOLUME_GZ_FILE = VOLUME_PATH + "/raw_events.json.gz"

def copy_to_volume(source=None, destination=None):
    """Copy the raw events archive to its destination unless it is already there.

    An existing destination is never overwritten. If the copy fails or is
    interrupted, any partially written destination file is removed before the
    error is re-raised, so that a later run retries the copy instead of
    skipping it and picking up a truncated archive.

    Args:
        source: Path of the file to copy. Defaults to ``SOURCE_GZ_FILE``.
        destination: Path to copy the file to. Defaults to ``VOLUME_GZ_FILE``.

    Raises:
        OSError: Propagated from ``shutil.copy2`` when the copy fails, e.g.
            ``FileNotFoundError`` when ``source`` does not exist.
    """
    # Defaults are resolved at call time so a plain ``copy_to_volume()`` call
    # keeps using the module-level paths.
    if source is None:
        source = SOURCE_GZ_FILE
    if destination is None:
        destination = VOLUME_GZ_FILE

    if os.path.exists(destination):
        print(f"{destination} already exists in volume. Skipping copy.")
        return

    print(f"Copying {source} to volume {destination}...")
    try:
        shutil.copy2(source, destination)
    except BaseException:
        # The destination did not exist a moment ago, so anything there now is
        # a leftover of this failed attempt. Remove it; otherwise the
        # existence check above would skip the copy on every future run.
        # BaseException is caught so a cancelled run is cleaned up as well.
        try:
            os.remove(destination)
        except OSError:
            # Nothing was written (or it cannot be removed); the original
            # error raised below is the one worth reporting.
            pass
        raise
    print("✅ Copied to volume")

def main():
    """Stage the raw archive in the volume and load it into the Delta table.

    Calls ``copy_to_volume()``, reads ``VOLUME_GZ_FILE`` as JSON through
    ``spark`` (no explicit schema is supplied), and saves the result as the
    table ``TABLE_NAME`` in Delta format using write mode "overwrite".
    """
    copy_to_volume()

    events = spark.read.json(VOLUME_GZ_FILE)

    # Configure the writer first, then materialise the table in one call.
    delta_writer = events.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable(TABLE_NAME)

    print(f"✅ Table {TABLE_NAME} created")


# Run the pipeline as soon as this cell/script is executed.
main()