# In [None]:
import gzip
import os
import shutil
from pathlib import Path

# --- Unity Catalog identifiers -------------------------------------------
CATALOG_NAME = 'gk_demo'
SCHEMA_NAME = 'default'
VOLUME_NAME = 'raw_data'
TABLE_NAME = f'{CATALOG_NAME}.{SCHEMA_NAME}.all_events'

# Make sure the catalog, schema and volume exist before anything is written.
# Order matters: each object lives inside the one created before it.
for _ddl in (
    f"CREATE CATALOG IF NOT EXISTS {CATALOG_NAME}",
    f"CREATE SCHEMA IF NOT EXISTS {CATALOG_NAME}.{SCHEMA_NAME}",
    f"CREATE VOLUME IF NOT EXISTS {CATALOG_NAME}.{SCHEMA_NAME}.{VOLUME_NAME}",
):
    spark.sql(_ddl)
print("✅ Unity Catalog objects ready")

# --- Source: gzipped events file, relative to the current working directory
DATA_DIR = Path('../data')
GZ_FILE = str(DATA_DIR / 'raw_events.json.gz')

# --- Destination: decompressed JSON stored inside the volume --------------
VOLUME_PATH = f"/Volumes/{CATALOG_NAME}/{SCHEMA_NAME}/{VOLUME_NAME}"
VOLUME_JSON_FILE = f"{VOLUME_PATH}/raw_events.json"

def unzip_to_volume():
    """Decompress ``GZ_FILE`` into ``VOLUME_JSON_FILE`` unless it already exists.

    The data is streamed in chunks, so the decompressed payload is never held
    in memory all at once. If decompression or writing fails, the partially
    written output file is removed before the error propagates; otherwise the
    existence check would skip the unzip on the next run and leave a truncated
    file in place.

    Raises:
        OSError: If the source cannot be read (missing file, invalid gzip
            data) or the destination cannot be written.
    """
    if os.path.exists(VOLUME_JSON_FILE):
        print(f"{VOLUME_JSON_FILE} already exists in volume. Skipping unzip.")
        return

    print(f"Unzipping {GZ_FILE} to volume {VOLUME_JSON_FILE}...")
    try:
        with gzip.open(GZ_FILE, 'rb') as f_in, open(VOLUME_JSON_FILE, 'wb') as f_out:
            # Chunked copy instead of f_in.read(): bounded memory use.
            shutil.copyfileobj(f_in, f_out)
    except BaseException:
        # The output did not exist before this call (checked above), so any
        # file present now is an incomplete artifact of this failed attempt.
        if os.path.exists(VOLUME_JSON_FILE):
            os.remove(VOLUME_JSON_FILE)
        raise
    print("✅ Unzipped to volume")

def main():
    """Stage the raw JSON in the volume, then load it into the Delta table.

    Calls ``unzip_to_volume()`` first, reads ``VOLUME_JSON_FILE`` with
    ``spark.read.json`` and saves the result as ``TABLE_NAME`` in Delta
    format using overwrite mode.
    """
    unzip_to_volume()

    events = spark.read.json(VOLUME_JSON_FILE)
    delta_writer = events.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable(TABLE_NAME)
# Run the load immediately when this code is executed.
main()