In [0]:
%sql
-- Create the volume `stream` in catalog `12daysofdemos`, schema `raw_data`.
-- IF NOT EXISTS makes the statement a no-op when the volume is already present,
-- so this cell can be re-run.
CREATE VOLUME IF NOT EXISTS 12daysofdemos.raw_data.stream

In [0]:
import time
from datetime import datetime

# Configuration
source_table = "12daysofdemos.raw_data.reindeer_telemetry"
target_volume_path = "/Volumes/12daysofdemos/raw_data/stream/reindeer_telemetry"
interval_seconds = 10  # pause between writes; also the back-off after a failed write
max_iterations = 100   # total number of write attempts (failed attempts count too)

print(f"Starting data streaming from {source_table} to {target_volume_path}")
print(f"Writing interval: {interval_seconds} seconds")
print(f"Total iterations: {max_iterations}\n")

# Read the source table once; every batch below re-writes this same DataFrame.
df = spark.read.table(source_table)
row_count = df.count()
print(f"Loaded {row_count} rows from source table\n")

# Simulate a stream: write one timestamped parquet batch every `interval_seconds`
# seconds, for `max_iterations` attempts in total.
iteration = 0      # stays 0 in the summary if max_iterations is 0
failed_writes = 0  # attempts that raised; reported in the final summary

# The whole loop sits inside one KeyboardInterrupt guard so that an interrupt is
# handled gracefully wherever it lands, including during the back-off sleep
# after a failed write.
try:
    for iteration in range(1, max_iterations + 1):
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            file_path = f"{target_volume_path}/batch_{timestamp}.parquet"

            # Write data to volume. "overwrite" replaces anything already at
            # file_path; the name has one-second resolution, so two writes in
            # the same second target the same path.
            df.write.mode("overwrite").parquet(file_path)

            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f"[{current_time}] Iteration {iteration}/{max_iterations}: Written {row_count} rows to {file_path}")
        except Exception as e:
            # Best effort: report the failure and carry on with the next batch.
            failed_writes += 1
            print(f"Error occurred: {str(e)}")
            if iteration < max_iterations:
                print(f"Retrying in {interval_seconds} seconds...")

        # Wait before the next attempt (skip the wait after the last iteration).
        if iteration < max_iterations:
            time.sleep(interval_seconds)
except KeyboardInterrupt:
    print(f"\nStreaming stopped by user at iteration {iteration}")

print(f"\nStreaming completed! Total iterations: {iteration}/{max_iterations}")
if failed_writes:
    print(f"Failed writes: {failed_writes}")