In [0]:
import requests
import json
from pyspark.sql import Row
from pyspark.sql.types import *
from pyspark.sql.functions import current_date, lit

# Destination: Delta table location inside the Unity Catalog volume
volume_path = "/Volumes/workspace/default/assignment_practice"
delta_path = volume_path + "/site_info/person_info"

# Explicit schema for the user records: an integer id followed by four
# string attributes. Every field is nullable.
user_schema = StructType(
    [StructField("id", IntegerType(), True)]
    + [
        StructField(column_name, StringType(), True)
        for column_name in ("email", "first_name", "last_name", "avatar")
    ]
)

# Step 1: Fetch all paginated data
# Start from the first page; starting at page 2 would silently drop the
# first page of users even though the goal is to load every record.
page = 1
all_users = []

while True:
    url = f"https://reqres.in/api/users?page={page}"
    # Bound the wait so a stalled connection cannot hang the job forever.
    response = requests.get(url, timeout=30)

    # Fail loudly instead of stopping quietly: the collected rows are later
    # written with mode("overwrite"), so a truncated fetch would replace
    # good data with a partial (or empty) result.
    if response.status_code != 200:
        raise RuntimeError(
            f"Request to {url} failed with HTTP status {response.status_code}"
        )

    data = response.json()
    users = data.get("data", [])

    # An empty (or missing) "data" list marks the end of the result set.
    if not users:
        break

    all_users.extend(users)
    page += 1

# Steps 2-3: Build the DataFrame from the fetched records using the explicit
# schema (no sparkContext involved), then tag each row with the source site
# and the date of this load.
df_final = (
    spark.createDataFrame(all_users, schema=user_schema)
    .withColumn("site_address", lit("reqres.in"))
    .withColumn("load_date", current_date())
)

# Step 4: Persist as a Delta table, replacing whatever is already at the path
df_final.write.save(delta_path, format="delta", mode="overwrite")

# Optional preview of the written rows
df_final.display()
