In [0]:
import requests
import json

# Accumulates the "data" entries of every page fetched below.
all_data = []

# Page number sent with the first request; the loop then advances one page at
# a time.
# NOTE(review): starting at 2 means page 1 is never requested — confirm this
# is intended and not an off-by-one.
page = 2

# Seconds to wait on the server before giving up. requests applies no timeout
# by default, so without this a stalled connection would block the cell.
REQUEST_TIMEOUT_S = 10

while True:
    url = f"https://reqres.in/api/users?page={page}"
    response = requests.get(url, timeout=REQUEST_TIMEOUT_S)

    if response.status_code != 200:
        # Keep whatever was collected so far, but report why the loop ended so
        # a partial (or empty) load is not mistaken for a complete one.
        print(f"Stopping at page {page}: HTTP {response.status_code}")
        break

    result = response.json()

    # Only the "data" entry of the payload is kept; a missing key is treated
    # the same as an empty page.
    data = result.get("data", [])

    if not data:
        break  # stop if no data returned

    all_data.extend(data)
    page += 1


In [0]:
# all_data already contains only user data; other keys are skipped.
# Show one sample record. The lookup is guarded because the fetch loop can end
# without collecting anything (it breaks on any non-200 response), and
# indexing an empty list would raise IndexError.
if all_data:
    print(all_data[0])  # sample record
else:
    print("all_data is empty: no user records were fetched")


In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
from pyspark.sql import Row

# Explicit schema for the user records; every column is declared nullable.
schema = StructType([
    StructField("id", IntegerType(), True),
    StructField("email", StringType(), True),
    StructField("first_name", StringType(), True),
    StructField("last_name", StringType(), True),
    StructField("avatar", StringType(), True)
])

# Build one plain tuple per record, picking values by schema field name.
# This keeps each value aligned with its column regardless of the key order
# in the API record: keys that are not in the schema are ignored, and keys
# that are missing become None (allowed, since every field is nullable).
rows = [
    tuple(item.get(field.name) for field in schema.fields)
    for item in all_data
]

# Create the DataFrame with the explicit schema instead of inferring types.
df = spark.createDataFrame(rows, schema=schema)


In [0]:
from pyspark.sql.functions import lit, current_date

# Append two columns to every row: a constant "site_address" literal and a
# "load_date" taken from current_date().
df = (
    df
    .withColumn("site_address", lit("reqres.in"))
    .withColumn("load_date", current_date())
)

# Render the enriched DataFrame in the notebook output.
display(df)


In [0]:
# Single location used for both the write and the read-back below.
person_info_path = "/Volumes/workspace/default/databricks_assignment/site_info/person_info"

# Save df in Delta format using "overwrite" mode, with the "overwriteSchema"
# option set to "true".
(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save(person_info_path)
)

# Load the Delta data back from the same path and show it, to check what was
# actually written.
df2 = spark.read.format("delta").load(person_info_path)
display(df2)
