In [0]:
import json
import os

import requests

# Test API call for a single stock.
#
# The API key is read from the environment so the credential never lives in the
# notebook source, its outputs, or version control.
# NOTE(review): the key that was previously hard-coded here should be treated
# as exposed and rotated.
api_key = os.environ.get("ALPHAVANTAGE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the ALPHAVANTAGE_API_KEY environment variable before running this cell"
    )

symbol = "AAPL"
url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&apikey={api_key}"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns HTTP-level failures into an immediate exception.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# The next cell indexes data["Time Series (Daily)"]; fail here with the full
# payload if that key is absent (presumably an error or rate-limit notice from
# the API -- confirm against the provider's documentation).
if "Time Series (Daily)" not in data:
    raise RuntimeError(
        f"Response for {symbol} has no 'Time Series (Daily)' section: "
        f"{json.dumps(data, indent=2)}"
    )

print(json.dumps(data, indent=2))

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from datetime import datetime

# Create (or reuse) the Spark session
spark = SparkSession.builder.getOrCreate()

# The daily series maps each date string to a dict of price fields
time_series = data["Time Series (Daily)"]

# Capture the ingestion time once so every row in this batch carries the same
# timestamp, instead of a slightly different value per row.
ingestion_timestamp = datetime.now()

# Flatten the nested JSON into one record per trading day.
# `symbol` is the ticker used to build the request in the fetch cell, so the
# column always matches the data that was actually downloaded.
rows = [
    {
        "date": date,
        "symbol": symbol,
        "open": float(prices["1. open"]),
        "high": float(prices["2. high"]),
        "low": float(prices["3. low"]),
        "close": float(prices["4. close"]),
        "volume": int(prices["5. volume"]),
        "ingestion_timestamp": ingestion_timestamp,
    }
    for date, prices in time_series.items()
]

# Create DataFrame (schema is inferred from the record dicts)
df = spark.createDataFrame(rows)

# Show the data
df.show(5)

In [0]:
# Make sure the target database exists before anything is written to it
spark.sql("CREATE DATABASE IF NOT EXISTS stock_market")

# Append this batch to the Delta table backing the bronze layer.
# NOTE: the write mode is "append", so every run of this cell adds the rows
# of `df` to the table again.
bronze_writer = df.write.format("delta").mode("append")
bronze_writer.saveAsTable("stock_market.bronze_stock_data")

# Report how many rows the batch contained
records_written = df.count()
print(f"Successfully wrote {records_written} records to bronze layer")

In [0]:
# Load the bronze table back from the catalog
bronze_df = spark.table("stock_market.bronze_stock_data")

# Row count across everything stored so far
total_records = bronze_df.count()
print(f"Total records: {total_records}")

# Earliest and latest value of the `date` column, returned as a single Row.
# min/max are passed column names and used inside agg(), so these are
# presumably the Spark aggregates brought in by the wildcard import of
# pyspark.sql.functions rather than the Python builtins.
date_bounds = bronze_df.agg(min('date'), max('date')).collect()[0]
print(f"Date range: {date_bounds}")

# Preview the five most recent dates
newest_first = bronze_df.orderBy(col("date").desc())
newest_first.show(5)