In [4]:
import os
from datetime import datetime
from typing import Dict, Any

import pandas as pd
import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, current_timestamp, input_file_name
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, LongType, DateType, TimestampType

In [None]:
import dlt

In [1]:
# Alpha Vantage connection settings.
#
# The API key is a credential, so it is read from the environment instead of
# being written into the notebook (where it would end up in version control and
# in every exported copy). Set ALPHAVANTAGE_API_KEY in the environment the
# notebook/pipeline runs in. API_KEY is None when the variable is not set.
# NOTE(review): a key that was previously committed in this notebook should be
# treated as leaked and rotated.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY")
BASE_URL = "https://www.alphavantage.co/query"

In [None]:
def fetch_stock_data(symbol, api_key, timeout=30):
    """
    Fetch daily stock data for a given symbol from Alpha Vantage.

    Sends a GET request to ``BASE_URL`` asking for the ``TIME_SERIES_DAILY``
    function with ``outputsize=compact`` and returns the decoded JSON body.

    Args:
        symbol: Ticker symbol to request (sent as the ``symbol`` parameter).
        api_key: Alpha Vantage API key (sent as the ``apikey`` parameter).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would hang the whole pipeline run.

    Returns:
        The JSON response body decoded by ``response.json()``.

    Raises:
        requests.HTTPError: If the response carries a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {
        "function": "TIME_SERIES_DAILY",
        "symbol": symbol,
        "apikey": api_key,
        "outputsize": "compact"
    }
    response = requests.get(BASE_URL, params=params, timeout=timeout)
    # Only HTTP-level failures are raised here; whatever the service puts in a
    # successful (2xx) body is handed back to the caller unchanged.
    response.raise_for_status()
    return response.json()

In [None]:
@dlt.view(name="stock_data_view")
def stock_data_view():
    """
    Build a Spark DataFrame of daily price rows fetched from Alpha Vantage.

    Each entry of the response's "Time Series (Daily)" mapping becomes one row
    with the columns ``date``, ``open``, ``high``, ``low``, ``close``,
    ``volume``, ``symbol`` and ``ingest_time``.

    Returns:
        A Spark DataFrame created from the collected rows.

    Raises:
        ValueError: If the response contains no "Time Series (Daily)" data.
    """
    # NOTE(review): the downstream tables are all named "soxl_*" but the symbol
    # requested here is AAPL -- confirm which ticker is actually intended.
    symbol = "AAPL"
    series_key = "Time Series (Daily)"

    stock_data = fetch_stock_data(symbol, API_KEY)

    daily_series = stock_data.get(series_key) if isinstance(stock_data, dict) else None
    if not daily_series:
        # Fail with an explicit message instead of letting createDataFrame
        # choke on an empty pandas frame. Only the payload's keys are reported:
        # the values of an error payload are not logged in case they echo
        # request details such as the API key.
        payload_keys = sorted(stock_data) if isinstance(stock_data, dict) else type(stock_data).__name__
        raise ValueError(
            f"Alpha Vantage response for {symbol} has no '{series_key}' data "
            f"(payload keys: {payload_keys})"
        )

    # One timestamp for the whole batch so every row of a single ingest agrees.
    ingest_time = pd.Timestamp.now()

    records = [
        {
            "date": date,
            "open": float(values["1. open"]),
            "high": float(values["2. high"]),
            "low": float(values["3. low"]),
            "close": float(values["4. close"]),
            "volume": int(values["5. volume"]),
            "symbol": symbol,
            "ingest_time": ingest_time,
        }
        for date, values in daily_series.items()
    ]

    pdf = pd.DataFrame(records)
    return spark.createDataFrame(pdf)

In [None]:
@dlt.table(
    table_properties={"quality": "bronze"},
    comment="Appends daily stock market data from Alpha Vantage",
    name="stocksoxl_bronze_daily_stock_metrics_data",
)
def stocksoxl_bronze_daily_stock_metrics_data():
    """Bronze table: return the rows of ``stock_data_view`` without any transformation."""
    raw_rows = dlt.read("stock_data_view")
    return raw_rows

In [None]:
@dlt.table(
    name="soxl_silver_daily_stock_metrics",
    comment="Cleaned and typed SOXL daily stock metrics."
)
@dlt.expect_or_drop("date_is_not_null", "stock_date IS NOT NULL")
@dlt.expect_or_drop("close_is_not_null", "close IS NOT NULL")
def soxl_silver_daily_stock_metrics():
    """
    Silver table: project the bronze rows onto the metric columns.

    Reads ``stocksoxl_bronze_daily_stock_metrics_data``, renames ``date`` to
    ``stock_date`` and casts it to a DATE, and keeps ``symbol``, ``open``,
    ``high``, ``low``, ``close`` and ``volume``. The bronze ``ingest_time``
    column is not carried over.

    Rows whose ``stock_date`` or ``close`` is NULL are dropped by the two
    expectations declared on this table.

    Returns:
        The selected Spark DataFrame.
    """
    bronze_df = dlt.read("stocksoxl_bronze_daily_stock_metrics_data")

    return bronze_df.select(
        # Cast so the column is an actual DATE rather than the upstream string.
        # NOTE(review): with ANSI mode off an unparseable value becomes NULL
        # (and is then removed by date_is_not_null); with ANSI mode on the cast
        # raises instead -- confirm the pipeline's setting.
        col("date").cast("date").alias("stock_date"),
        col("symbol"),
        col("open"),
        col("high"),
        col("low"),
        col("close"),
        col("volume"),
    )

In [None]:
@dlt.table(
    comment="Example gold table: could be daily OHLCV aggregates or other views.",
    name="soxl_gold_daily_aggregates",
)
def soxl_gold_daily_aggregates():
    """Gold table: select the date, symbol and OHLCV columns from the silver table."""
    gold_columns = ["stock_date", "symbol", "open", "high", "low", "close", "volume"]
    # No aggregation is performed; this is a plain column projection.
    return dlt.read("soxl_silver_daily_stock_metrics").select(*gold_columns)