In [12]:
import os, json, time, datetime as dt, csv, pathlib
import sys
from typing import Dict, List
import requests
import pandas as pd
from bs4 import BeautifulSoup
from dotenv import load_dotenv
sys.path.append(os.path.abspath(".."))

from src.utils import validate_df, safe_filename

# Directory that receives raw downloads; created (with parents) if it is missing.
DATA_RAW = pathlib.Path("..") / "data" / "raw"
DATA_RAW.mkdir(parents=True, exist_ok=True)

# Run python-dotenv's loader, then read the two settings this notebook needs
# from the process environment. Either value is None when the variable is unset.
load_dotenv()
ALPHA_KEY = os.environ.get("ALPHAVANTAGE_API_KEY")
Stock = os.environ.get("STOCK")

# Report only whether a key is present, never the key itself.
print("Loaded ALPHAVANTAGE_API_KEY?", bool(ALPHA_KEY))

Loaded ALPHAVANTAGE_API_KEY? True


In [13]:
# Download daily closing prices for SYMBOL, validate the frame, and write it
# as a CSV under DATA_RAW. Alpha Vantage is used when an API key is configured;
# otherwise the cell falls back to yfinance.
SYMBOL = Stock
if not SYMBOL:
    # Fail fast: Stock comes from the STOCK environment variable and is None
    # when that variable is unset, which would otherwise surface later as an
    # unclear API error.
    raise ValueError("STOCK is not set; define it in the environment or in the .env file.")

use_alpha = bool(ALPHA_KEY)
print("Using Alpha Vantage:", use_alpha)

if use_alpha:
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "TIME_SERIES_DAILY",
        "symbol": SYMBOL,
        "outputsize": "compact",
        "apikey": ALPHA_KEY,
        "datatype": "json"
    }
    r = requests.get(url, params=params, timeout=30)
    r.raise_for_status()
    js = r.json()

    # The payload key holding the prices is located by substring rather than
    # by an exact name.
    key = [k for k in js.keys() if "Time Series" in k]
    if not key:
        # A successful HTTP status can still carry an error payload. 'Note' is
        # checked in addition to the other two keys because rate-limit replies
        # have been reported under it -- NOTE(review): confirm against the
        # current API behaviour.
        detail = js.get('Information') or js.get('Note') or js.get('Error Message')
        raise ValueError(f"Alpha Vantage did not return time series data. Response keys: {list(js.keys())}. Message: {detail}")
    series = js[key[0]]

    # `series` is keyed by date; transposing puts one row per date.
    df_api = (pd.DataFrame(series).T
              .rename_axis('date')
              .reset_index())
    # Keep only the closing price and coerce both columns to proper dtypes.
    df_api = df_api[['date', '4. close']].rename(columns={'4. close': 'close'})
    df_api['date'] = pd.to_datetime(df_api['date'])
    df_api['close'] = pd.to_numeric(df_api['close'])
else:
    # Imported here so yfinance is only required when this fallback runs.
    import yfinance as yf
    df_api = yf.download(SYMBOL, period="6mo", interval="1d").reset_index()[['Date','Close']]
    df_api.columns = ['date','close']

if df_api.empty:
    # Do not write an empty CSV that looks like a successful download.
    raise ValueError(f"No price rows were returned for symbol {SYMBOL!r}; nothing to save.")

df_api = df_api.sort_values('date').reset_index(drop=True)
msgs = validate_df(df_api, required_cols=['date','close'], dtypes_map={'date':'datetime64[ns]','close':'float'})
print(msgs)

fname = safe_filename(prefix="api", meta={"source": "alpha" if use_alpha else "yfinance", "symbol": SYMBOL})
out_path = DATA_RAW / fname
df_api.to_csv(out_path, index=False)
print("Saved:", out_path)

Using Alpha Vantage: True
{'na_total': 'Total NA values: 0'}
Saved: ..\data\raw\api_source-alpha_symbol-AAPL_20250824-155558.csv
