In [5]:
import os
import time

import pandas as pd
import requests
from sqlalchemy import create_engine

db_engine = create_engine("postgresql+psycopg2://postgres:admin1234@localhost:5432/bootcamp_final_project")
stock_ohlcv_url = "https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?period1=1697904000&period2=1761108140&interval=1d&events=history"

df_symbols = pd.read_sql("select * from symbols", con=db_engine)
symbol_to_id = dict(zip(df_symbols['symbol'], df_symbols['id']))
# symbols = list(df_symbols["symbol"])
symbols = ['TSLA','AAPL']

In [6]:
headers = {
  "User-Agent" : "Mozilla/5.0"
}

for symbol in symbols:
  stock_id = symbol_to_id.get(symbol)
  if stock_id is None:
    print(f"No stock_id found for symbol: {symbol}")
    continue

  url = stock_ohlcv_url.format(symbol=symbol)
  response = requests.get(url, headers=headers)
  data = response.json()

  # Check if 'chart' and 'result' exist and are not empty
  if 'chart' not in data or not data['chart'] or 'result' not in data['chart'] or not data['chart']['result']:
    error_msg = data.get('chart', {}).get('error', 'No error message provided')
    print(f"No valid data for {symbol}: {error_msg}")
    continue
        
  result = data['chart']['result'][0]
        
  # Check if required keys exist in result
  required_keys = ['timestamp', 'indicators']
  if not all(key in result for key in required_keys):
    missing_keys = [key for key in required_keys if key not in result]
    print(f"Missing keys {missing_keys} in response for {symbol}: {result}")
    continue
        
  # Check if indicators contain quote and adjclose
  if 'quote' not in result['indicators']:
    print(f"Missing 'quote' or 'adjclose' in indicators for {symbol}: {result['indicators']}")
    continue
        
  timestamps = result['timestamp']
  quote = result['indicators']['quote'][0]
  # Ensure lists are the same length
  if not (len(timestamps) == len(quote.get('close', []))):
    print(f"Data length mismatch for {symbol}")
    continue

  dates = pd.to_datetime(timestamps, unit='s').date

  ohlcv = {
    "stock_id" : [stock_id] * len(timestamps),
    "date" : dates,
    "open" : quote["open"],
    "high" : quote["high"],
    "low" : quote["low"],
    "close" : quote["close"],
    "volume" : quote["volume"]
    }

  df_ohlcv = pd.DataFrame(ohlcv)
  df_ohlcv.to_sql('stock_ohlcv', db_engine, if_exists='append', index=False)
  time.sleep(1)
