# In [1]:
import json
import os
import time
from datetime import datetime, timezone

import requests
from sqlalchemy import create_engine, text, MetaData, Table, Column, String, Float, Date

# Connection string for the target PostgreSQL database. It can be overridden
# through the DATABASE_URL environment variable so that credentials do not have
# to live in source; the literal below is kept only as the fallback value.
DATABASE_URL = os.environ.get(
    'DATABASE_URL',
    'postgresql+psycopg2://postgres:admin1234@localhost:5432/bootcamp_2504p',
)
engine = create_engine(DATABASE_URL)

# Registry that collects the table definitions declared in this script.
metadata = MetaData()

# Daily OHLCV bars. (symbol, date) together form the composite primary key;
# the five value columns are nullable floats declared in a fixed order.
stock_ohlcv_table = Table(
    'stock_ohlcv',
    metadata,
    Column('symbol', String(10), primary_key=True),
    Column('date', Date, primary_key=True, nullable=False),
    *(
        Column(value_name, Float, nullable=True)
        for value_name in ('open', 'high', 'low', 'close', 'volume')
    ),
)

def create_table():
    """Emit CREATE statements for the tables registered on ``metadata``.

    Runs ``metadata.create_all`` against the module-level ``engine`` and then
    prints a confirmation line. The message is printed unconditionally once
    ``create_all`` returns.
    """
    metadata.create_all(bind=engine)
    print("stock_ohlcv table created.")

def load_symbols():
    """Fetch the ticker symbols stored in the ``stocks`` table.

    Returns:
        list: Values of the ``symbol`` column, in the order the query returns
        them. An empty list is returned when the lookup fails for any reason,
        including a failure to open the database connection.
    """
    # The ``with`` sits inside the ``try`` so that a failed connection attempt
    # is reported the same way as a failed query instead of escaping the
    # function as an unhandled exception.
    try:
        with engine.connect() as conn:
            result = conn.execute(text("SELECT symbol FROM stocks"))
            symbols = [row['symbol'] for row in result.mappings()]
    except Exception as e:
        print(f"fail of loading the symbol: {e}")
        return []

    print(f"loading {len(symbols)} symbol of stock: {symbols}")
    return symbols

from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def get_json_from_api(symbol, period1, period2, interval='1d', events='history'):
    """Download chart data for one symbol from the Yahoo Finance v8 chart endpoint.

    Args:
        symbol: Ticker symbol, placed in the URL path.
        period1: Sent unchanged as the ``period1`` query parameter (the caller
            in this file passes Unix epoch seconds for the range start).
        period2: Sent unchanged as the ``period2`` query parameter (range end).
        interval: Sent as the ``interval`` query parameter.
        events: Sent as the ``events`` query parameter.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the server
        answers with an error status, or the body is not valid JSON.
    """
    # NOTE(review): symbols containing '.', e.g. 'BRK.B', may need a different
    # spelling for this endpoint - verify against the API before relying on them.
    url = f"https://query1.finance.yahoo.com/v8/finance/chart/{symbol}"
    # Let requests build and encode the query string instead of hand-formatting it.
    params = {
        'period1': period1,
        'period2': period2,
        'interval': interval,
        'events': events,
    }
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

    # Retry up to 5 times, with backoff, on the listed HTTP status codes.
    retry_strategy = Retry(total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
    adapter = HTTPAdapter(max_retries=retry_strategy)

    # The context manager closes the session (and its pooled connections) on
    # every exit path; a new session is created for each call.
    with requests.Session() as session:
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        try:
            response = session.get(url, params=params, headers=headers, timeout=30)
            response.raise_for_status()
            return response.json()
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error does not derive from RequestException.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"API fail ({symbol}): {e}")
            return None

def _build_ohlcv_records(symbol, json_data):
    """Turn a chart API response into a list of row dicts for ``stock_ohlcv``.

    Prints the reason and returns an empty list when the payload is missing,
    malformed, empty, or has value lists whose lengths differ from the
    timestamp list.
    """
    chart = json_data.get('chart') if isinstance(json_data, dict) else None
    results = chart.get('result') if isinstance(chart, dict) else None
    if not results:
        print(f"invalid JSON data ({symbol})")
        return []

    result = results[0]
    timestamps = result.get('timestamp') or []
    # ``quote`` may be absent, null, or an empty list; treat all of those as
    # "no quote data" rather than indexing into it blindly.
    quote_list = (result.get('indicators') or {}).get('quote') or []
    quotes = (quote_list[0] if quote_list else None) or {}

    if not timestamps:
        print(f"no data for ({symbol})")
        return []

    fields = ('open', 'high', 'low', 'close', 'volume')
    columns = [quotes.get(field) or [] for field in fields]
    if any(len(column) != len(timestamps) for column in columns):
        print(f"inconsistent data lengths ({symbol})")
        return []

    records = []
    for ts, *values in zip(timestamps, *columns):
        # The timestamp is interpreted as epoch seconds and reduced to its UTC
        # calendar date. Individual values may be None and are stored as NULL.
        row = {
            'symbol': symbol,
            'date': datetime.fromtimestamp(ts, tz=timezone.utc).date(),
        }
        row.update(zip(fields, values))
        records.append(row)
    return records


def save_to_postgres(symbol, json_data):
    """Insert the bars from a chart API response into ``stock_ohlcv``.

    Args:
        symbol: Ticker symbol stored with every row.
        json_data: Decoded API response; bars are read from
            ``json_data['chart']['result'][0]``.

    Returns:
        int: Number of rows submitted to the INSERT. Rows skipped by
        ``ON CONFLICT ... DO NOTHING`` are still counted. Returns 0 when the
        payload is unusable or the database write fails.
    """
    records = _build_ohlcv_records(symbol, json_data)
    if not records:
        return 0

    insert_stmt = text("""
        INSERT INTO stock_ohlcv (symbol, date, open, high, low, close, volume)
        VALUES (:symbol, :date, :open, :high, :low, :close, :volume)
        ON CONFLICT (symbol, date) DO NOTHING;
    """)
    # ``engine.begin()`` commits when the block exits normally and rolls back
    # when it raises. Keeping it inside the ``try`` means a connection failure
    # is reported like any other write failure.
    try:
        with engine.begin() as conn:
            conn.execute(insert_stmt, records)
    except Exception as e:
        print(f"fail to save ({symbol}): {e}")
        return 0

    print(f"save {len(records)} datas ({symbol})")
    return len(records)


if __name__ == "__main__":
    # Fetch window: 2022-07-01 00:00 UTC up to the current moment, converted to
    # Unix epoch seconds for the API. Both ends are timezone-aware so naive and
    # aware datetimes are never mixed.
    start_dt = datetime(2022, 7, 1, 0, 0, 0, tzinfo=timezone.utc)
    end_dt = datetime.now(timezone.utc)
    period1 = int(start_dt.timestamp())
    period2 = int(end_dt.timestamp())

    create_table()

    symbols = load_symbols()
    if not symbols:
        # Nothing to fetch. Fall through to the end of the script instead of
        # calling exit(), which is a site-provided helper and would also shut
        # down an interactive kernel.
        print("no symbol, quit.")
    else:
        total_records = 0
        for symbol in symbols:
            print(f"loading stock: {symbol}")
            json_data = get_json_from_api(symbol, period1, period2)
            if json_data:
                total_records += save_to_postgres(symbol, json_data)
            # Fixed pause after each symbol - presumably to stay under the
            # API's rate limits.
            time.sleep(3)
        print(f"total {total_records} datas.")


stock_ohlcv table created.
loading 503 symbol of stock: ['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A', 'APD', 'ABNB', 'AKAM', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'AME', 'AMGN', 'APH', 'ADI', 'AON', 'APA', 'APO', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BAX', 'BDX', 'BRK.B', 'BBY', 'TECH', 'BIIB', 'BLK', 'BX', 'XYZ', 'BK', 'BA', 'BKNG', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF.B', 'BLDR', 'BG', 'BXP', 'CHRW', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CAT', 'CBOE', 'CBRE', 'CDW', 'COR', 'CNC', 'CNP', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'COIN', 'CL', 'CMCSA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CPAY', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CR