In [1]:
import os

import pandas as pd
import psycopg2
import yfinance as yf
from psycopg2.extras import execute_values

def _fetch_ticker_frame(ticker, required_columns):
    """Download two days of prices for one ticker, shaped to ``required_columns``.

    Args:
        ticker: Symbol as it appears in the S&P 500 listing (e.g. ``BRK.B``).
        required_columns: Column names, in order, that the result must have.

    Returns:
        A DataFrame holding exactly ``required_columns``, or ``None`` when
        the download returned no rows.
    """
    # Yahoo Finance writes share classes with a dash (BRK-B) where the S&P
    # listing uses a dot (BRK.B); querying with the dot returns no data.
    yahoo_symbol = ticker.replace('.', '-')
    stock_data = yf.download(yahoo_symbol, period="2d", progress=False)
    if stock_data is None or stock_data.empty:
        return None

    # Some yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol. Keep only the field level so the plain-name
    # lookups below work and frames from different tickers line up.
    if isinstance(stock_data.columns, pd.MultiIndex):
        stock_data.columns = stock_data.columns.get_level_values(0)

    # Store the symbol as listed, not the Yahoo spelling.
    stock_data['Ticker'] = ticker

    # Fall back to the raw close when no adjusted close column is present.
    if 'Adj Close' not in stock_data.columns:
        stock_data['Adj Close'] = stock_data['Close']

    # Turn the date index into a regular 'Date' column.
    stock_data = stock_data.reset_index()

    # Pad any column that is still missing so every frame has the same schema.
    for col in required_columns:
        if col not in stock_data.columns:
            stock_data[col] = None

    return stock_data[required_columns]


def _store_records(db_url, table_name, records):
    """Create ``table_name`` if needed and bulk-insert ``records`` into it.

    The connection is always closed, including when a statement fails; in
    that case nothing is committed.

    Args:
        db_url: PostgreSQL connection string passed to ``psycopg2.connect``.
        table_name: Target table. It is interpolated directly into the SQL
            text, so it must come from trusted code, never from user input.
        records: Rows ordered as (date, open, high, low, close, adj_close,
            volume, ticker).
    """
    create_table_query = f"""
    CREATE TABLE IF NOT EXISTS {table_name} (
        date DATE,
        open NUMERIC,
        high NUMERIC,
        low NUMERIC,
        close NUMERIC,
        adj_close NUMERIC,
        volume BIGINT,
        ticker VARCHAR(10)
    )
    """
    insert_query = f"""
    INSERT INTO {table_name} (date, open, high, low, close, adj_close, volume, ticker)
    VALUES %s
    """

    connection = psycopg2.connect(db_url)
    try:
        with connection.cursor() as cursor:
            cursor.execute(create_table_query)
            execute_values(cursor, insert_query, records)
        connection.commit()
    finally:
        # Closing without a commit discards the open transaction, so a
        # failure above leaves the table untouched and frees the connection.
        connection.close()


def fetch_and_store_all_stock_data(db_url, table_name):
    """Fetch recent prices for every S&P 500 ticker and store them in PostgreSQL.

    The ticker list is read from the Wikipedia S&P 500 constituents table.
    Each ticker is downloaded separately; a failure for one ticker is printed
    and skipped. Rows missing any of Date/Open/High/Low/Close/Volume/Ticker
    are dropped before insertion. Database errors are printed, not raised.

    Args:
        db_url: PostgreSQL connection string.
        table_name: Table to create (if absent) and insert into. Must be a
            trusted identifier; it is interpolated into the SQL text.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    # Fetch tickers from an S&P 500 static list or replace this with a CSV/API call
    sp500_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()

    # Define required columns
    required_columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Ticker']

    all_data = []

    for ticker in sp500_tickers:
        try:
            print(f"Fetching data for {ticker}...")
            ticker_frame = _fetch_ticker_frame(ticker, required_columns)
        except Exception as e:
            # Best effort: one bad ticker must not abort the whole run.
            print(f"Error fetching data for {ticker}: {e}")
            continue

        if ticker_frame is None:
            print(f"No data available for {ticker}.")
            continue

        all_data.append(ticker_frame)
        print(f"Data for {ticker} added successfully.")

    if not all_data:
        print("No valid data collected. Nothing to store.")
        return

    # Concatenate all data into a single DataFrame
    try:
        full_data = pd.concat(all_data, ignore_index=True)
    except Exception as e:
        print(f"Error concatenating data: {e}")
        return

    # Drop rows with NaN values in critical columns ('Adj Close' may be NaN).
    full_data = full_data.dropna(
        subset=['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Ticker']
    )

    # Store all data in PostgreSQL using psycopg2
    try:
        print("Storing all data into the database...")
        records = full_data[required_columns].values.tolist()
        _store_records(db_url, table_name, records)
        print(f"All stock data stored successfully in {table_name} table.")
    except Exception as e:
        print(f"Error storing data to PostgreSQL: {e}")

# Database connection URL. Set the STOCK_DB_URL environment variable to supply
# real credentials without writing them into this file; the literal below is
# only a placeholder default used when the variable is not set.
db_url = os.environ.get(
    "STOCK_DB_URL",
    "postgresql://user:password@192.168.0.34:5432/rag_db",
)
table_name = "all_stock_data"

# Fetch and store all stock data
fetch_and_store_all_stock_data(db_url, table_name)


Fetching data for MMM...
Data for MMM added successfully.
Fetching data for AOS...
Data for AOS added successfully.
Fetching data for ABT...
Data for ABT added successfully.
Fetching data for ABBV...
Data for ABBV added successfully.
Fetching data for ACN...
Data for ACN added successfully.
Fetching data for ADBE...
Data for ADBE added successfully.
Fetching data for AMD...
Data for AMD added successfully.
Fetching data for AES...
Data for AES added successfully.
Fetching data for AFL...
Data for AFL added successfully.
Fetching data for A...
Data for A added successfully.
Fetching data for APD...
Data for APD added successfully.
Fetching data for ABNB...
Data for ABNB added successfully.
Fetching data for AKAM...
Data for AKAM added successfully.
Fetching data for ALB...
Data for ALB added successfully.
Fetching data for ARE...
Data for ARE added successfully.
Fetching data for ALGN...
Data for ALGN added successfully.
Fetching data for ALLE...
Data for ALLE added successfully.
Fetchi

KeyboardInterrupt: 