LSTM (Long short-term memory) model


In [1]:
import yfinance as yf
import pandas as pd
import requests
from io import StringIO
import time
from tqdm import tqdm
import numpy as np

# scrape the S&P 500 constituents table from wikipedia
wiki_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# Send a descriptive User-Agent (Wikimedia's User-Agent policy asks for one;
# generic library defaults may be rejected) and always set a timeout so a
# stalled connection cannot hang the notebook indefinitely.
response = requests.get(
    wiki_url,
    headers={"User-Agent": "sp500-lstm-notebook/1.0 (educational use)"},
    timeout=30,
)

# fail loudly on an HTTP error instead of handing an error page to the parser
response.raise_for_status()

# wrap the text in StringIO object to prevent Pandas "future warning"
wiki_tables = pd.read_html(StringIO(response.text))

# the first table is the relevant one
sp500_table = wiki_tables[0]

# get the stock tickers
ticker_list = sp500_table["Symbol"].tolist()

# replace the dots with dashes to be compatible with yahoo finance api
ticker_list = [ticker.replace(".", "-") for ticker in ticker_list]

# For testing, only use 50 tickers:
ticker_list = ticker_list[:50]

# look at a 5 year window
start_date = "2020-01-01"
end_date = "2025-01-01"


def get_stock_data(ticker, start_date=start_date, end_date=end_date):
    """Get the historic data for a stock based on the ticker and a date range.

    Args:
        ticker (str): The stock ticker
        start_date (str): Start of the window, passed to ``yf.download`` as
            ``start``. Defaults to the module-level ``start_date``.
        end_date (str): End of the window, passed to ``yf.download`` as
            ``end``. Defaults to the module-level ``end_date``.
    Returns:
        stock (Pandas DataFrame): time-series historical data for a stock,
            downloaded with ``auto_adjust=True`` and with the ticker level
            removed from the column labels when one is present.
    """

    temp_df = yf.download(
        ticker,
        start=start_date,
        end=end_date,
        progress=False,
        auto_adjust=True,
    )

    # remove the ticker column -- the ticker will be found in the filename.
    # Only drop the level when the columns really are multi-level: calling
    # droplevel(1) on a flat Index raises, which would otherwise turn a
    # flat-column or empty result into a crash instead of a returned frame.
    if isinstance(temp_df.columns, pd.MultiIndex) and temp_df.columns.nlevels > 1:
        temp_df.columns = temp_df.columns.droplevel(1)

    return temp_df


# list of dataframes, each being stock data
all_stock_data = []

# we use tqdm to create a useful progress bar
for ticker in tqdm(ticker_list, desc="Downloading stocks..."):

    # Isolate each download: a failure for one ticker should be reported and
    # skipped, not abort the loop and discard everything collected so far.
    try:
        temp_df = get_stock_data(ticker)
    except Exception as exc:
        print(f"Error getting {ticker}: {exc}")
        temp_df = None

    if temp_df is not None:
        # check if empty or full of NaN (happens when spamming api)
        if temp_df.empty or temp_df.isna().all().all():
            print(f"Error getting {ticker}")
        else:
            all_stock_data.append(temp_df)

    # prevents too many api calls (runs after every ticker, success or not)
    time.sleep(1)


print(f"Collected {len(all_stock_data)} stocks")


ModuleNotFoundError: No module named 'yfinance'