In [1]:
import warnings
from io import StringIO

import pandas as pd
import requests

In [2]:
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this blanket filter also hides any pandas warnings raised by
# the cells below; consider narrowing it to specific categories once those
# warnings have been addressed.
warnings.filterwarnings("ignore")

In [3]:
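# One-time setup: pd.read_html needs an HTML parser to be installed.
# Uncomment to install into the kernel's environment:
# %pip install lxml html5lib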

Get the list of S&P 500 ticker symbols from the local CSV file

In [4]:
# Load the S&P 500 companies file and keep only its "Symbol" column.
# Note: `df` is rebound to scraped price data by later cells, so `symbols`
# is the name to rely on from here on.
df = pd.read_csv("./data/sp_500_companies.csv")
symbols = df["Symbol"]
# Bare last expression so the notebook renders the Series as the cell output.
symbols

0       MMM
1       AOS
2       ABT
3      ABBV
4       ACN
       ... 
498     XYL
499     YUM
500    ZBRA
501     ZBH
502     ZTS
Name: Symbol, Length: 503, dtype: object

In [5]:
def scrape_yahoo_finance(
    stock: str,
    period1: int = 1712523837,
    period2: int = 1720386234,
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Download the price-history table for ``stock`` from Yahoo Finance.

    Parameters
    ----------
    stock : str
        Ticker symbol inserted into the quote URL, e.g. ``"TSLA"``.
    period1, period2 : int, optional
        Values sent as the ``period1`` / ``period2`` query parameters. The
        defaults are the values this function previously hard-coded, so
        existing calls request exactly the same URL. They appear to be Unix
        timestamps (seconds) bounding the date range -- TODO confirm against
        Yahoo's URL scheme.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    pd.DataFrame
        The first HTML table on the page, restricted to the columns
        ``Date``, ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.
        Values are returned as parsed by ``pd.read_html``; no further dtype
        conversion is applied here.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        For connection problems or when ``timeout`` is exceeded.
    ValueError
        If ``pd.read_html`` finds no table in the response.
    KeyError
        If the parsed table lacks one of the expected columns.
    """
    url = (
        f"https://finance.yahoo.com/quote/{stock}/history/"
        f"?period1={period1}&period2={period2}"
    )
    # A browser-like User-Agent is sent with every request, as in the original.
    response = requests.get(
        url,
        headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        },
        timeout=timeout,
    )
    # Fail loudly on an error page instead of trying to parse it as a table.
    response.raise_for_status()

    # Wrap the markup in a file-like object: passing literal HTML strings to
    # read_html is deprecated in recent pandas releases.
    tables = pd.read_html(StringIO(response.text))

    # Yahoo appends a tooltip to the header text; normalise it to plain "Close".
    history = tables[0].rename(
        columns={"Close Close price adjusted for splits.": "Close"}
    )
    return history[["Date", "Open", "High", "Low", "Close", "Volume"]]

In [40]:
# Try the scraper on a single ticker. This rebinds `df`, which until now
# held the S&P 500 companies table loaded from CSV.
df = scrape_yahoo_finance("TSLA")
# Bare last expression so the notebook renders the scraped frame.
df

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,"Jul 5, 2024",249.81,252.37,242.46,251.52,154170000
1,"Jul 3, 2024",234.56,248.35,234.25,246.39,166561500
2,"Jul 2, 2024",218.89,231.30,218.06,231.26,205047900
3,"Jul 1, 2024",201.02,213.23,200.85,209.86,135691400
4,"Jun 28, 2024",199.55,203.20,195.26,197.88,95438100
...,...,...,...,...,...,...
57,"Apr 12, 2024",172.34,173.81,170.36,171.05,64506600
58,"Apr 11, 2024",172.55,175.88,168.51,174.60,94516000
59,"Apr 10, 2024",173.04,174.93,170.01,171.76,84532400
60,"Apr 9, 2024",172.91,179.22,171.92,176.88,103232700


In [35]:
def format_table(df_in: pd.DataFrame, stock: str = "") -> pd.DataFrame:
    """Reduce a price-history frame to one close-price column indexed by date.

    Parameters
    ----------
    df_in : pd.DataFrame
        Frame containing at least the columns ``"Date"`` and ``"Close"``.
        It is not modified.
    stock : str, optional
        Name given to the resulting close-price column (the ``"Close"``
        column is renamed to this value, including the default ``""``).

    Returns
    -------
    pd.DataFrame
        A new single-column frame named ``stock``, indexed by ``"Date"``
        converted with ``pd.to_datetime``. Row order follows ``df_in``.

    Raises
    ------
    KeyError
        If ``df_in`` lacks a ``"Date"`` or ``"Close"`` column.
    """
    # Build the result as a chain of non-mutating steps. The previous version
    # assigned into a column slice of `df_in`, which triggers pandas'
    # SettingWithCopyWarning; `.assign` works on a fresh frame instead.
    return (
        df_in[["Date", "Close"]]
        .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
        .set_index("Date")
        .rename(columns={"Close": stock})
    )

In [39]:
# Preview the date-indexed close-price series for the TSLA frame scraped
# above; the result is only displayed, not stored.
format_table(df, stock="TSLA")

Unnamed: 0_level_0,TSLA
Date,Unnamed: 1_level_1
2024-07-05,251.52
2024-07-03,246.39
2024-07-02,231.26
2024-07-01,209.86
2024-06-28,197.88
...,...
2024-04-12,171.05
2024-04-11,174.60
2024-04-10,171.76
2024-04-09,176.88


In [13]:
# Draw 10 tickers from `symbols`; the fixed random_state makes the draw
# repeatable across runs.
some_symbols = symbols.sample(10, random_state=42)
# Bare last expression so the notebook renders the sampled Series.
some_symbols

268     JNJ
73      BMY
289    LDOS
155     DOV
104     CVX
280     KKR
392     DGX
124     STZ
68       BX
244    HBAN
Name: Symbol, dtype: object

In [14]:
# Scrape every sampled ticker and keep each result. Previously each scraped
# frame was overwritten by the next iteration, so only the last ticker's data
# survived the loop. `df` is still rebound on every iteration, as before.
closes_by_stock = {}
for stock in some_symbols:
    print(f"Processing stock {stock}")
    try:
        df = scrape_yahoo_finance(stock=stock)
    except Exception as e:
        # Best effort: report the failure and move on to the next ticker.
        print(f"Error retrieving data for {stock} - error was {str(e)}")
        continue
    # Store the date-indexed close series under its ticker symbol.
    closes_by_stock[stock] = format_table(df, stock=stock)

Processing stock JNJ
Processing stock BMY
Processing stock LDOS
Processing stock DOV
Processing stock CVX
Processing stock KKR
Processing stock DGX
Processing stock STZ
Processing stock BX
Processing stock HBAN
