In [4]:
import yfinance as yf
import pandas as pd

In [71]:
def get_stock_data(tickers, start, end):
    data = {}

    for ticker in tickers:
        stock = yf.Ticker(ticker)
        df = stock.history(start=start, end=end)

        # Get the income statement (Net Income)
        income_statement = stock.quarterly_financials.T  # Transpose to get data as rows
        if 'Net Income' in income_statement.columns:
            net_income = income_statement['Net Income']
            net_income = net_income.reset_index()  # Make sure the index is datetime
            net_income.columns = ['Date', 'Net Income']
        else:
            print(f"No Net Income data for {ticker}")
            continue  # Skip if no income data is available

        # Convert Date columns to timezone-naive to match the stock data
        net_income['Date'] = pd.to_datetime(net_income['Date']).dt.tz_localize(None)
        df = df.reset_index()
        df['Date'] = pd.to_datetime(df['Date']).dt.tz_localize(None)

        # Merge EPS with stock prices (assuming you calculate EPS as shown earlier)
        df = pd.merge_asof(df, net_income[['Date', 'Net Income']], on='Date', direction='backward')

        # Calculate P/E Ratio
        df['P/E'] = df['Close'] / df['Net Income']  # You may need to adjust this if using EPS

        # Calculate P/S Ratio (using total revenue)
        if 'Total Revenue' in income_statement.columns:
            total_revenue = income_statement['Total Revenue']
            total_revenue = total_revenue.reset_index()
            total_revenue.columns = ['Date', 'Total Revenue']
            total_revenue['Date'] = pd.to_datetime(total_revenue['Date']).dt.tz_localize(None)
            df = pd.merge_asof(df, total_revenue[['Date', 'Total Revenue']], on='Date', direction='backward')
            df['P/S'] = df['Close'] / df['Total Revenue']
        else:
            df['P/S'] = None  # Mark as None if not available

        data[ticker] = df

    return data

In [72]:
dotcom_stocks = ['AMZN', 'CSCO', 'INTC']
ai_stocks = ['GOOG', 'MSFT', 'NVDA']

dotcom_start = '1996-01-01'
dotcom_end = '2001-01-01'

ai_start = '2020-01-01'
ai_end = '2024-10-10'

In [73]:
dotcom_data = get_stock_data(dotcom_stocks, dotcom_start, dotcom_end)

ValueError: right keys must be sorted

In [58]:
ai_data = get_stock_data(ai_stocks, ai_start, ai_end)

dict_keys(['address1', 'city', 'state', 'zip', 'country', 'phone', 'website', 'industry', 'industryKey', 'industryDisp', 'sector', 'sectorKey', 'sectorDisp', 'longBusinessSummary', 'fullTimeEmployees', 'companyOfficers', 'compensationAsOfEpochDate', 'maxAge', 'priceHint', 'previousClose', 'open', 'dayLow', 'dayHigh', 'regularMarketPreviousClose', 'regularMarketOpen', 'regularMarketDayLow', 'regularMarketDayHigh', 'dividendRate', 'dividendYield', 'exDividendDate', 'payoutRatio', 'beta', 'trailingPE', 'forwardPE', 'volume', 'regularMarketVolume', 'averageVolume', 'averageVolume10days', 'averageDailyVolume10Day', 'bid', 'ask', 'bidSize', 'askSize', 'marketCap', 'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'priceToSalesTrailing12Months', 'fiftyDayAverage', 'twoHundredDayAverage', 'trailingAnnualDividendRate', 'trailingAnnualDividendYield', 'currency', 'enterpriseValue', 'profitMargins', 'floatShares', 'sharesOutstanding', 'sharesShort', 'sharesShortPriorMonth', 'sharesShortPreviousMonthDate', 'd

In [59]:
df_ai = pd.concat(ai_data.values(), keys=ai_data.keys(), names=['Ticker', 'Date'])

In [60]:
df_dotcom = pd.concat(dotcom_data.values(), keys=dotcom_data.keys(), names=['Ticker', 'Date'])

In [61]:
df_ai.head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,P/E,P/S
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
GOOG,2020-01-02 00:00:00-05:00,66.913174,68.239415,66.913174,68.201012,28132000,0.0,0.0,8.71,6.152636
GOOG,2020-01-03 00:00:00-05:00,67.227894,68.456878,67.112379,67.866325,23728000,0.0,0.0,8.71,6.152636
GOOG,2020-01-06 00:00:00-05:00,67.334639,69.65394,67.334639,69.539726,34646000,0.0,0.0,8.71,6.152636
GOOG,2020-01-07 00:00:00-05:00,69.72577,69.977646,69.34869,69.49633,30054000,0.0,0.0,8.71,6.152636
GOOG,2020-01-08 00:00:00-05:00,69.433483,70.406101,69.371639,70.043991,30560000,0.0,0.0,8.71,6.152636
GOOG,2020-01-09 00:00:00-05:00,70.854493,71.191662,70.340748,70.817581,30018000,0.0,0.0,8.71,6.152636
GOOG,2020-01-10 00:00:00-05:00,71.203139,71.57069,70.743772,71.311378,36414000,0.0,0.0,8.71,6.152636
GOOG,2020-01-13 00:00:00-05:00,71.630591,71.849551,71.126328,71.78521,33046000,0.0,0.0,8.71,6.152636
GOOG,2020-01-14 00:00:00-05:00,71.774235,71.913389,71.24354,71.368729,31178000,0.0,0.0,8.71,6.152636
GOOG,2020-01-15 00:00:00-05:00,71.335307,71.893187,71.335307,71.783707,25654000,0.0,0.0,8.71,6.152636


In [36]:
df_dotcom.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,P/E,P/S
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AMZN,1997-05-15 00:00:00-04:00,0.121875,0.125,0.096354,0.097917,1443120000,0.0,0.0,45.18424,3.280148
AMZN,1997-05-16 00:00:00-04:00,0.098438,0.098958,0.085417,0.086458,294000000,0.0,0.0,45.18424,3.280148
AMZN,1997-05-19 00:00:00-04:00,0.088021,0.088542,0.08125,0.085417,122136000,0.0,0.0,45.18424,3.280148
AMZN,1997-05-20 00:00:00-04:00,0.086458,0.0875,0.081771,0.081771,109344000,0.0,0.0,45.18424,3.280148
AMZN,1997-05-21 00:00:00-04:00,0.081771,0.082292,0.06875,0.071354,377064000,0.0,0.0,45.18424,3.280148


In [37]:
# save to csv 
df_ai.to_csv('ai_stocks.csv')
df_dotcom.to_csv('dotcom_stocks.csv')