In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
pip install -r requirements.txt


Note: you may need to restart the kernel to use updated packages.


Explore stock market dataset from Yahoo Finance

In [5]:
import yfinance as yf
import pandas as pd


## Load list of IBD Growth Stocks

In [6]:

ibd50 = pd.read_csv('data/IBD50.csv')
ibd50


Unnamed: 0,Symbol
0,GCT
1,NVDA
2,CRWD
3,RCL
4,AMPH
5,ELF
6,CCL
7,CELH
8,ASND
9,ZS


In [7]:
ibd250 = pd.read_csv('data/IBD250.csv')
ibd250

Unnamed: 0,Symbol
0,INTU
1,ALKT
2,FICO
3,GWRE
4,COIN
...,...
295,CAMT
296,STLA
297,RACE
298,RYAAY


In [8]:
# merge IBD 50 and 250 symbol sets
ibd50_set = set(ibd50['Symbol'])
ibd250_set = set(ibd250['Symbol'])
ibd_growth_set = ibd50_set.union(ibd250_set)

In [9]:
len(ibd_growth_set)

309

In [10]:
ibdgrowth_str = ' '.join(ibd_growth_set)
ibdgrowth_str



'CB SKX CRS COST EDU FLEX CAMT ZS DECK DUOL FERG TPX MAS WTFC MLM ALKS COF TXRH BLK DDOG TGH BOH DASH IMCR WDAY ETN CADE EWBC NTNX IT ARVN LULU ROL INFA GWRE CYBR PINS CRBG MEDP GLOB RYZB TMUS MCO BKNG NBIX ALTR VRTX KNF SSD DHI NMIH PSN BK SHOP IMGN ALLE IBP CHKP VRNS ENSG ZTS FICO FLT PATH URBN GOOG XYL VRT AGO INTU PDD TRIP ROST MHO FBIN ZG SKWD STNE CRWD TBBK JBI BURL IOT AVGO EXPE PRI GTLB TWLO MARA BLDR MMYT AXP FLR PCOR MFC HUBS RY ZION TWST CCL RELX WAB PH DT DXCM AXON DKS OC FITB PBRA YELP ALLY PLAB BSX CSWC GDDY GEN WING AMD AIG MNST AMZN DOCN ITGR XP NET NFLX AWI CELH SMCI AZTA MAR CMG GOOGL FIX PANW AFYA CLS ASND CRH WBS UBS ITT WAL AMK TREX NTRA GCT SNOW HOLI IDCC CNM MBWM PCAR BMI FI ISRG AEO TOL AZEK CDRE CRM MAIN LRN RL FRPT PWR ASO NSIT MSFT SHW META COLB AER PNC BAC BLBD STRL BLKB CARR HWKN MATX FNF GFF OWL QLYS TIMB RAMP CAT TDW APO RYAAY GEL DV SFM SYF OFG NVDA SQ IHG FOUR GBDC ASML MBIN LHX GE AAON AOS KKR LII TSLX QCOM ANF GPN DOCU MNDY ARM FSS GPS ERIE CAH AMP JP

In [11]:
stocks_ticker_set = ibd_growth_set

## Prepare broad market indicies

In [None]:
# Capture S&P500, NASDAQ100 and Russell 200 indecies and their equal weighted counter parts
# As well as VIX volatility index, DYX US Dollar index and TNX US 10 Year Treasuries Rate Index
broad_market_indicies = '^SPX ^SPXEW ^NDX ^NDXE ^RUT ^R2ESC ^VIX DX-Y.NYB ^TNX'

In [None]:
broad_market = yf.download(broad_market_indicies, period='max', group_by='tickers') 
broad_market

In [None]:
broad_market.to_csv('data/broad_market.csv.bz2', index='Date')

## Prepare Sector Indicies

In [None]:
sector_indicies = 'XLE ^SP500-15 ^SP500-20 ^SP500-25 ^SP500-30 ^SP500-35 ^SP500-40 ^SP500-45 ^SP500-50 ^SP500-55 ^SP500-60'

In [None]:
sectors = yf.download(sector_indicies, period='max') 
sectors

In [None]:
sectors.to_csv('data/sectors.csv.bz2')

## Prepare stocks price data

In [None]:
ibdgrowth_data = yf.download(ibdgrowth_str, period='max', group_by='tickers') 
ibdgrowth_data

In [None]:
ibdgrowth_data.columns.levels

In [None]:
ibdgrowth_data.to_csv('data/ibdgrowth_hist.csv.bz2', index='Date')

In [None]:
ibdgrowth_loaded = pd.read_csv('data/ibdgrowth_hist.csv.bz2', header=[0, 1], index_col=0)
ibdgrowth_loaded

In [None]:
for ticker in ibdgrowth_loaded.columns.levels[0][:2]:
    print(f'ticker: {ticker}')
    ticker_data = ibdgrowth_loaded[ticker]
    print(f'ticker historic data: {ticker_data}')
    # remove missing values
    ticker_data = ticker_data.dropna()
    print(f'ticker historic data without missing data: {ticker_data}')


## Prepare historical stock sales and earnings data

In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

FMP_API_KEY=os.getenv("FMP_API_KEY")

print(f'FMP_API_KEY={FMP_API_KEY!= None}')

FMP_API_KEY=True


In [2]:
import fmpsdk

# Company Valuation Methods
symbol: str = "AAPL"
symbols: ["AAPL", "CSCO", "QQQQ"]
exchange: str = "NYSE"
exchanges: ["NYSE", "NASDAQ"]
query: str = "AA"
limit: int = 3
period: str = "quarter"
download: bool = True
market_cap_more_than: int = 1000000000
beta_more_than: int = 1
volume_more_than: int = 10000
sector: str = "Technology"
dividend_more_than: int = 0
industry: str = "Software"
filing_type: str = "10-K"
print(f"Company Profile: {fmpsdk.company_profile(apikey=FMP_API_KEY, symbol=symbol)=}")


Company Profile: fmpsdk.company_profile(apikey=FMP_API_KEY, symbol=symbol)=[{'symbol': 'AAPL', 'price': 182.68, 'beta': 1.29, 'volAvg': 54127501, 'mktCap': 2824579892000, 'lastDiv': 0.96, 'range': '133.77-199.62', 'changes': -0.95, 'companyName': 'Apple Inc.', 'currency': 'USD', 'cik': '0000320193', 'isin': 'US0378331005', 'cusip': '037833100', 'exchange': 'NASDAQ Global Select', 'exchangeShortName': 'NASDAQ', 'industry': 'Consumer Electronics', 'website': 'https://www.apple.com', 'description': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to disc

In [None]:
# there should be no duplicate symbols in this list
assert not ibdgrowth_loaded.columns.levels[0].duplicated().any() 

In [26]:
earnings_all_df = pd.DataFrame()
for ticker in stocks_ticker_set: # ['AAON']: # 
    earnings = fmpsdk.historical_earning_calendar(apikey=FMP_API_KEY, symbol=ticker, limit=-1)
    if earnings is not None and len(earnings) > 0:
        edf = pd.DataFrame(earnings)
        edf['date'] = pd.to_datetime(edf['date'])
        edf = edf.set_index(['symbol', 'date'])
        print(f"Earnings calendar for {ticker}: \n{edf}")
        # edf = edf.pivot(columns='symbol')
        # edf.swaplevel(i=0,j=1, axis=0)
        # edf.drop(columns=['symbol'])
        earnings_all_df = pd.concat([earnings_all_df, edf])
        print(f"Earnings calendar after pivot for {ticker}: \n{edf}")
        n_earnings = len(earnings)
        print(f"Total earnings reports for {ticker}: {n_earnings}")
#    earliest_earn = earnings[-1] if len(earnings > 0 else 'None')
#    print(f"Earliest earnings report for {ticker}: {earliest_earn}")


Earnings calendar for AAON: 
                       eps  epsEstimated time      revenue  revenueEstimated  \
symbol date                                                                    
AAON   2024-11-04      NaN           NaN  amc          NaN               NaN   
       2024-08-01      NaN           NaN  amc          NaN               NaN   
       2024-05-02      NaN           NaN  amc          NaN               NaN   
       2024-02-26      NaN          0.53  bmo          NaN       294360000.0   
       2023-11-06  0.64000          0.55  amc  311970000.0       294360000.0   
...                    ...           ...  ...          ...               ...   
       1991-03-31 -0.00159           NaN  bmo    5700000.0               NaN   
       1990-12-31  0.00352           NaN  bmo   36100000.0               NaN   
       1990-09-30  0.00703           NaN  bmo   10100000.0               NaN   
       1990-06-30  0.00703           NaN  bmo    9300000.0               NaN   
       1990

In [None]:
earnings

In [None]:
aaon = earnings_all_df.loc[['AAON']]

In [None]:
aaon

In [None]:
len(earnings_all_df)

In [None]:
earnings_all_df


In [None]:
earnings_file = 'data/earnings_calendar.csv.bz2'

In [None]:
earnings_all_df.to_csv(earnings_file)

### Read back data and verify it

In [None]:
import pandas as pd

earnings_loaded_df = pd.read_csv('data/earnings_calendar.csv.bz2', index_col=['symbol', 'date'])
print(earnings_loaded_df)

## Prepare historical dividends
  * This is secondary information since growth stocks usually do not have dividends and rarely have splits
  * Additionally the dividends and split information is partially reflected in Adj Close of price history data

In [49]:
def fetch_dividends_history():
    divs_hist_all_df = pd.DataFrame()
    for ticker in stocks_ticker_set: # ['AAON']:
        divs_hist = fmpsdk.historical_stock_dividend(apikey=FMP_API_KEY, symbol=ticker)
        # print(f"Loaded historical dividends for {ticker}: \n{divs_hist}")
        print(f"Loaded {len(divs_hist['historical'])} historical dividends for {ticker}")
        if divs_hist['historical'] is not None and len(divs_hist['historical']) > 0:
            dh_df_tmp = pd.DataFrame.from_dict(data=divs_hist['historical'])
            # print(f"Historical dividends for {ticker} dataframe: \n{dh_df_tmp.head()}")
            dh_df_tmp['symbol'] = ticker
            dh_df = dh_df_tmp
            # print(f"Historical dividends for {ticker} dataframe: \n{dh_df_tmp.head()}")
            # print(f"Historical dividends for {ticker} full dataframe: \n{dh_df.head()}")
            dh_df['date'] = pd.to_datetime(dh_df['date'])
            dh_df = dh_df.set_index(['symbol', 'date'])
            n_divs_hist = len(dh_df)
            print(f"Total dividends history reports for {ticker}: {n_divs_hist}")
            # print(f"Historical dividends for {ticker} full dataframe: \n{dh_df}")
            divs_hist_all_df = pd.concat([divs_hist_all_df, dh_df])
    return divs_hist_all_df


Loaded 124 historical dividends for CB
Total dividends history reports for CB: 124
Loaded 0 historical dividends for SKX
Loaded 146 historical dividends for CRS
Total dividends history reports for CRS: 146
Loaded 84 historical dividends for COST
Total dividends history reports for COST: 84
Loaded 4 historical dividends for EDU
Total dividends history reports for EDU: 4
Loaded 0 historical dividends for FLEX
Loaded 3 historical dividends for CAMT
Total dividends history reports for CAMT: 3
Loaded 0 historical dividends for ZS
Loaded 0 historical dividends for DECK
Loaded 0 historical dividends for DUOL
Loaded 23 historical dividends for FERG
Total dividends history reports for FERG: 23
Loaded 19 historical dividends for TPX
Total dividends history reports for TPX: 19
Loaded 162 historical dividends for MAS
Total dividends history reports for MAS: 162
Loaded 68 historical dividends for WTFC
Total dividends history reports for WTFC: 68
Loaded 118 historical dividends for MLM
Total dividen

In [51]:
# divs_hist_file = 'data/dividends_history.csv.bz2'

In [52]:
# divs_hist_all_df.to_csv(divs_hist_file)

### Read back data and verify it

In [None]:
import pandas as pd

earnings_loaded_df = pd.read_csv('data/earnings_calendar.csv.bz2', index_col=['symbol', 'date'])
print(earnings_loaded_df)

## Prepare key metrics data for company fundamentals

In [None]:

keymetrics_all_df = pd.DataFrame()
for ticker in stocks_ticker_set:
    kms = fmpsdk.key_metrics(apikey=FMP_API_KEY, symbol=ticker, period='quarter', limit=-1)
    if kms is not None and len(kms) > 0:
        kms_df = pd.DataFrame(kms)
        kms_df['date'] = pd.to_datetime(kms_df['date'])
        kms_df = kms_df.set_index(['symbol', 'date'])
        print(f"Key metrics for {ticker} sample: \n{kms_df.columns}")
        keymetrics_all_df = pd.concat([keymetrics_all_df, kms_df])
        print(f"Key metrics concatenated {ticker}: \n{keymetrics_all_df.columns}")
        n_kms = len(kms_df)
        print(f"Total key metrics reports for {ticker}: {n_kms}")
    else:
        print(f"No {ticker} key metrics reports: kms={kms}")

Experiment with other stock data

In [None]:
keymetrics_all_df

In [None]:
kms_file = 'data/keymetrics_history.csv.bz2'
keymetrics_all_df.to_csv(kms_file)

## Prepare forward looking analyst estimates to be used as future covariates

In [None]:
DEFAULT_LIMIT=-1
import typing
from fmpsdk.url_methods import __return_json_v3, __validate_period


def analyst_estimates(
    apikey: str, 
    symbol: str, 
    period: str = "annual",
    limit: int = DEFAULT_LIMIT
) -> typing.Optional[typing.List[typing.Dict]]:
    """
    Query FMP /analyst-estimates/ API.

    :param apikey: Your API key.
    :param symbol: Company ticker.
    :param period: 'annual' or 'quarter'
    :param limit: Number of rows to return.
    :return: A list of dictionaries.
    """
    path = f"/analyst-estimates/{symbol}"
    query_vars = {
        "apikey": apikey,
        "symbol": symbol,
        "period": __validate_period(value=period),
        "limit": limit,
    }
    return __return_json_v3(path=path, query_vars=query_vars)



In [None]:

def fetch_estimates(period=None):
    assert period in ['quarter', 'annual']
    estimates_all_df = pd.DataFrame()
    for ticker in stocks_ticker_set: # ['ALTR']: 
        est = analyst_estimates(apikey=FMP_API_KEY, symbol=ticker, period=period, limit=-1)
        # print('est:', est)
        if est is not None and len(est) > 0:
            est_df = pd.DataFrame(est)
            est_df['date'] = pd.to_datetime(est_df['date'])
            est_df = est_df.set_index(['symbol', 'date'])
            # print(f"Analyst estimates for {ticker} sample: \n{est_df.columns}")
            estimates_all_df = pd.concat([estimates_all_df, est_df])
            # print(f"Key metrics concatenated {ticker}: \n{estimates_all_df.columns}")
            n_est = len(est_df)
            print(f"{n_est} total {ticker} {period} analyst estimates reports")
        else:
            print(f"No {ticker} {period} analyst estimates reports: est={est}")

    return estimates_all_df



In [None]:
# 'TW' in stocks_ticker_set

In [None]:
for p in ['annual', 'quarter']:
    estimates_all_df = fetch_estimates(p)
    est_file_name= f'data/analyst_estimates_{p}.csv.bz2'    
    estimates_all_df.to_csv(est_file_name)
    print(f'{p} estimates:', estimates_all_df)
    