In [None]:
pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import ast
from matplotlib import pyplot as plt
from tqdm import tqdm
from pandas.core.indexes.datetimes import date
from datetime import timedelta

import requests
from concurrent.futures import ThreadPoolExecutor
import concurrent.futures

import warnings
import numba
from numba import jit

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

We can get the **full** symbol universe from this Git repo: https://github.com/mlapenna7/yh_symbol_universe

It will have to be downloaded and uploaded to this colab notebook whenever the runtime resets.

In [None]:
# Load the symbol-universe file as raw text (trailing whitespace stripped).
with open('yhallsym.txt', mode='r') as symbols_file:
  all_tickers = symbols_file.read().rstrip()


In [None]:
# Parse the text read from 'yhallsym.txt' (held in all_tickers) into a Python object.
# ast.literal_eval only accepts literals, so the file content is never executed as code.
all_tickers_dict = ast.literal_eval(all_tickers)

But generally speaking, we should ignore the above and focus on just the S&P500 since it's an easier calculation and represents safer investments.

In [None]:
# Analysis window shared by the data-loading cells below.
start_dt, end_dt = (pd.to_datetime(day) for day in ('2000-01-01', '2022-08-01'))

In [None]:
# S&P 500 constituents as published in the `datasets` GitHub repository at
# download time; malformed CSV rows produce a warning instead of an error.
spx_tickers_df = pd.read_csv(
    'https://raw.githubusercontent.com/datasets/s-and-p-500-companies/master/data/constituents.csv',
    on_bad_lines='warn',
)

The list 'target-equities.csv' originated with a simple stock screen via ThinkorSwim to look for equities that were exposed to the Building Materials sector.

In [None]:
# Screened candidate equities; later cells read the 'Symbol' column of this frame.
target_tickers_df = pd.read_csv(filepath_or_buffer='target-equities.csv')

The mortgage rate data comes from the weekly rate information published alongside the MBA Purchase Index and retrieved from Bloomberg.

In [None]:
# Load the mortgage-rate series: the unnamed first CSV column ('Unnamed: 0')
# holds the dates and becomes the index; only 'MortgageRates' is kept.
# The series is clipped to [start_dt, end_dt), the same window used for the
# Fed actions and the price download, so the series cover the same period.
mortgage_rates_df = (
    pd.read_csv('mortgage-rates.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Unnamed: 0']))
    .set_index('Date')[['MortgageRates']]
    .loc[lambda frame: (frame.index >= start_dt) & (frame.index < end_dt)]
)

The list of Fed Funds rate changes in 'fed-actions.csv' was drawn from: https://en.wikipedia.org/wiki/History_of_Federal_Open_Market_Committee_actions

In [None]:
# Read the Fed actions, index them by their parsed 'fed-date', and keep only
# the rows that fall inside [start_dt, end_dt).
fed_actions_df = (
    pd.read_csv('fed-actions.csv')
    .assign(**{'fed-date': lambda frame: pd.to_datetime(frame['fed-date'])})
    .set_index('fed-date', drop=True)
    .loc[lambda frame: (frame.index >= start_dt) & (frame.index < end_dt)]
)

In [None]:
# Option A: the full S&P 500, plus the screened equities, plus the index itself.
# Building a set first removes symbols that appear in more than one source.
target_tickers_lst = list({
    *spx_tickers_df['Symbol'].to_list(),
    *target_tickers_df['Symbol'].to_list(),
    '^GSPC',
})

In [None]:
# Option B: only the screened equities plus the index itself (duplicates removed).
# Running this cell replaces any value assigned to target_tickers_lst earlier.
target_tickers_lst = list({*target_tickers_df['Symbol'].to_list(), '^GSPC'})

In [None]:
# Fetch monthly price history for every target ticker from Yahoo Finance.
targets_df = yf.download(
    tickers=target_tickers_lst,
    start=start_dt,
    end=end_dt,
    interval='1mo',
)

# Keep only the columns whose first label is 'Close', then flatten the column
# labels down to their second element.
close_keys = [(col[0], col[1]) for col in targets_df.columns if col[0] == 'Close']
targets_df = targets_df[close_keys]
targets_df.columns = [key[1] for key in close_keys]

In [None]:
# Download daily prices for the 7 days either side of each Fed action
# (first two actions only). Every action needs its own download; the
# resulting frames are stacked row-wise afterwards.
window = timedelta(days=7)
dfs_lst = [
    yf.download(
        tickers=target_tickers_lst,
        start=dt - window,
        end=dt + window,
        interval='1d',
        threads=True,
    )
    for dt in tqdm(fed_actions_df.index[:2])
]

fed_reaction_df = pd.concat(dfs_lst, axis=0)

In [None]:
def get_prices_async(action_dt, window_days=7):
  """Download daily prices for all target tickers around one Fed action.

  The call itself blocks until yf.download returns; any concurrency comes
  from the caller (for example by submitting this function to an executor).

  Args:
    action_dt: Date of the Fed action; must support +/- timedelta.
    window_days: Number of days before and after action_dt to request.
      Defaults to 7.

  Returns:
    Whatever yf.download returns for target_tickers_lst over
    [action_dt - window_days, action_dt + window_days] at '1d' interval.
  """
  window = timedelta(days=window_days)

  # threads=False keeps yfinance from spawning its own per-ticker threads.
  return yf.download(
      tickers=target_tickers_lst,
      start=action_dt - window,
      end=action_dt + window,
      interval='1d',
      threads=False,
  )


In [None]:
# Queue one download per Fed action (first five only) and keep the Future
# objects so that a later cell can collect the results.
# A single worker is used on purpose: yf.download keeps its in-progress
# results in module-level state, so overlapping calls from several threads can
# mix their data and error reports.
# NOTE(review): confirm against the installed yfinance version before raising max_workers.
futures = []
with ThreadPoolExecutor(max_workers=1) as executor:
  for _dt in fed_actions_df.index[:5]:
    futures.append(executor.submit(get_prices_async, _dt))

[*********************100%***********************]  685 of 685 completed

28 Failed downloads:
- LEN/B: No timezone found, symbol may be delisted
- AAN: Data doesn't exist for startDate = 1603944000, endDate = 1605157200
- ANAC: Data doesn't exist for startDate = 1603944000, endDate = 1605157200
- OGN: Data doesn't exist for startDate = 1603944000, endDate = 1605157200
- BF.B: No data found for this date range, symbol may be delisted
- AERC: Data doesn't exist for startDate = 1603944000, endDate = 1605157200
- MBINN: Data doesn't exist for startDate = 1603944000, endDate = 1605157200
- CEG: Data doesn't exist for startDate = 1603944000, endDate = 1605157200
- BITI: Data doesn't exist for startDate = 1646802000, endDate = 1648008000
- ARHS: Data doesn't exist for startDate = 1603944000, endDate = 1605157200
- DFH: Data doesn't exist for startDate = 1603944000, endDate = 1605157200
- WEBR: Data doesn't exist for startDate = 1603944000, endDate = 1605157200
- HMA: Data doesn't exist for s

In [None]:
# Gather every download in submission order (i.e. the order of the Fed actions)
# so the row order is the same on every run; .result() re-raises any exception
# that occurred inside a worker.
frames_lst = [future.result() for future in futures]

# One concat over the full list instead of growing a frame inside the loop.
# pd.concat rejects an empty list, so fall back to an empty frame.
fed_reactions_df = pd.concat(frames_lst) if frames_lst else pd.DataFrame()

In [None]:
# Back-of-envelope arithmetic; the two trailing divisions by 60 suggest a
# seconds -> hours conversion.
# NOTE(review): the meaning of 636, 4 and 107 is not recorded here — TODO confirm and name them.
((636/4)*107)/60/60

4.725833333333333

In [None]:
# Persist the combined Fed-window prices as CSV in the working directory.
fed_reactions_df.to_csv(path_or_buf='fed-reaction.csv')

In [None]:
# This function calculates the cumulative growth rate for each equity relative to the S&P500
def calc_growth(df: pd.DataFrame) -> pd.DataFrame:
  """Cumulative growth of each ticker, expressed relative to the S&P 500.

  The baseline of every column is its first non-null price. The '^GSPC'
  column of the result holds the index's own cumulative growth
  (price / baseline - 1); every other column holds that ticker's cumulative
  growth minus the '^GSPC' growth on the same row.

  Args:
    df: Price table with one column per ticker, including '^GSPC'. The
      position of '^GSPC' among the columns does not matter.

  Returns:
    DataFrame on df's index with '^GSPC' first, followed by the remaining
    tickers in their original order. Columns without any price are omitted;
    a cell is NaN where the ticker or '^GSPC' has no price on that row.

  Raises:
    KeyError: if '^GSPC' is missing, or has no prices while other tickers do.
  """
  benchmark = '^GSPC'
  if benchmark not in df.columns:
    raise KeyError(benchmark)

  # Tickers without a single price get no column in the result.
  prices_df = df.dropna(axis=1, how='all')
  if prices_df.shape[1] == 0:
    return pd.DataFrame(index=df.index)
  if benchmark not in prices_df.columns:
    raise KeyError(benchmark)

  # After a back-fill the first row holds each column's first non-null price.
  baseline_srs = prices_df.bfill().iloc[0]
  growth_df = prices_df.div(baseline_srs, axis='columns') - 1

  # Subtract the benchmark row-by-row from every other ticker.
  relative_df = growth_df.drop(columns=benchmark).sub(growth_df[benchmark], axis='index')

  return pd.concat([growth_df[[benchmark]], relative_df], axis=1)

In [None]:
# Relative cumulative growth for every downloaded ticker.
growth_df = calc_growth(df=targets_df)

In [None]:
# Add the cumulative change of the mortgage rate relative to its value on
# start_dt as a 'Growth' column. Computed for the whole column at once; a
# KeyError is raised if start_dt is not one of the index dates.
base_rate_flt = mortgage_rates_df.loc[start_dt, 'MortgageRates']
mortgage_rates_df = mortgage_rates_df.assign(
    Growth=mortgage_rates_df['MortgageRates'] / base_rate_flt - 1
)

In [None]:
# For every equity whose history reaches back to our start date, compute the
# correlation between its growth relative to the S&P500 and the growth in
# mortgage rates. Equities that do not qualify keep NaN.
corr_df = pd.DataFrame(index=list(growth_df), columns=['mortgage-rate-corr'], dtype=float)
for ticker in tqdm(list(growth_df)):
  ticker_srs = growth_df[ticker].dropna()

  # Skip tickers with no data at all or whose first data point is after start_dt.
  if ticker_srs.empty or ticker_srs.index[0] > start_dt:
    continue

  # Keep the ticker's first date in a local name so the notebook-wide start_dt
  # is not modified as a side effect of this loop.
  first_dt = ticker_srs.index[0]
  x_growth_arr = ticker_srs.to_numpy()
  y_growth_arr = mortgage_rates_df.loc[mortgage_rates_df.index >= first_dt, 'Growth'].to_numpy()

  # The two arrays are paired by position, so they must have the same length;
  # report and skip a mismatching ticker instead of aborting the whole loop.
  if len(x_growth_arr) != len(y_growth_arr):
    warnings.warn(
        f"{ticker}: {len(x_growth_arr)} growth values vs {len(y_growth_arr)} "
        "mortgage-rate values; correlation skipped"
    )
    continue

  corr_df.loc[ticker, 'mortgage-rate-corr'] = np.corrcoef(x_growth_arr, y_growth_arr)[0][1]

In [None]:
# Narrow the correlation dataframe to the ten lowest (most negative)
# correlations for graphing. The explicit copy gives an independent frame, so
# columns can be added to it later without writing into a slice of the sorted result.
graph_df = corr_df.sort_values('mortgage-rate-corr').head(10).copy()

In [None]:
# Get additional company information on the most likely candidates.
# Maps each new graph_df column to the key it is read from in the ticker's info dict.
info_fields = {
    'name': 'longName',
    'sector': 'sector',
    'ebitda-margins': 'ebitdaMargins',
    'long-summary': 'longBusinessSummary',
}
for tkr in tqdm(graph_df.index):
  try:
    # Read .info once per ticker and reuse it for every field.
    info_dct = yf.Ticker(tkr).info
    for column, key in info_fields.items():
      graph_df.loc[tkr, column] = info_dct.get(key)
  except Exception as exc:
    # Best effort: report which ticker failed and why, then carry on.
    # Catching Exception (not everything) keeps the loop interruptible.
    print(f"{tkr}: could not retrieve company info ({exc!r})")


In [None]:
# Create a function to quickly graph the values on a two y-axis line plot
def graph_series(tkr, x_srs, y1_srs, y2_srs):
  """Plot two series against a shared x-axis with separate left/right y-axes.

  Args:
    tkr: Ticker symbol; used as the left axis label and in the title.
    x_srs: Shared x values (labelled "Date").
    y1_srs: Values drawn in red against the left axis.
    y2_srs: Values drawn in blue against the right axis, which is labelled
      "Mortgage Rates".

  Returns:
    None. The figure is shown and then closed.
  """
  fig, ax = plt.subplots()

  # First series on the left axis. The label comes straight from the tkr
  # argument, so the function does not depend on any notebook-level frame.
  ax.plot(x_srs, y1_srs, color='red')
  ax.set_xlabel("Date", fontsize=14)
  ax.set_ylabel(str(tkr), color="red", fontsize=14)
  ax.set_title(f"{tkr} vs. mortgage rates")

  # Twin axis so the second series gets its own scale on the right.
  ax2 = ax.twinx()
  ax2.plot(x_srs, y2_srs, color="blue")
  ax2.set_ylabel("Mortgage Rates", color="blue", fontsize=14)

  plt.show()
  # Release the figure; this function is called once per ticker in a loop.
  plt.close(fig)

In [None]:
# Plot each candidate's prices against mortgage rates.
# The prices are reindexed onto the mortgage-rate dates so both y-series have
# exactly one value per x value; dates without a price show up as gaps (NaN).
for tkr in graph_df.index:
  prices_srs = targets_df[tkr].reindex(mortgage_rates_df.index)
  graph_series(tkr, mortgage_rates_df.index, prices_srs, mortgage_rates_df['MortgageRates'])

Additional exploration.
1. To overcome the noise it might be best to look at price swings only on the day of and the day following rate change announcements. Of course, mortgage rates are tied more to treasuries than the Fed Funds but... https://en.wikipedia.org/wiki/History_of_Federal_Open_Market_Committee_actions

In [None]:
# Keep every ticker whose correlation with mortgage rates is below -0.7,
# most negative first (filter once, then sort once).
graph_df = corr_df[corr_df['mortgage-rate-corr'] < -0.7].sort_values('mortgage-rate-corr').copy()

# The cells below filter on 'sector', which corr_df does not carry, so look it
# up for each selected ticker. The value stays None when the lookup fails.
graph_df['sector'] = None
for tkr in tqdm(graph_df.index):
  try:
    graph_df.loc[tkr, 'sector'] = yf.Ticker(tkr).info.get('sector')
  except Exception as exc:
    print(f"{tkr}: could not retrieve sector ({exc!r})")

In [None]:
# Distinct sector values among the selected tickers.
graph_df.loc[:, 'sector'].unique()

In [None]:
# Restrict the candidates to the sectors of interest and show the result.
sectors_of_interest = ['Real Estate', 'Utilities', 'Basic Materials', 'Financial Services']
graph_df = graph_df[graph_df['sector'].isin(sectors_of_interest)]
display(graph_df)