In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import datetime

import utils

In [8]:
STARTING_DAY = datetime.datetime(1989, 1, 1)
FIRST_ACTION_DAY = datetime.datetime(1994, 1, 3)

# Load data

In [3]:
selected_stocks = pd.read_excel('data/selected_stocks.xlsx')

In [4]:
display(selected_stocks)

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Date added,Market Capitalization
0,AAPL,Apple Inc.,Information Technology,"Technology Hardware, Storage & Peripherals",1982-11-30,3453699000000.0
1,LLY,Eli Lilly and Company,Health Care,Pharmaceuticals,1970-12-31,820115900000.0
2,JPM,JPMorgan Chase,Financials,Diversified Banks,1975-06-30,588692400000.0
3,WMT,Walmart,Consumer Staples,Consumer Staples Merchandise Retail,1982-08-31,554159700000.0
4,XOM,ExxonMobil,Energy,Integrated Oil & Gas,1957-03-04,510507900000.0
5,HD,Home Depot (The),Consumer Discretionary,Home Improvement Retail,1988-03-31,331744400000.0
6,DIS,Walt Disney Company (The),Communication Services,Movies & Entertainment,1976-06-30,178105700000.0
7,GE,GE Aerospace,Industrials,Aerospace & Defense,1957-03-04,175082900000.0
8,NEE,NextEra Energy,Utilities,Multi-Utilities,1976-06-30,147946700000.0
9,SHW,Sherwin-Williams,Materials,Specialty Chemicals,1964-06-30,75060900000.0


In [5]:
# Download prices
symbols = selected_stocks['Symbol'].tolist()

In [6]:
prices = yf.download(symbols)

[*********************100%%**********************]  11 of 11 completed


In [7]:
# Exclude data before STARTING DAY
prices = prices.loc[STARTING_DAY:]

# Compute returns

In [9]:
# Separate variables
adj_close_df = prices['Adj Close']
close_df = prices['Close']
open_df = prices['Open']

adj_close_returns = utils.compute_returns(adj_close_df)
close_returns = utils.compute_returns(close_df)
open_returns = utils.compute_returns(open_df)

In [12]:
action_days = [pd.Timestamp(FIRST_ACTION_DAY)]
month = FIRST_ACTION_DAY.month
for date in adj_close_returns.loc[FIRST_ACTION_DAY:].index[1:]:
    if date.month != month:
        action_days.append(date)
        month = date.month

In [14]:
# Utility function to compute covariance matrix
def get_period_for_variance_computation_from_action_date(date: datetime.datetime | pd.Timestamp,
                                                         lookback: int = 5) -> datetime.datetime:
    year, month, day = date.year, date.month, date.day
    return datetime.datetime(year - lookback, month, day)

# Compute weights dataframe

## 1) Equal weight

In [15]:
number_of_action_days = len(action_days)
n_stocks = len(symbols)
matrix = (1/n_stocks) * np.ones((number_of_action_days, n_stocks), dtype=float)
ew_weights_df = pd.DataFrame(matrix, index=action_days, columns=adj_close_returns.columns)

In [16]:
display(ew_weights_df.head())

Ticker,AAPL,DIS,GE,HD,JPM,LLY,NEE,SHW,WMT,WY,XOM
1994-01-03,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909
1994-02-01,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909
1994-03-01,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909
1994-04-04,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909
1994-05-02,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909


In [17]:
# Save
ew_weights_df.to_csv('data/ew_weights.csv')

## 2) Min variance

In [18]:
# Create empty DataFrame
matrix = np.empty((number_of_action_days, n_stocks), dtype=float)
min_var_weights_df = pd.DataFrame(matrix, index=action_days, columns=adj_close_returns.columns)

for action_day in action_days:
    # 1 select correct observation period
    start = get_period_for_variance_computation_from_action_date(action_day)
    end = action_day - datetime.timedelta(days=1)
    ret: pd.DataFrame = adj_close_returns.loc[start: end]
    # 2 Compute covariance matrix
    covariance_matrix = ret.cov()
    # 3 Compute min-variance weights
    weights = utils.compute_min_variance_weights(covariance_matrix)
    min_var_weights_df.loc[action_day] = weights

In [19]:
display(min_var_weights_df.head())

Ticker,AAPL,DIS,GE,HD,JPM,LLY,NEE,SHW,WMT,WY,XOM
1994-01-03,0.012817,0.051956,0.036285,7.471360999999999e-19,0.0,0.055347,0.545524,0.08381,1.360571e-18,0.001242,0.213021
1994-02-01,0.014049,0.051255,0.037083,5.384721e-19,0.0,0.055578,0.532077,0.085135,1.21842e-18,0.003972,0.220851
1994-03-01,0.014825,0.053463,0.035988,5.4997299999999995e-19,0.0,0.057496,0.522139,0.086661,1.3004610000000001e-18,0.002571,0.226858
1994-04-04,0.013211,0.052456,0.03335,4.604424999999999e-19,0.0,0.060476,0.51752,0.087363,1.170497e-18,0.003527,0.232098
1994-05-02,0.012852,0.052827,0.042252,3.741979e-19,0.0,0.061809,0.50483,0.08732,1.022753e-18,0.001886,0.236224


In [20]:
# Save
min_var_weights_df.to_csv('data/min_variance_weights.csv')