In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import datetime

import utils

In [2]:
# First date kept in the price history; rows before it are dropped after download.
STARTING_DAY = datetime.datetime(1989, 1, 1)
# Reference date for the first monthly action (rebalancing) day. It lies five years
# after STARTING_DAY, which matches the 5-year covariance lookback used further down.
FIRST_ACTION_DAY = datetime.datetime(1994, 1, 1)

# Load data

In [3]:
# Read the table of selected stocks from the Excel workbook (path is relative to the notebook).
selected_stocks = pd.read_excel('data/selected_stocks.xlsx')

In [4]:
# Show the loaded stock table so the selection can be checked by eye.
display(selected_stocks)

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Date added,Market Capitalization
0,AAPL,Apple Inc.,Information Technology,"Technology Hardware, Storage & Peripherals",1982-11-30,3453699000000.0
1,LLY,Eli Lilly and Company,Health Care,Pharmaceuticals,1970-12-31,820115900000.0
2,JPM,JPMorgan Chase,Financials,Diversified Banks,1975-06-30,588692400000.0
3,WMT,Walmart,Consumer Staples,Consumer Staples Merchandise Retail,1982-08-31,554159700000.0
4,XOM,ExxonMobil,Energy,Integrated Oil & Gas,1957-03-04,510507900000.0
5,HD,Home Depot (The),Consumer Discretionary,Home Improvement Retail,1988-03-31,331744400000.0
6,DIS,Walt Disney Company (The),Communication Services,Movies & Entertainment,1976-06-30,178105700000.0
7,GE,GE Aerospace,Industrials,Aerospace & Defense,1957-03-04,175082900000.0
8,NEE,NextEra Energy,Utilities,Multi-Utilities,1976-06-30,147946700000.0
9,SHW,Sherwin-Williams,Materials,Specialty Chemicals,1964-06-30,75060900000.0


In [5]:
# Ticker symbols to download, taken from the 'Symbol' column of selected_stocks
symbols = selected_stocks['Symbol'].tolist()

In [6]:
# Download daily price history for every selected ticker.
# - start=STARTING_DAY: request only the window the notebook uses instead of relying
#   on the library's default period (which differs between yfinance versions).
# - auto_adjust=False: keep the raw 'Open'/'Close' columns AND the separate
#   'Adj Close' column that the returns computation reads; newer yfinance releases
#   default to adjusted prices and drop 'Adj Close'.
prices = yf.download(symbols, start=STARTING_DAY, auto_adjust=False)

[*********************100%%**********************]  11 of 11 completed


In [7]:
# Exclude data before STARTING_DAY (label-based slice on the index; the start label is inclusive)
prices = prices.loc[STARTING_DAY:]

# Compute returns

In [8]:
# Pull the three price fields used below out of the downloaded frame,
# then turn each one into a returns frame with the shared helper.
adj_close_df, close_df, open_df = (
    prices[field] for field in ('Adj Close', 'Close', 'Open')
)

adj_close_returns, close_returns, open_returns = map(
    utils.compute_returns, (adj_close_df, close_df, open_df)
)

In [9]:
# Build the monthly action (rebalancing) days: the first day of every month from
# FIRST_ACTION_DAY up to, but not including, the current month (the current month
# is still in progress, so it is left out).
today = datetime.date.today()
current_year = today.year
current_month = today.month
action_days = [
    datetime.datetime(year, month, 1)
    for year in range(FIRST_ACTION_DAY.year, current_year + 1)
    for month in range(1, 13)
    # Lower bound: honour the full FIRST_ACTION_DAY (not just its year), so a
    # first action day configured mid-year does not produce earlier months.
    # Upper bound: stop before the current (year, month).
    if datetime.datetime(year, month, 1) >= FIRST_ACTION_DAY
    and (year, month) < (current_year, current_month)
]

In [10]:
# Utility function returning the start of the covariance-estimation window
def get_period_for_variance_computation_from_action_date(date: datetime.datetime, lookback: int = 5) -> datetime.datetime:
    """Return the date `lookback` years before `date`, at midnight.

    Args:
        date: The action day for which the observation window is needed.
        lookback: Length of the observation window in whole years (default 5).

    Returns:
        A datetime with the same month and day as `date`, `lookback` years
        earlier. If `date` is 29 February and the target year is not a leap
        year, 28 February of the target year is returned instead.

    Raises:
        ValueError: If the target year is outside the range datetime supports.
    """
    target_year = date.year - lookback
    try:
        return datetime.datetime(target_year, date.month, date.day)
    except ValueError:
        # Only 29 Feb can be invalid after a pure year shift; anything else
        # (e.g. a year below 1) is a genuine error and must propagate.
        if (date.month, date.day) != (2, 29):
            raise
        return datetime.datetime(target_year, 2, 28)

# Compute weights dataframe

## 1) Equal weight

In [11]:
# Equal-weight portfolio: every stock receives 1/n_stocks on every action day.
number_of_action_days = len(action_days)
n_stocks = len(symbols)
matrix = np.full(
    shape=(number_of_action_days, n_stocks),
    fill_value=1 / n_stocks,
    dtype=float,
)
ew_weights_df = pd.DataFrame(
    data=matrix,
    index=action_days,
    columns=adj_close_returns.columns,
)

In [12]:
# Preview the first rows of the equal-weight table.
display(ew_weights_df.head())

Ticker,AAPL,DIS,GE,HD,JPM,LLY,NEE,SHW,WMT,WY,XOM
1994-01-01,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909
1994-02-01,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909
1994-03-01,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909
1994-04-01,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909
1994-05-01,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909


In [13]:
# Save the equal-weight table to CSV (the action-day index is written as the first column)
ew_weights_df.to_csv('data/weights.csv')

## 2) Min variance

In [14]:
# Min-variance weights: one row per action day, one column per ticker.
# Pre-fill with NaN rather than np.empty's uninitialised memory, so that a row
# which is never assigned shows up as missing instead of as plausible-looking garbage.
matrix = np.full((number_of_action_days, n_stocks), np.nan, dtype=float)
min_var_weights_df = pd.DataFrame(matrix, index=action_days, columns=adj_close_returns.columns)

for action_day in action_days:
    # 1 Select the observation period: from the lookback start up to the day
    #   before the action day. Label slicing with .loc includes both endpoints,
    #   so ending one day early keeps the action day itself out of the estimate.
    start = get_period_for_variance_computation_from_action_date(action_day)
    end = action_day - datetime.timedelta(days=1)
    ret: pd.DataFrame = adj_close_returns.loc[start:end]
    # 2 Compute the covariance matrix of the returns over that window
    covariance_matrix = ret.cov()
    # 3 Compute min-variance weights and store them as this action day's row
    weights = utils.compute_min_variance_weights(covariance_matrix)
    min_var_weights_df.loc[action_day] = weights

In [15]:
# Preview the first rows of the min-variance weight table.
display(min_var_weights_df.head())

Ticker,AAPL,DIS,GE,HD,JPM,LLY,NEE,SHW,WMT,WY,XOM
1994-01-01,0.012816,0.051956,0.036284,7.471795e-19,0.0,0.055347,0.545525,0.08381,1.360581e-18,0.001241,0.213021
1994-02-01,0.014048,0.051255,0.037082,5.385090999999999e-19,0.0,0.055578,0.532079,0.085135,1.218432e-18,0.003972,0.220851
1994-03-01,0.014825,0.053463,0.035986,5.50009e-19,0.0,0.057496,0.522141,0.086661,1.300479e-18,0.002571,0.226857
1994-04-01,0.013061,0.052952,0.034352,4.610879e-19,0.0,0.060137,0.517565,0.087169,1.167919e-18,0.003191,0.231573
1994-05-01,0.012852,0.052816,0.042258,3.7268999999999997e-19,0.0,0.061802,0.504813,0.087327,1.034773e-18,0.001894,0.236239


In [16]:
# Save the min-variance weight table to CSV (the action-day index is written as the first column)
min_var_weights_df.to_csv('data/min_variance_weights.csv')