In [2]:
import concurrent.futures
import json
import os
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
import seaborn as sns
import statsmodels.api as sm
from cvxopt import solvers, matrix
from polygon.rest import RESTClient
from scipy.optimize import minimize
from tqdm import tqdm

# Read the Polygon.io API key from the environment so the secret is never
# stored in the notebook. The key that used to be hardcoded on this line has
# been exposed to anyone with access to the file and should be revoked.
api_key = os.environ.get('POLYGON_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the POLYGON_API_KEY environment variable before running this notebook.'
    )
client = RESTClient(api_key)

# Notebook-wide display/warning configuration.
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('display.max_columns', None)
warnings.simplefilter("ignore", category=UserWarning)

<h1>Load Data</h1>

In [4]:
# Read main.csv; rows of this frame are later passed to
# data_collector.para_collect, which reads its 'ticker' and 'date' columns.
df = pd.read_csv('main.csv')

In [5]:
def est(unix_ms_timestamp):
    '''Return the US Eastern calendar date of a Unix timestamp in milliseconds.

    Parameters
    ----------
    unix_ms_timestamp : int or float
        Milliseconds since the Unix epoch (UTC).

    Returns
    -------
    str
        The date formatted as 'YYYY-MM-DD', as observed in the
        America/New_York time zone (so daylight saving time is honoured).
    '''
    # Build a timezone-aware UTC instant directly. This replaces
    # datetime.utcfromtimestamp(), which is deprecated since Python 3.12,
    # and the manual localize -> astimezone -> strip-tzinfo sequence.
    utc_instant = pd.Timestamp(unix_ms_timestamp, unit='ms', tz='UTC')
    # 'America/New_York' is the canonical name of the zone the legacy
    # 'US/Eastern' alias points to; the conversion rules are identical.
    eastern_instant = utc_instant.tz_convert('America/New_York')
    return eastern_instant.strftime('%Y-%m-%d')

In [72]:
class data_collector():
    '''Fetch, align and summarise overnight returns for tickers and an index.

    On construction the object loads the list of dates from ``dlist.json``,
    downloads the index bars and reads the ``DGS3MO.csv`` rate file. After
    ``para_collect`` has been called, ``raw_sigma`` and ``one_factor_sigma``
    return two covariance estimates computed from the same cleaned rows.
    '''

    # Number of positions to step back in ``dlist`` from a ticker's end date
    # to find the start of its history window.
    LOOKBACK_DAYS = 252

    def __init__(self, index_name:str,):
        '''Load the date list, the index returns and the risk-free series.

        index_name: ticker symbol passed to ``client.get_aggs`` for the index.
        '''
        with open('dlist.json','r') as file:
            self.dlist = json.load(file)
        self.index_data = self.load_index(index_name)
        rf_df = pd.read_csv('DGS3MO.csv').rename(columns={'observation_date': 'date'})
        # NOTE(review): presumably DGS3MO is an annualised percent yield and
        # /365 turns it into a daily percent rate -- confirm the day count.
        rf_df['riskfree'] = rf_df['DGS3MO']/365
        self.riskfree = rf_df[['date','riskfree']]

    @staticmethod
    def _overnight_pct(bars: pd.DataFrame) -> pd.Series:
        '''Percent change from each row's close to the following row's open.'''
        return ((bars['open'].shift(-1)/bars['close'])-1) * 100

    def load_index(self, index_name: str) -> pd.DataFrame:
        '''Collect index data, compute overnight percentage change.

        Returns a frame with columns ``date`` (string produced by ``est``)
        and ``index_overnight`` for the fixed range 2010-01-01 to 2024-11-30.
        '''
        data = client.get_aggs(index_name,multiplier=1,timespan='day',adjusted='true',from_='2010-01-01',to='2024-11-30',limit = 50000)
        df = pd.DataFrame(data)
        df['index_overnight'] = self._overnight_pct(df)
        df['date'] = df['timestamp'].apply(est)
        return df[['date','index_overnight']]

    def ticker_data(self, ticker, end_date):
        '''Collect and clean single ticker data, compute ticker and index risk-adjusted returns, match by date.

        ticker: symbol passed to ``client.get_aggs``.
        end_date: last date of the window; must be an element of ``self.dlist``.

        Returns a frame with columns ``date``, ``{ticker}_overnight``,
        ``index_overnight``, ``{ticker}_return`` and ``index_return``; rows
        containing any missing value are dropped.

        Raises ValueError if ``end_date`` is not in ``self.dlist`` (from
        ``list.index``) or if fewer than ``LOOKBACK_DAYS`` entries precede it.
        '''
        end_idx = self.dlist.index(end_date)
        start_idx = end_idx - self.LOOKBACK_DAYS
        # A negative index would silently wrap to the END of dlist and yield
        # a start date later than end_date, so reject it explicitly.
        if start_idx < 0:
            raise ValueError(
                f'{end_date} has only {end_idx} prior entries in dlist; '
                f'{self.LOOKBACK_DAYS} are required'
            )
        start_date = self.dlist[start_idx]
        data = client.get_aggs(ticker,multiplier=1,timespan='day',adjusted='true',from_=start_date,to=end_date,limit = 50000)
        df = pd.DataFrame(data)
        df['date'] = df['timestamp'].apply(est)
        #fill dates not present (didn't trade on day, leads to incorrect overnight returns if not addressed)
        date_range = [date for date in self.dlist if start_date <= date <= end_date]
        df = pd.merge(pd.DataFrame({'date': date_range}), df, on='date', how='left')
        df[f'{ticker}_overnight'] = self._overnight_pct(df)   #overnight returns
        #add index data, excess returns
        df = pd.merge(df,self.index_data,on='date')
        df = pd.merge(df,self.riskfree,on='date')
        df[f'{ticker}_return'] = df[f'{ticker}_overnight'] - df['riskfree']   #excess returns
        df['index_return'] = df['index_overnight'] - df['riskfree']
        return df[['date',f'{ticker}_overnight','index_overnight',f'{ticker}_return','index_return']].dropna()

    def para_collect(self,dataframe):
        '''Collect and clean ticker/index data. Ensures that both covariance matrices are calculated using identical data.

        dataframe: frame with ``ticker`` and ``date`` columns; each row is
        fetched with ``ticker_data(ticker, date)`` on a thread pool.

        Sets ``self.cleaned_data`` (per-ticker frames merged on ``date``, rows
        with any missing value dropped) and ``self.tickers``.

        Raises ValueError if ``dataframe`` has no rows.
        '''
        # Fail early: with no rows there is nothing to merge and the result
        # frame would never be created.
        if len(dataframe) == 0:
            raise ValueError('para_collect requires at least one (ticker, date) row')
        with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
            dataframe_list = list(tqdm(executor.map(self.ticker_data, dataframe['ticker'], dataframe['date']), total=len(dataframe)))
        finaldf = None
        for df,ticker in zip(dataframe_list,dataframe['ticker']):
            if finaldf is None:
                # The first frame also supplies the shared index columns.
                finaldf = df[['date',f'{ticker}_overnight','index_overnight',f'{ticker}_return','index_return']]
            else:
                finaldf = pd.merge(finaldf,df[['date',f'{ticker}_overnight',f'{ticker}_return']],on='date',how='outer')
        # Outer merge + dropna keeps only dates present for every ticker.
        self.cleaned_data = finaldf.dropna()
        self.tickers = dataframe['ticker']

    def raw_sigma(self):
        '''Use .cov() to estimate no-factor covariance matrix.

        Uses the ``*_overnight`` columns of ``self.cleaned_data`` except the
        index column; the result is labelled with those column names.
        '''
        df = self.cleaned_data[[col for col in self.cleaned_data.columns if 'overnight' in col and 'index' not in col]]
        return df.cov()

    def one_factor_sigma(self):
        '''Estimate one-factor covariance matrix using ticker beta, idiosyncratic var, and factor var.

        Each ticker's ``*_return`` column is regressed (OLS with intercept) on
        ``index_return``. The result is
        ``index_var * outer(betas, betas) + diag(residual variances)``,
        returned as an unlabelled DataFrame in ticker column order.
        '''
        df = self.cleaned_data[[col for col in self.cleaned_data.columns if 'return' in col]]
        index_var = df['index_return'].var()
        # The regressor matrix is the same for every ticker; build it once.
        X = sm.add_constant(df['index_return'])
        betas = []
        idio_var = []
        for ticker in [col for col in df.columns if 'index' not in col]:
            model = sm.OLS(df[ticker],X).fit()
            # params is label-indexed; use .iloc for the positional lookup
            # (integer keys via [] on such a Series are deprecated in pandas).
            betas.append(model.params.iloc[1])
            idio_var.append(model.resid.var())
        betas = np.array(betas)
        idio_var = np.diag(np.array(idio_var))
        systematic_cov = index_var * np.outer(betas, betas)
        sigma = systematic_cov + idio_var
        return pd.DataFrame(sigma)

<h1>Min var portfolio</h1>

In [60]:
def min_var(sigma):
    '''Fully-invested minimum-variance weights for a covariance matrix.

    Computes w = inv(sigma) @ 1 / (1' @ inv(sigma) @ 1).

    Parameters
    ----------
    sigma : array-like of shape (n, n)
        Covariance matrix (ndarray, DataFrame or nested list).

    Returns
    -------
    numpy.ndarray of shape (n,)
        Portfolio weights summing to 1, in the row order of ``sigma``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``sigma`` is singular or not square.
    '''
    sigma = np.asarray(sigma, dtype=float)
    ones = np.ones(sigma.shape[0])
    # Solve sigma @ x = 1 rather than forming the explicit inverse: it is
    # cheaper and better conditioned, and x is exactly inv(sigma) @ 1.
    unnormalised = np.linalg.solve(sigma, ones)
    # 1' @ inv(sigma) @ 1 is simply the sum of that solution vector.
    return unnormalised / unnormalised.sum()

In [73]:
# Build both covariance estimates for the first five rows of df, using SPY
# as the index, then tabulate the two sets of minimum-variance weights.
dc = data_collector('SPY')
dc.para_collect(df.head(5))
nf_weights = min_var(dc.raw_sigma().to_numpy())
sf_weights = min_var(dc.one_factor_sigma().to_numpy())
mv_df = pd.DataFrame(
    {
        'No Factor Weights': nf_weights,
        'Single Factor Weights': sf_weights,
    },
    index=df['ticker'].head(5),
)
mv_df

100%|██████████| 5/5 [00:00<00:00, 16.60it/s]


Unnamed: 0_level_0,No Factor Weights,Single Factor Weights
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
AAU,0.014687,0.091961
ARX,0.682878,0.60341
AXU,0.029065,0.037352
AZC,0.231455,0.218124
CSR,0.041915,0.049153
