In [49]:
import concurrent.futures
import json
import os
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
import seaborn as sns
import statsmodels.api as sm
from polygon.rest import RESTClient
from scipy.optimize import minimize
from tqdm import tqdm

# Read the Polygon API key from the environment rather than committing it to the notebook.
# Set it before starting Jupyter, e.g. `export POLYGON_API_KEY=<your key>`.
# NOTE: any key that was previously hard-coded in this cell should be treated as leaked and rotated.
api_key = os.environ.get('POLYGON_API_KEY')
if not api_key:
    raise RuntimeError("POLYGON_API_KEY environment variable is not set")
client = RESTClient(api_key)

# Silence FutureWarning / UserWarning noise and show every column when a frame is displayed.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter("ignore", category=UserWarning)
pd.set_option('display.max_columns', None)

<h1>Load Data</h1>

In [50]:
#load the full trade sample
df = pd.read_csv('fullsample.csv')
#projected alpha per row: 1.812708 (the mean trade return) where trade == 1, otherwise 0
df['proj'] = np.where(df['trade'].eq(1), 1.812708, 0)

In [51]:
def est(unix_ms_timestamp):
    """Return the US/Eastern calendar date of a Unix timestamp given in milliseconds.

    Parameters
    ----------
    unix_ms_timestamp : int or float
        Milliseconds since the Unix epoch (UTC).

    Returns
    -------
    str
        The date in the 'US/Eastern' timezone, formatted as 'YYYY-MM-DD'.
    """
    # Convert Unix timestamp in milliseconds to seconds
    unix_seconds = unix_ms_timestamp / 1000.0
    # Build a timezone-aware UTC datetime in one step. datetime.utcfromtimestamp() is
    # deprecated since Python 3.12 and returned a naive value that had to be localized by hand.
    utc_time = datetime.fromtimestamp(unix_seconds, tz=pytz.utc)
    # Convert to US/Eastern (handles both EST and EDT) and keep only the calendar date
    eastern_time = utc_time.astimezone(pytz.timezone('US/Eastern'))
    return eastern_time.strftime('%Y-%m-%d')

In [52]:
class quadprog():
    """Portfolio optimizer that trades projected alpha against overnight-return covariance.

    Typical use: construct, call `function_data` with the candidate trades for one date,
    call `solve_function`, then `scale_weights` to turn the solution into dollar amounts.
    Relies on the module-level `client` (Polygon REST client) and `est` helper.
    """

    #number of trading days of history used for each ticker
    LOOKBACK_DAYS = 252

    def __init__(self):
        """Load SPY overnight returns, the trading-date list and the risk-free series."""
        #get index data
        self.index_data = self.load_index('SPY')
        #get trading dates
        with open('dlist.json', 'r') as file:
            self.dlist = json.load(file)
        #get riskfree data
        rf_df = pd.read_csv('DGS3MO.csv')
        #presumably converts an annualized percent yield to a per-day figure - TODO confirm the 365 day count
        rf_df['riskfree'] = rf_df['DGS3MO']/365
        rf_df = rf_df.rename(columns={'observation_date': 'date'})
        self.riskfree = rf_df[['date','riskfree']]

    def load_index(self, index_name):
        """Download daily aggregates for `index_name` and return its overnight returns.

        Returns a DataFrame with columns 'date' (string from `est`) and
        'index_overnight' (next open over this close, minus one, in percent).
        """
        data = client.get_aggs(index_name,multiplier=1,timespan='day',adjusted='true',from_='2015-01-01',to='2024-11-30',limit = 50000)
        df = pd.DataFrame(data)
        df['index_overnight'] = ((df['open'].shift(-1)/df['close'])-1) * 100
        df['date'] = df['timestamp'].apply(est)
        return df[['date','index_overnight']]

    def ticker_data(self, ticker, end_date):
        """Download up to LOOKBACK_DAYS trading days of data for `ticker` ending on `end_date`.

        Parameters
        ----------
        ticker : str
            Symbol passed to the Polygon client.
        end_date : str
            Last date of the window; must be an entry of `self.dlist`
            (otherwise `list.index` raises ValueError).

        Returns
        -------
        pandas.DataFrame
            One row per trading date that also appears in the index and risk-free
            tables, with 'ticker_overnight', 'index_overnight', 'riskfree',
            'ticker_return' and 'index_return' columns.
        """
        #get ticker data
        end_idx = self.dlist.index(end_date)
        #clamp at 0: a negative index would wrap around to the end of the list (or raise IndexError)
        start_date = self.dlist[max(end_idx - self.LOOKBACK_DAYS, 0)]
        data = client.get_aggs(ticker,multiplier=1,timespan='day',adjusted='true',from_=start_date,to=end_date,limit = 50000)
        df = pd.DataFrame(data)
        df['date'] = df['timestamp'].apply(est)
        #fill dates not present (didn't trade on day, leads to incorrect overnight returns if not addressed)
        date_range = [date for date in self.dlist if start_date <= date <= end_date]
        df = pd.merge(pd.DataFrame({'date': date_range}), df, on='date', how='left')
        df['ticker_overnight'] = ((df['open'].shift(-1)/df['close'])-1) * 100   #overnight returns
        #add index and riskfree data
        df = pd.merge(df,self.index_data,on='date')
        df = pd.merge(df,self.riskfree,on='date')
        df['ticker_return'] = df['ticker_overnight'] - df['riskfree']
        df['index_return'] = df['index_overnight'] - df['riskfree']
        return df

    def alpha_matrix(self,dataframe):
        """Return the alpha vector: the 'proj' column followed by a 0 for the index."""
        a_vector = dataframe['proj'].tolist()   #alpha vector is given in data upload - use average trade return if threshold met, 0 if not
        a_vector.append(0)                      #append 0 alpha for index
        return np.array(a_vector)

    def sigma_matrix(self,dataframe):
        """Return the covariance matrix of overnight returns for every ticker plus the index.

        `dataframe` needs 'ticker' and 'date' columns; one download is issued per row.
        Raises ValueError when `dataframe` has no rows.
        """
        if len(dataframe) == 0:
            raise ValueError("Cannot build a covariance matrix from an empty dataframe")
        #get covariance matrix
        with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
            dataframe_list = list(tqdm(executor.map(self.ticker_data, dataframe['ticker'], dataframe['date']), total=len(dataframe)))
        #series are aligned on row position - assumes every row shares the same date window, TODO confirm
        all_data = pd.concat({ticker: df['ticker_overnight'] for ticker, df in zip(dataframe['ticker'], dataframe_list)},axis=1)
        #add index: reuse the first ticker's frame (same ticker/date as row 0) instead of downloading it again
        all_data['index'] = dataframe_list[0]['index_overnight']
        covariance_matrix = all_data.cov()
        return covariance_matrix

    def function_data(self,dataframe,lambda2):
        """Collect and store the optimizer inputs; returns the alpha vector.

        Sets `self.alpha`, `self.sigma`, `self.l2` (risk-penalty weight) and
        `self.tickers` (ticker names followed by 'index').
        """
        #collect/save data
        self.alpha = self.alpha_matrix(dataframe)
        self.sigma = self.sigma_matrix(dataframe)
        self.l2 = lambda2
        self.tickers = dataframe['ticker'].tolist() + ['index']
        return self.alpha

    def function(self,weights):
        """Objective to minimize: -(weights . alpha) + l2 * (weights' sigma weights)."""
        #use negative function, minimize
        return -np.dot(weights.T, self.alpha) + (self.l2 * (np.dot(np.dot(weights.T, self.sigma), weights)))

    def solve_function(self):
        """Minimize `function` and store the solution in `self.decimal_weights`.

        Raises ValueError (with the optimizer's message) if the solver reports failure.
        """
        initial_weights = np.full(len(self.alpha), 1 / len(self.alpha))         #set initial weights equal
        constraints = [{'type': 'eq', 'fun': lambda w: np.sum(abs(w)) - 1},     #absolute value of weights must sum to 1
                       {'type': 'ineq', 'fun': lambda w: w[:-1]}]               #can only short index
        result = minimize(self.function, initial_weights,constraints=constraints,options = {'maxiter': 100000})     #minimize negative function
        if result.success:
            self.decimal_weights = result.x
        else:
            #single formatted message (two positional args would render as a tuple)
            raise ValueError(f"Optimization failed: {result.message}")

    def scale_weights(self,portfolio_value):
        """Return a DataFrame of 'ticker' and 'dollar_weight' (rounded to cents) for `portfolio_value`."""
        dollar_weights = portfolio_value * self.decimal_weights     #scale weights from decimal amount of portfolio to dollars
        #adding 0.0 turns a rounded -0.0 into 0.0 so zero positions do not display with a minus sign
        return pd.DataFrame({'ticker': self.tickers, 'dollar_weight': [round(dw, 2) + 0.0 for dw in dollar_weights]})

In [53]:
#build the optimizer, feed it the 2024-09-27 candidates with no risk penalty, then solve
qp = quadprog()
qp.function_data(dataframe=df.loc[df['date'] == '2024-09-27'], lambda2=0)
qp.solve_function()
#convert the solved weights into dollar positions for a 10000 portfolio
result = qp.scale_weights(portfolio_value=10000)

100%|██████████| 138/138 [00:04<00:00, 30.24it/s]


In [54]:
#display the dollar position per ticker (last row is the index)
result

Unnamed: 0,ticker,dollar_weight
0,SGN,909.3
1,CERO,-0.0
2,PLCE,-0.0
3,SBC,-0.0
4,NNE,-0.0
...,...,...
134,UONE,909.3
135,AIFF,909.3
136,TANH,-0.0
137,ME,-0.0
