In [183]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from polygon.rest import RESTClient
import json
import statsmodels.api as sm
from datetime import datetime
import pytz
from tqdm import tqdm
import concurrent.futures
from scipy.optimize import minimize

import os

# Credentials must never be committed inside a notebook: read the Polygon key
# from the environment instead. Any key that was previously embedded in this
# file should be treated as compromised and rotated.
api_key = os.environ.get('POLYGON_API_KEY')
if not api_key:
    raise RuntimeError("Set the POLYGON_API_KEY environment variable before running this notebook.")
client = RESTClient(api_key)

# Notebook-wide display/warning configuration.
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('display.max_columns', None)
warnings.simplefilter("ignore", category=UserWarning)

<h1>Load Data</h1>

In [184]:
# Load the full sample from the working directory. The frame displayed below
# has ticker, date, pred and trade columns plus two "Unnamed" columns.
# NOTE(review): the "Unnamed" columns look like row indexes written out by
# earlier to_csv calls -- confirm, and consider dropping them at load time.
df = pd.read_csv('fullsample.csv')
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0.1,Unnamed: 0,ticker,date,pred,trade
0,27219,BVXV,2021-01-04,-0.441402,0
1,151092,VINC,2021-01-04,0.233248,0
2,3737,AEZS,2021-01-04,-0.878808,0
3,136483,SSTK,2021-01-04,-0.253395,0
4,110749,PED,2021-01-04,1.059537,0
...,...,...,...,...,...
156407,3659,AEVA,2024-09-27,0.967824,0
156408,147874,UONE,2024-09-27,1.835434,1
156409,5223,AIFF,2024-09-27,1.853510,1
156410,139626,TANH,2024-09-27,0.167393,0


In [185]:
def est(unix_ms_timestamp):
    """Return the US Eastern calendar date of a Unix timestamp in milliseconds.

    Parameters
    ----------
    unix_ms_timestamp : int or float
        Milliseconds since the Unix epoch (UTC).

    Returns
    -------
    str
        The date in the America/New_York zone (the canonical name behind the
        ``US/Eastern`` alias), formatted as ``YYYY-MM-DD``. Daylight saving
        time is honoured, so despite the function name the offset is not
        fixed at EST.
    """
    # Build a timezone-aware UTC instant directly; this avoids
    # datetime.utcfromtimestamp, which is deprecated since Python 3.12.
    eastern_time = pd.Timestamp(unix_ms_timestamp, unit='ms', tz='UTC').tz_convert('America/New_York')
    return eastern_time.strftime('%Y-%m-%d')

In [188]:
class quadprog():
    """Portfolio optimiser built on a single-index factor model.

    For every candidate ticker an OLS regression of its excess overnight
    return on the index excess overnight return yields alpha, beta and the
    residual variance. Those feed the objective
    ``-w.alpha + lambda2 * w' Sigma w`` with
    ``Sigma = beta beta' * var(index) + diag(residual variances)``,
    which ``solve_function`` minimises under allocation constraints.

    The class relies on the module-level ``client`` (Polygon REST client) and
    ``est`` helper, and on ``dlist.json`` / ``DGS3MO.csv`` in the working
    directory.
    """

    # Number of trading days of history used for each factor regression.
    LOOKBACK_DAYS = 252

    def __init__(self):
        """Load index overnight returns, the trading calendar and the risk-free series."""
        #get index data
        self.index_data = self.load_index('SPY')
        #get trading dates
        # The calendar is assumed to be sorted ascending; the lookback
        # arithmetic in _window_dates depends on that ordering.
        with open('dlist.json', 'r') as file:
            self.dlist = json.load(file)
        #get riskfree data
        rf_df = pd.read_csv('DGS3MO.csv').rename(columns={'observation_date': 'date'})
        # NOTE(review): presumably DGS3MO is an annualised percentage, so /365
        # gives a per-calendar-day rate in the same percent units as the
        # returns below -- confirm the day-count convention.
        rf_df['riskfree'] = rf_df['DGS3MO']/365
        self.riskfree = rf_df[['date','riskfree']]

    def load_index(self, index_name, from_='2015-01-01', to='2024-11-30'):
        """Download daily bars for ``index_name`` and compute overnight returns.

        Parameters
        ----------
        index_name : str
            Ticker used as the market index.
        from_, to : str
            Inclusive ``YYYY-MM-DD`` bounds of the download. The defaults
            reproduce the range that was previously hard-coded.

        Returns
        -------
        pandas.DataFrame
            Columns ``date`` and ``index_overnight``; the latter is the
            percentage return from that day's close to the next bar's open
            (NaN on the last row).
        """
        data = client.get_aggs(index_name,multiplier=1,timespan='day',adjusted='true',from_=from_,to=to,limit = 50000)
        df = pd.DataFrame(data)
        df['index_overnight'] = ((df['open'].shift(-1)/df['close'])-1) * 100
        df['date'] = df['timestamp'].apply(est)
        return df[['date','index_overnight']]

    def _window_dates(self, end_date):
        """Return the trading dates of the lookback window ending at ``end_date``.

        The window holds up to ``LOOKBACK_DAYS`` prior dates plus ``end_date``.
        The start is clamped to the first calendar date: a negative index
        would otherwise wrap around to the end of the list (or raise
        IndexError) when fewer than ``LOOKBACK_DAYS`` prior dates exist.
        Raises ValueError (from ``list.index``) if ``end_date`` is not a
        trading date.
        """
        end_pos = self.dlist.index(end_date)
        start_pos = max(end_pos - self.LOOKBACK_DAYS, 0)
        return self.dlist[start_pos:end_pos + 1]

    def _index_excess_returns(self, end_date):
        """Return index excess overnight returns over the lookback window.

        Uses only the cached calendar, index and risk-free frames, so no
        network request is made. The result has columns ``date``,
        ``index_overnight``, ``riskfree`` and ``index_return``.
        """
        frame = pd.DataFrame({'date': self._window_dates(end_date)})
        frame = pd.merge(frame, self.index_data, on='date')
        frame = pd.merge(frame, self.riskfree, on='date')
        frame['index_return'] = frame['index_overnight'] - frame['riskfree']
        return frame

    def ticker_data(self, ticker, end_date):
        """Download a ticker's daily bars and build its regression data set.

        Parameters
        ----------
        ticker : str
            Symbol to download.
        end_date : str
            Last date of the window (``YYYY-MM-DD``); must be in ``self.dlist``.

        Returns
        -------
        pandas.DataFrame
            One row per trading date in the window that also has index and
            risk-free data, with the raw bar columns plus
            ``ticker_overnight``, ``index_overnight``, ``riskfree``,
            ``ticker_return`` and ``index_return`` (returns in percent,
            the last two in excess of the risk-free rate).
        """
        #get ticker data
        date_range = self._window_dates(end_date)
        start_date = date_range[0]
        data = client.get_aggs(ticker,multiplier=1,timespan='day',adjusted='true',from_=start_date,to=end_date,limit = 50000)
        df = pd.DataFrame(data)
        df['date'] = df['timestamp'].apply(est)
        #fill dates not present (didn't trade on day, leads to incorrect overnight returns if not addressed)
        df = pd.merge(pd.DataFrame({'date': date_range}), df, on='date', how='left')
        df['ticker_overnight'] = ((df['open'].shift(-1)/df['close'])-1) * 100   #overnight returns
        #add index data, excess returns
        df = pd.merge(df,self.index_data,on='date')
        df = pd.merge(df,self.riskfree,on='date')
        df['ticker_return'] = df['ticker_overnight'] - df['riskfree']   #excess returns
        df['index_return'] = df['index_overnight'] - df['riskfree']
        return df

    def factor_model(self,ticker,end_date,proj_return):
        """Fit the single-index regression for one ticker.

        Parameters
        ----------
        ticker : str
            Symbol to model.
        end_date : str
            Last date of the estimation window.
        proj_return : float
            Projected return that is added to the regression intercept.

        Returns
        -------
        tuple
            ``(alpha, beta, i_var)``: intercept plus ``proj_return``, slope
            on the index excess return, and variance of the residuals.
        """
        df = self.ticker_data(ticker,end_date)
        #OLS
        df = df[['ticker_return','index_return']].dropna()
        model = sm.OLS(df['ticker_return'],sm.add_constant(df['index_return'])).fit()
        #stats to return
        # params is a label-indexed Series; select by position explicitly
        # because integer keys on such a Series are deprecated in pandas.
        alpha = model.params.iloc[0] + proj_return
        beta = model.params.iloc[1]
        i_var = model.resid.var()
        return alpha,beta,i_var

    def ticker_stats(self,dataframe):
        """Run ``factor_model`` for every row of ``dataframe`` in a thread pool.

        ``dataframe`` must provide ``ticker``, ``date`` and ``pred`` columns.
        Returns a DataFrame with columns ``alpha``, ``beta`` and ``i_var`` in
        the same row order as the input.
        """
        #get stats for every ticker
        with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
            factor_outcomes = list(tqdm(executor.map(self.factor_model, dataframe['ticker'], dataframe['date'], dataframe['pred']), total=len(dataframe)))
        factor_dataframe = pd.DataFrame(factor_outcomes, columns=['alpha', 'beta', 'i_var'])
        return factor_dataframe

    def function_data(self,dataframe,lambda2):
        """Prepare the optimiser inputs for the tickers in ``dataframe``.

        Sets ``self.alpha``, ``self.beta``, ``self.sigma``, ``self.l2`` and
        ``self.tickers``. SPY is appended as the last asset with alpha 0,
        beta 1 and zero residual variance. The index variance is measured
        over the window ending on the first row's date.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Rows with ``ticker``, ``date`` and ``pred`` columns.
        lambda2 : float
            Weight of the variance term in the objective.

        Returns
        -------
        pandas.DataFrame
            The per-asset ``alpha``/``beta``/``i_var`` table, SPY row included.
        """
        df = self.ticker_stats(dataframe)   #get ticker stats
        df.loc[len(df)] = [0,1,0]   #add SPY stats
        # Computed from the cached index data; no placeholder ticker download
        # is needed just to obtain the index return series.
        index_var = self._index_excess_returns(dataframe['date'].iloc[0])['index_return'].var()
        self.beta = np.array(df['beta'])
        i = np.diag(df['i_var'])
        self.sigma = (np.outer(self.beta, self.beta) * index_var) + i
        self.l2 = lambda2
        self.alpha = np.array(df['alpha'])
        self.tickers = dataframe['ticker'].tolist() + ['SPY']
        return df

    def function(self,weights):
        """Objective to minimise: ``-weights.alpha + l2 * weights' sigma weights``."""
        return -np.dot(weights.T, self.alpha) + (self.l2 * (np.dot(np.dot(weights.T, self.sigma), weights)))

    def solve_function(self,max_alloc,min_alloc):
        """Minimise ``function`` and store the result in ``self.decimal_weights``.

        Parameters
        ----------
        max_alloc : float
            Upper bound on the absolute weight of any single asset.
        min_alloc : float
            Currently unused: the minimum-allocation constraint is not
            enforced. The parameter is kept so existing calls keep working.

        Raises
        ------
        ValueError
            If the optimiser does not report success.

        Notes
        -----
        A market-neutral constraint (``beta . w == 0``) is likewise not
        enforced.
        """
        initial_weights = np.full(len(self.alpha), 1 / len(self.alpha))
        constraints = [{'type': 'eq', 'fun': lambda w: np.sum(np.abs(w)) - 1},  #Absolute values of weights must sum to 1
                       {'type': 'ineq', 'fun': lambda w: w[:-1]},  #Only the last asset (SPY) may be shorted
                       {'type': 'ineq', 'fun': lambda w: max_alloc - np.abs(w)}]  #Maximum single-asset allocation = max_alloc
        result = minimize(self.function, initial_weights,constraints=constraints,options = {'maxiter': 100})
        if result.success:
            self.decimal_weights = result.x
        else:
            # One formatted message instead of a tuple of exception arguments.
            raise ValueError(f"Fail: {result.message}")

    def scale_weights(self,portfolio_value):
        """Convert the solved weights into dollar amounts.

        Returns a DataFrame with columns ``ticker`` and ``dollar_weight``
        (``portfolio_value * weight`` rounded to 6 decimals), in the asset
        order of ``self.tickers``.
        """
        dollar_weights = portfolio_value * self.decimal_weights
        return pd.DataFrame({'ticker': self.tickers, 'dollar_weight': np.round(dollar_weights, 6)})

In [189]:
# Build the optimiser; the constructor downloads SPY bars and reads
# dlist.json / DGS3MO.csv from the working directory.
qp = quadprog()
# Estimate factor statistics for every ticker dated 2024-09-27.
# lambda2=0 zeroes the variance term, so the objective is just minus the
# alpha-weighted sum. `a` holds the per-asset alpha/beta/i_var table.
a = qp.function_data(dataframe=df[df['date'] == '2024-09-27'],lambda2=0)
# Cap each asset at 20% of the book. min_alloc is accepted by solve_function
# but not used by any active constraint.
qp.solve_function(max_alloc=.2,min_alloc=.01)
# Scale the solved weights to a 10,000 portfolio.
result = qp.scale_weights(10000)
# Show allocations from smallest to largest dollar weight.
result.sort_values(by='dollar_weight',ascending=True)

100%|██████████| 138/138 [00:04<00:00, 30.49it/s]


Unnamed: 0,ticker,dollar_weight
69,IVP,-0.000000
132,CIA,0.000000
95,ADTX,-0.000000
94,JBDI,0.000000
93,AEHL,0.000000
...,...,...
96,ASNS,1999.997484
49,GXAI,2000.000000
33,NUKK,2000.000000
3,SBC,2000.000000
