In [1]:
import polars as pl
import pandas as pd
import numpy as np
import pickle

In [2]:
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load a pickle that was produced by a trusted pipeline.
with open("ticker_data_augmented_2019start.pkl", "rb") as pickle_file:
    raw_frames = pickle.load(pickle_file)

# Every key except the last one is treated as a tradable ticker.
# NOTE(review): this presumably relies on 'SPY' being the last key of the
# pickle (it is added back explicitly below) -- confirm against the producer.
all_stocks = list(raw_frames.keys())[:-1]

# Tickers dropped from the universe. list.remove is kept on purpose so that a
# ticker missing from the pickle fails loudly with ValueError.
EXCLUDED_TICKERS = ('BG', 'ROST', 'AMCR', 'STE')
for excluded_ticker in EXCLUDED_TICKERS:
    all_stocks.remove(excluded_ticker)

# Exclusions that were tried previously and are currently disabled:
# ABNB, CEG, GEHC, GEV, KVUE, PLTR, SOLV, SW, VLTO

# Convert each frame to polars (index moved into a column by reset_index) and
# derive a 'Daily Vol' column as 'Annualized Vol' / sqrt(252).
loaded_data = {
    stock: pl.DataFrame(raw_frames[stock].reset_index()).with_columns(
        (pl.col('Annualized Vol') / np.sqrt(252)).alias('Daily Vol')
    )
    for stock in all_stocks + ['SPY']
}

# Factor weights read as module-level globals by the scoring functions.
# Each group of weights sums to one (two halves or three thirds).
_HALF = 0.5
_THIRD = 1 / 3

# Level-1 screen: market cap vs. daily volatility.
mkt_weight = _HALF
vol_weight = _HALF

# Low-risk score: volatility sub-score (std + downside) and dividend yield.
std_weight = _HALF
downside_weight = _HALF
volfac_weight = _HALF
dividend_weight1 = _HALF

# Moderate-risk score: quality, value and alpha sub-scores.
de_weight = _THIRD
eps_weight = _THIRD
roe_weight = _THIRD
ep_weight = _THIRD
bv_weight = _THIRD
dividend_weight2 = _THIRD
qual_weight = _THIRD
alpha_weight1 = _THIRD
val_weight = _THIRD

# High-risk score: growth sub-score (P/E + P/BV) and alpha.
pe_weight = _HALF
pbv_weight = _HALF
growth_weight = _HALF
alpha_weight2 = _HALF

# Very-high-risk score: beta, alpha and momentum.
beta_weight = _THIRD
alpha_weight3 = _THIRD
mom_weight = _THIRD

# Accumulator filled by risk_groups: one entry per column for every period.
final_ports = {column: [] for column in ('Date', 'low', 'moderate', 'high', 'very_high')}

In [3]:
def normalize(data, stocks, factor):
    """Min-max scale the per-stock mean of ``factor`` into [0, 1].

    Parameters
    ----------
    data : mapping
        ``data[stock]`` must support ``factor in data[stock]`` and
        ``data[stock][factor].mean()`` (pandas and polars frames both do).
    stocks : iterable
        Stock identifiers to score; every one of them appears in the result.
    factor : str
        Column whose mean is normalized.

    Returns
    -------
    dict
        ``{stock: scaled_mean}``. A stock gets 0 when the column is absent or
        its mean is missing (None or NaN). Every stock gets 0 when no usable
        mean exists or when all usable means are equal.
    """
    # Compute each mean exactly once; the lookup table doubles as the
    # "has a usable value" set.
    means = {}
    for stock in stocks:
        if factor not in data[stock]:
            continue
        avg = data[stock][factor].mean()
        # polars returns None for an empty/all-null column; np.isnan(None)
        # would raise TypeError, so test for None first.
        if avg is None or np.isnan(avg):
            continue
        means[stock] = avg

    if not means:
        return {stock: 0 for stock in stocks}

    min_val = min(means.values())
    max_val = max(means.values())

    # A zero range would divide by zero; treat the factor as uninformative.
    if max_val == min_val:
        return {stock: 0 for stock in stocks}

    span = max_val - min_val
    return {stock: (means[stock] - min_val) / span if stock in means else 0
            for stock in stocks}

In [4]:
def screening_level_1(data, stocks):
    """First-pass risk score built from market cap and daily volatility.

    Both factors are min-max normalized across ``stocks`` via ``normalize``.
    Market cap enters with a negative sign and daily volatility with a
    positive sign, weighted by the module-level ``mkt_weight`` and
    ``vol_weight``.

    Returns a dict ``{stock: score}``.
    """
    size_norm = normalize(data, stocks, 'Market_Cap')
    vol_norm = normalize(data, stocks, 'Daily Vol')

    return {
        ticker: -size_norm[ticker] * mkt_weight + vol_norm[ticker] * vol_weight
        for ticker in stocks
    }

In [5]:
def screening_level_2(data, stocks):
    """Pick the final members of each risk portfolio.

    ``stocks`` is indexed by level-1 risk bucket: ``stocks[1]`` .. ``stocks[4]``
    hold the candidates for the low, moderate, high and very-high portfolios.

    Selection rules:
      * low / moderate: keep the lower-scoring half (floor of n / 2 names).
      * high / very high: keep only the names whose score is exactly 0.

    Returns a 4-tuple of lists ``(low, moderate, high, very_high)``.
    """
    low_metrics = low_calcs(data, stocks[1])
    med_metrics = med_calcs(data, stocks[2])
    high_metrics = high_calcs(data, stocks[3])
    vhigh_metrics = vhigh_calcs(data, stocks[4])

    def lowest_half(metrics):
        # Ascending sort, then take the first half of the ranking.
        ranked = sorted(metrics.items(), key=lambda pair: pair[1])
        return [ticker for ticker, _ in ranked[:len(ranked) // 2]]

    def zero_scored(metrics):
        return [ticker for ticker, value in metrics.items() if value == 0]

    return (
        lowest_half(low_metrics),
        lowest_half(med_metrics),
        zero_scored(high_metrics),
        zero_scored(vhigh_metrics),
    )

In [6]:
def low_calcs(data, stocks):
    """Score low-risk candidates on volatility and dividend yield.

    For every stock the following are computed from ``data[stock]``:
      * ``std``: standard deviation of 'Return' scaled by sqrt(252);
      * ``downside``: root mean square of the negative part of 'Return';
      * ``dividend``: taken from ``dividend_metric``.
    Each factor is min-max normalized across ``stocks`` and combined with the
    module-level weights ``std_weight``, ``downside_weight``,
    ``volfac_weight`` and ``dividend_weight1``.

    Returns a dict ``{stock: score}``.
    """
    dividend_yield = dividend_metric(data, stocks)

    std = {}
    downside = {}

    for stock in stocks:
        returns = data[stock]['Return']
        # Reduce only the 'Return' column: a frame-wide .std() also reduces
        # 'Date' and every other column, and raises on non-numeric ones.
        std[stock] = pl.DataFrame({'std' : returns.std() * np.sqrt(252)})
        # np.minimum(0, r) zeroes out gains so only losses contribute.
        downside[stock] = pl.DataFrame({'downside' : np.sqrt((np.minimum(0, returns)**2).mean())})

    std_norm = normalize(std, stocks, 'std')
    downside_norm = normalize(downside, stocks, 'downside')
    dividend_norm = normalize(dividend_yield, stocks, 'dividend')

    score = {}

    for stock in stocks:
        vol_score = std_weight * std_norm[stock] + downside_weight * downside_norm[stock]
        score[stock] = vol_score * volfac_weight + dividend_norm[stock] * dividend_weight1

    return score

In [13]:
def med_calcs(data, stocks):
    """Score moderate-risk candidates on quality, value and alpha.

    Quality sub-score (weights ``de_weight``, ``eps_weight``, ``roe_weight``):
      * 'D/E': mean of 'Debt_Ratio';
      * 'EPS': root mean squared deviation of the period-over-period EPS
        growth rate;
      * 'ROE': (shares outstanding * EPS) / 'Book_Value', where shares
        outstanding is 'Market_Cap' / 'Adj Close'.
    Value sub-score (weights ``ep_weight``, ``bv_weight``,
    ``dividend_weight2``):
      * 'EP': mean of EPS / 'Adj Close';
      * 'BV': mean of book value per share / 'Adj Close';
      * 'dividend': taken from ``dividend_metric``.
    The final score blends quality, the normalized alpha flag from
    ``alpha_metric`` and value using ``qual_weight``, ``alpha_weight1`` and
    ``val_weight``.

    Returns a dict ``{stock: score}``.
    """
    alpha = alpha_metric(data, stocks)
    alpha_norm = normalize(alpha, stocks, 'alpha')

    roe = {}
    debt_equity = {}
    eps_growth_vol = {}
    earnings_price = {}
    book_val_price = {}
    dividend_yield = dividend_metric(data, stocks)

    for stock in stocks:
        debt_equity[stock] = pl.DataFrame({'D/E' : data[stock]['Debt_Ratio'].mean()})

        # Row-over-row EPS growth rate and its population standard deviation.
        g = (data[stock]['EPS'] - data[stock]['EPS'].shift(1)) / data[stock]['EPS'].shift(1)
        eps_growth_vol[stock] = pl.DataFrame({'EPS' : np.sqrt(((g - g.mean())**2).mean())})

        shares_outstanding = data[stock]['Market_Cap'] / data[stock]['Adj Close']
        net_income = shares_outstanding * data[stock]['EPS']
        roe[stock] = pl.DataFrame({'ROE' : net_income / data[stock]['Book_Value']})

        earnings_price[stock] = pl.DataFrame({'EP' : (data[stock]['EPS'] / data[stock]['Adj Close']).mean()})

        book_val_per_share = data[stock]['Book_Value'] / shares_outstanding
        book_val_price[stock] = pl.DataFrame({'BV' : (book_val_per_share / data[stock]['Adj Close']).mean()})

    de_norm = normalize(debt_equity, stocks, 'D/E')
    eps_norm = normalize(eps_growth_vol, stocks, 'EPS')
    roe_norm = normalize(roe, stocks, 'ROE')

    ep_norm = normalize(earnings_price, stocks, 'EP')
    # Must read the 'BV' frames: the 'EP' frames have no 'BV' column, which
    # made normalize return 0 for every stock and silenced this factor.
    bv_norm = normalize(book_val_price, stocks, 'BV')
    dividend_norm = normalize(dividend_yield, stocks, 'dividend')

    score = {}

    for stock in stocks:
        qual_score = de_weight * de_norm[stock] + eps_weight * eps_norm[stock] + roe_weight * roe_norm[stock]
        val_score = ep_weight * ep_norm[stock] + bv_weight * bv_norm[stock] + dividend_weight2 * dividend_norm[stock]
        score[stock] = qual_score * qual_weight + alpha_norm[stock] * alpha_weight1 + val_score * val_weight

    return score

In [14]:
def high_calcs(data, stocks):
    """Score high-risk candidates on P/E, P/BV and alpha.

    P/E is the reciprocal of the mean of EPS / 'Adj Close'; P/BV is the
    reciprocal of the mean of book value per share / 'Adj Close'. For each
    ratio the stocks are ranked in descending order; the top half is flagged
    1 and the rest 0. The flags are passed through ``normalize`` and blended
    with the normalized alpha flag using ``pe_weight``, ``pbv_weight``,
    ``growth_weight`` and ``alpha_weight2``.

    Returns a dict ``{stock: score}``.
    """
    alpha_norm = normalize(alpha_metric(data, stocks), stocks, 'alpha')

    pe_frames = {}
    pbv_frames = {}
    for ticker in stocks:
        frame = data[ticker]

        earnings_yield = (frame['EPS'] / frame['Adj Close']).mean()
        pe_frames[ticker] = pl.DataFrame({'PE' : 1 / earnings_yield})

        shares = frame['Market_Cap'] / frame['Adj Close']
        bv_per_share = frame['Book_Value'] / shares
        book_yield = (bv_per_share / frame['Adj Close']).mean()
        pbv_frames[ticker] = pl.DataFrame({'PBV' : 1 / book_yield})

    def top_half_norm(frames, column):
        # Rank descending on the single stored value, flag the top half with 1.
        ranked = sorted(frames.items(), key=lambda item: item[1][column][0], reverse=True)
        leaders = dict(ranked[:len(ranked) // 2]).keys()
        flags = {
            name: pl.DataFrame({column : 1 if name in leaders else 0})
            for name in stocks
        }
        return normalize(flags, list(flags), column)

    pe_norm = top_half_norm(pe_frames, 'PE')
    pbv_norm = top_half_norm(pbv_frames, 'PBV')

    score = {}
    for ticker in stocks:
        growth_score = pe_weight * pe_norm[ticker] + pbv_weight * pbv_norm[ticker]
        score[ticker] = growth_score * growth_weight + alpha_norm[ticker] * alpha_weight2

    return score
    

In [15]:
def vhigh_calcs(data, stocks):
    """Score very-high-risk candidates on alpha, beta and momentum flags.

    Flags per stock:
      * beta: 1 when the beta against 'SPY' exceeds 1, else 0;
      * momentum: 0 when the last 'Adj Close' is above the first, else 1;
      * alpha: taken from ``alpha_metric``.
    Each flag is passed through ``normalize`` and blended with
    ``alpha_weight3``, ``beta_weight`` and ``mom_weight``.

    ``data`` must contain a 'SPY' frame covering the same period.

    Returns a dict ``{stock: score}``.
    """
    alpha = alpha_metric(data, stocks)
    alpha_norm = normalize(alpha, stocks, 'alpha')

    beta = {}
    mom = {}

    for stock in stocks:

        stock_returns = data[stock]['Return'].dropna()
        # Align the benchmark to the stock's most recent observations.
        spy_returns = data['SPY']['Return'].dropna().tail(len(stock_returns))
        cov = np.cov(stock_returns, spy_returns)
        # Beta = Cov(stock, SPY) / Var(SPY) on the SAME sample. cov[1][1] is
        # the variance of the aligned SPY window; the full-history variance of
        # the global frame is a different sample and skews the ratio.
        stock_beta = cov[1][0] / cov[1][1]
        beta[stock] = pl.DataFrame({'beta' : 1 if stock_beta > 1 else 0})

        # Positional access so the result does not depend on index labels.
        prices = data[stock]['Adj Close']
        momentum = prices.iloc[-1] - prices.iloc[0]
        mom[stock] = pl.DataFrame({'momentum' : 0 if momentum > 0 else 1})

    beta_norm = normalize(beta, stocks, 'beta')
    mom_norm = normalize(mom, stocks, 'momentum')

    score = {}

    for stock in stocks:
        score[stock] = alpha_weight3 * alpha_norm[stock] + beta_weight * beta_norm[stock] + mom_weight * mom_norm[stock]

    return score

In [16]:
def dividend_metric(data, stocks):
    """Build a one-value 'dividend' frame per stock.

    The value is the mean of 'Last_Dividend' / 'Adj Close' multiplied by 4.
    NOTE(review): the factor 4 presumably annualizes a quarterly dividend --
    confirm against the data source.

    Returns a dict ``{stock: polars.DataFrame}`` with a 'dividend' column.
    """
    return {
        ticker: pl.DataFrame(
            {'dividend' : (data[ticker]['Last_Dividend'] / data[ticker]['Adj Close']).mean() * 4}
        )
        for ticker in stocks
    }


def alpha_metric(data, stocks):
    """Flag stocks that failed to beat 'SPY' on average return.

    For each stock the mean of ('Return' - SPY 'Return') is computed. The
    'alpha' flag is 1 when that mean excess return is zero or negative and 0
    when it is positive (a NaN mean also yields 0).

    ``data`` must contain a 'SPY' frame.

    Returns a dict ``{stock: polars.DataFrame}`` with an 'alpha' column.
    """
    alpha = {}

    for stock in stocks:

        excess = (data[stock]['Return'] - data['SPY']['Return']).mean()
        # Equivalent to flooring the excess return at zero and testing for 0.
        # np.max(x, 0) does not do that: its second argument is the axis, so
        # the value came back unfloored and the flag was effectively always 0.
        alpha[stock] = pl.DataFrame({'alpha' : 1 if excess <= 0 else 0})

    return alpha

In [17]:
def score_to_risk(score, stocks):
    """Map each stock's score to a quartile bucket 1-4.

    The cut-offs are the 25%, 50%, 75% and 100% quantiles of all non-NaN
    values in ``score``. A stock lands in the first bucket whose cut-off is
    greater than or equal to its score; a stock whose score matches no bucket
    (a NaN score) is assigned ``np.nan``.

    Returns a dict ``{stock: bucket}`` for the names in ``stocks``.
    """
    usable = [value for value in score.values() if not np.isnan(value)]
    cutoffs = [np.quantile(usable, q) for q in (0.25, 0.5, 0.75, 1.0)]

    def bucket_for(value):
        for rank, cutoff in enumerate(cutoffs, start=1):
            if value <= cutoff:
                return rank
        return np.nan

    return {ticker: bucket_for(score[ticker]) for ticker in stocks}

In [18]:
def risk_groups(data, stocks):
    """Run both screening levels for one period and record the portfolios.

    Level 1 scores every stock and buckets it into quartiles 1-4; a stock
    whose bucket is not one of 1-4 is left out. Level 2 then selects the
    members of each portfolio from its bucket. The result is appended to the
    module-level ``final_ports`` together with the last 'Date' of the 'SPY'
    frame. Nothing is returned.
    """
    level1_cats = score_to_risk(screening_level_1(data, stocks), stocks)

    ports_level1 = {1: [], 2: [], 3: [], 4: []}
    for ticker in stocks:
        bucket = level1_cats[ticker]
        if bucket in ports_level1:
            ports_level1[bucket].append(ticker)

    low_ports, med_ports, high_ports, vhigh_ports = screening_level_2(data, ports_level1)

    period_end = data['SPY']['Date'].iloc[-1]
    new_row = {
        'Date': period_end,
        'low': low_ports,
        'moderate': med_ports,
        'high': high_ports,
        'very_high': vhigh_ports,
    }
    for column, value in new_row.items():
        final_ports[column].append(value)

    return

In [19]:
from datetime import date, timedelta

# Quarter start dates from January of start_year through January of end_year;
# consecutive pairs delimit the rebalancing periods.
start_year = 2019
end_year = 2025

quarter_months = (1, 4, 7, 10)  # January, April, July, October
final_boundary = date(end_year, 1, 1)

dates = [
    date(year, month, 1)
    for year in range(start_year, end_year + 1)
    for month in quarter_months
    if date(year, month, 1) <= final_boundary
]

In [20]:
# risk_groups appends to the module-level final_ports. Start from empty lists
# so that re-running this cell does not record every period a second time.
for recorded in final_ports.values():
    recorded.clear()

# Each consecutive pair of boundaries is one period; the upper bound is the
# day before the next boundary because is_between is applied with both ends.
for period_start, next_start in zip(dates, dates[1:]):
    period_end = next_start - timedelta(days = 1)
    data = {
        stock: loaded_data[stock].filter(pl.col('Date').is_between(period_start, period_end)).to_pandas()
        for stock in all_stocks + ['SPY']
    }
    print(period_start)
    risk_groups(data, all_stocks)

2019-01-01
2019-04-01
2019-07-01
2019-10-01
2020-01-01
2020-04-01
2020-07-01
2020-10-01
2021-01-01
2021-04-01
2021-07-01
2021-10-01
2022-01-01
2022-04-01
2022-07-01
2022-10-01
2023-01-01
2023-04-01
2023-07-01
2023-10-01
2024-01-01
2024-04-01
2024-07-01
2024-10-01


In [21]:
# Write one row per recorded period, indexed by 'Date', to an Excel workbook.
portfolio_table = pd.DataFrame(final_ports)
portfolio_table = portfolio_table.set_index('Date')
portfolio_table.to_excel('Portfolios_start2020.xlsx')

In [180]:
# Display the prepared SPY frame, including the derived 'Daily Vol' column.
loaded_data['SPY']

Date,Adj Close,Close,High,Low,Open,Volume,Last_Dividend,Return,Log_Return,Rolling Std_252,Annualized Vol,Daily Vol
datetime[ns],f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64
2019-01-02 00:00:00,228.403625,250.179993,251.210007,245.949997,245.979996,126925200,1.435,0.00104,0.00104,0.010711,0.170032,0.010711
2019-01-03 00:00:00,222.953278,244.210007,248.570007,243.669998,248.229996,144140700,1.435,-0.023863,-0.024152,0.010805,0.171522,0.010805
2019-01-04 00:00:00,230.421249,252.389999,253.110001,247.169998,247.589996,142628800,1.435,0.033496,0.032947,0.011004,0.17469,0.011004
2019-01-07 00:00:00,232.238068,254.380005,255.949997,251.690002,252.690002,103139100,1.435,0.007885,0.007854,0.011013,0.17482,0.011013
2019-01-08 00:00:00,234.420013,256.769989,257.309998,254.0,256.820007,102512600,1.435,0.009395,0.009351,0.011021,0.174948,0.011021
…,…,…,…,…,…,…,…,…,…,…,…,…
2024-11-29 00:00:00,602.549988,602.549988,603.349976,599.380005,599.659973,30177400,1.746,0.006212,0.006193,0.007659,0.121584,0.007659
2024-12-02 00:00:00,603.630005,603.630005,604.320007,602.469971,602.969971,31746000,1.746,0.001792,0.001791,0.007657,0.121555,0.007657
2024-12-03 00:00:00,603.909973,603.909973,604.159973,602.340027,603.390015,26906600,1.746,0.000464,0.000464,0.007652,0.121464,0.007652
2024-12-04 00:00:00,607.659973,607.659973,607.909973,604.950012,605.630005,42648700,1.746,0.00621,0.00619,0.007647,0.121396,0.007647
