In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
with open("ticker_data_augmented.pkl", "rb") as file:
    loaded_data = pickle.load(file)

stocks = list(loaded_data.keys())[:-1]

In [3]:
caps_threshold = {
    'Risk' : [1, 2, 3],
    'Threshold' : [10000000000, 2000000000, 250000000]
}

mkt_caps = {}

for stock in stocks:
    mkt_cap = loaded_data[stock].mean()['Market_Cap']
    for i, threshold in enumerate(caps_threshold['Threshold'], start=1):
        if mkt_cap >= threshold:
            mkt_caps[stock] = i
            break
    else: mkt_caps[stock] = 4

In [4]:
def screening_level_1(data, measure_name):
    measure = {}

    for stock in stocks:
        stock_measure = data[stock].mean()[measure_name]
        measure[stock] = stock_measure / np.sqrt(252)

    measure_vals = list(measure.values())
    measure_vals = [x for x in measure_vals if not np.isnan(x)]
    quartiles = {
        'Q' : [1, 2, 3, 4],
        'Measure' : [np.quantile(measure_vals, x) for x in [0.25, 0.5, 0.75, 1.0]]
    }

    quart = pd.DataFrame(quartiles).set_index('Q')

    risk_levels = {}

    for stock in stocks:
        for i, threshold in enumerate(quart['Measure'], start=1):
            if measure[stock] <= threshold:
                risk_levels[stock] = i
                break
        else: risk_levels[stock] = np.nan

    return risk_levels

In [5]:
vol_risk = screening_level_1(loaded_data, 'Annualized Vol')

In [6]:
vol_weight = 0.5
mkt_cap_weight = 0.5

weighted_risk = {
    key: round(vol_weight * vol_risk[key] + mkt_cap_weight * mkt_caps[key], 1) for key in mkt_caps
}

In [7]:
risk = {
    1: [],
    2: [],
    3: [],
    4: []
}

for stock in stocks:
    if weighted_risk[stock] == 1.0:
        risk[1].append(stock)
    elif weighted_risk[stock] == 1.5:
        risk[2].append(stock)
    elif weighted_risk[stock] == 2.0:
        risk[3].append(stock)
    elif weighted_risk[stock] == 2.5:
        risk[4].append(stock)

In [8]:
len(risk[1])

123

In [9]:
len(risk[2])

116

In [10]:
len(risk[3])

121

In [11]:
len(risk[4])

128

In [12]:
std = {}
downside = {}
dividend_yield = {}
alpha = {}
debt_equity = {}
eps_growth_vol = {}
earnings_price = {}
book_val_price = {}
roe = {}
beta = {}

for stock in stocks:
    std[stock] = loaded_data[stock].std()['Return']
    
    downside[stock] = np.sqrt((np.minimum(0, loaded_data[stock]['Return'])**2).mean())
    
    dividend_yield[stock] = (loaded_data[stock]['Last_Dividend'] / loaded_data[stock]['Adj Close']).mean() * 4
    
    alpha[stock] = (loaded_data[stock]['Return'] - loaded_data['SPY']['Return']).mean()
    
    debt_equity[stock] = loaded_data[stock]['Debt_Ratio'].mean()
    
    g = (loaded_data[stock]['EPS'] - loaded_data[stock]['EPS'].shift(1)) / loaded_data[stock]['EPS'].shift(1)
    eps_growth_vol[stock] = np.sqrt(((g - g.mean())**2).mean())
    
    earnings_price[stock] = (loaded_data[stock]['EPS'] / loaded_data[stock]['Adj Close']).mean()

    shares_outstanding = loaded_data[stock]['Market_Cap'] / loaded_data[stock]['Adj Close']
    book_val_per_share = loaded_data[stock]['Book_Value'] / shares_outstanding
    book_val_price[stock] = (book_val_per_share / loaded_data[stock]['Adj Close']).mean()
    
    net_income = shares_outstanding * loaded_data[stock]['EPS']
    roe[stock] = net_income / loaded_data[stock]['Book_Value']

    cov = np.cov(loaded_data[stock]['Return'].dropna(), loaded_data['SPY']['Return'].dropna().tail(len(loaded_data[stock]['Return'].dropna())))
    var = loaded_data['SPY'].var()['Return']
    beta[stock] = cov[1][0] / var

In [13]:
alpha_satisfied = {key: value for key, value in alpha.items() if value > 0}
beta_satisfied = {key: value for key, value in beta.items() if value > 1}
debt_equity_satisfied = {key: value for key, value in debt_equity.items() if (value >= 0.3) and (value <= 0.6)}

mod_risk = [key for key in alpha_satisfied.keys() if (key in risk[2]) and (key in debt_equity_satisfied)]
high_risk = [key for key in alpha_satisfied.keys() if key in risk[3]]
very_high_risk = [key for key in alpha_satisfied.keys() if (key in risk[4]) and (key in beta_satisfied.keys())]

In [14]:
len(mod_risk)

22

In [15]:
len(np.unique(high_risk + very_high_risk))

146

In [16]:
beta

{'A': 0.9642779691881768,
 'AAPL': 1.1965320583303884,
 'ABBV': 0.5416946279684027,
 'ABNB': 0.9590650665430346,
 'ABT': 0.7676016458463327,
 'ACGL': 0.996997909288878,
 'ACN': 1.0877022308739503,
 'ADBE': 1.275829565240368,
 'ADI': 1.2861964721216752,
 'ADM': 0.7916424630603404,
 'ADP': 0.9919216547119869,
 'ADSK': 1.362684209939696,
 'AEE': 0.6775115636364264,
 'AEP': 0.5865562671260502,
 'AES': 1.1259808809263552,
 'AFL': 0.9718975404344959,
 'AIG': 1.2677430631383213,
 'AIZ': 0.8196106962079046,
 'AJG': 0.8428674902203573,
 'AKAM': 0.6955318600130942,
 'ALB': 1.3687616467383283,
 'ALGN': 1.5566389822994278,
 'ALL': 0.8102264489088948,
 'ALLE': 1.0397612024220149,
 'AMAT': 1.6639056507730456,
 'AMCR': 0.894744386652257,
 'AMD': 1.5877574917807722,
 'AME': 1.0379022485787317,
 'AMGN': 0.6429954135711868,
 'AMP': 1.5332235645815202,
 'AMT': 0.8533767790871994,
 'AMZN': 1.1007984627298943,
 'ANET': 1.2559754336226965,
 'ANSS': 1.2882902885327538,
 'AON': 0.7725442207118989,
 'AOS': 0.8

In [None]:
print(list(loaded_data[low_risk[0]].columns))


['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume', 'Last_Dividend', 'Return', 'Log_Return', 'Rolling Std_252', 'Annualized Vol', 'Market_Cap', 'EPS', 'Book_Value', 'Debt_Ratio']


In [39]:
std_normalized = normalize(loaded_data, low_risk, 'Annualized Vol')

def compute_downside(data, stocks):
    return {
        stock: np.sqrt(np.mean(np.square(np.minimum(0, data[stock]['Return']))))
        for stock in stocks if 'Return' in data[stock]
    }

downside = compute_downside(loaded_data, low_risk)
downside_normalized = normalize({stock: {'Downside': val} for stock, val in downside.items()}, low_risk, 'Downside')

def compute_dividend_yield(data, stocks):
    return {
        stock: (data[stock]['Last_Dividend'] / data[stock]['Adj Close']).mean() * 4  
        for stock in stocks if 'Last_Dividend' in data[stock] and 'Adj Close' in data[stock]
    }

div_yield = compute_dividend_yield(loaded_data, low_risk)
div_yield_normalized = normalize({stock: {'Dividend Yield': val} for stock, val in div_yield.items()}, low_risk, 'Dividend Yield')

weights = {'std': 0.4, 'downside': 0.3, 'dividend_yield': 0.3}

low_risk_scores = {}
for stock in low_risk:
    low_risk_scores[stock] = (
        weights['std'] * std_normalized.get(stock, 0) +
        weights['downside'] * downside_normalized.get(stock, 0) +
        weights['dividend_yield'] * div_yield_normalized.get(stock, 0)
    )

low_risk_sorted = sorted(low_risk_scores.items(), key=lambda x: x[1])
low_risk_portfolio = {
    "Stocks": [stock for stock, score in low_risk_sorted],
    "Weights": {stock: 1 / low_risk_scores[stock] for stock, score in low_risk_sorted}
}

print("Low-Risk Portfolio:", low_risk_portfolio)


Low-Risk Portfolio: {'Stocks': ['JNJ', 'CL', 'PG', 'KO', 'PEP', 'KMB', 'GIS', 'K', 'BMY', 'MRK', 'DUK', 'GD', 'ED', 'VZ', 'CMS', 'ATO', 'HRL', 'SO', 'SJM', 'AEP', 'AEE', 'LNT', 'LMT', 'PEG', 'XEL', 'WEC', 'CAG', 'AMGN', 'ABBV', 'CPB', 'DTE', 'PM', 'GILD', 'PPL', 'MDT', 'NI', 'SRE', 'FAST', 'CME', 'EVRG', 'KHC', 'CSCO', 'IBM', 'EXC', 'ETR', 'PSA', 'PAYX', 'ES', 'HD', 'PNW', 'D', 'MAA', 'AFL', 'MO', 'EIX', 'CPT', 'CTAS', 'AVB', 'CNP', 'FE', 'AMCR', 'VRSN', 'T', 'TMUS'], 'Weights': {'JNJ': 57.01299011979528, 'CL': 19.33849095467841, 'PG': 13.514877836289859, 'KO': 10.1109463190782, 'PEP': 8.434543978342054, 'KMB': 5.142065684880143, 'GIS': 3.5354004666676206, 'K': 3.1887499897895877, 'BMY': 3.1238231077429788, 'MRK': 3.05957830711413, 'DUK': 3.0188558402269137, 'GD': 2.9948780626410882, 'ED': 2.972299202921044, 'VZ': 2.963213702864072, 'CMS': 2.821060016422224, 'ATO': 2.813577728067905, 'HRL': 2.583421232146726, 'SO': 2.5503528799232655, 'SJM': 2.5459409851211143, 'AEP': 2.539022484654189

In [45]:
mod_risk_filtered = [
    stock for stock in mod_risk
    if alpha.get(stock, 0) > 0 and
    0.3 <= debt_equity.get(stock, 0) <= 0.6 and
    not np.isnan(eps_growth_vol.get(stock, 0)) and
    not np.isnan(earnings_price.get(stock, 0)) and
    not np.isnan(book_val_price.get(stock, 0))
]

alpha_normalized = normalize({stock: {'Alpha': alpha[stock]} for stock in mod_risk_filtered}, mod_risk_filtered, 'Alpha')
debt_equity_normalized = normalize({stock: {'Debt-Equity': debt_equity[stock]} for stock in mod_risk_filtered}, mod_risk_filtered, 'Debt-Equity')
eps_growth_vol_normalized = normalize({stock: {'EPS Growth': eps_growth_vol[stock]} for stock in mod_risk_filtered}, mod_risk_filtered, 'EPS Growth')
earnings_price_normalized = normalize({stock: {'Earnings-Price': earnings_price[stock]} for stock in mod_risk_filtered}, mod_risk_filtered, 'Earnings-Price')
book_val_price_normalized = normalize({stock: {'Book-Value-Price': book_val_price[stock]} for stock in mod_risk_filtered}, mod_risk_filtered, 'Book-Value-Price')

weights = {
    'alpha': 0.3,
    'quality': 0.5, 
    'value': 0.2   
}

mod_risk_scores = {}
for stock in mod_risk_filtered:
    mod_risk_scores[stock] = (
        weights['alpha'] * alpha_normalized.get(stock, 0) +
        weights['quality'] * (
            0.5 * debt_equity_normalized.get(stock, 0) +
            0.5 * eps_growth_vol_normalized.get(stock, 0)
        ) +
        weights['value'] * (
            0.5 * earnings_price_normalized.get(stock, 0) +
            0.5 * book_val_price_normalized.get(stock, 0)
        )
    )

mod_risk_sorted = sorted(mod_risk_scores.items(), key=lambda x: x[1])
mod_risk_portfolio = {
    "Stocks": [stock for stock, score in mod_risk_sorted],
    "Weights": {stock: 1 / mod_risk_scores[stock] for stock, score in mod_risk_sorted}
}

print("Moderate-Risk Portfolio:", mod_risk_portfolio)


Moderate-Risk Portfolio: {'Stocks': ['J', 'TXN', 'VICI', 'ACN', 'ERIE', 'ROL', 'SNA', 'VMC', 'DLR', 'SYK', 'CTVA', 'CVX', 'WAB', 'FFIV', 'MSFT', 'GLW', 'KMI', 'PKG', 'JCI', 'HUBB', 'ETN', 'FI'], 'Weights': {'J': 5.302682331413695, 'TXN': 5.233673594395141, 'VICI': 4.825569522928701, 'ACN': 4.586619860179337, 'ERIE': 4.396359137876881, 'ROL': 4.223608692087033, 'SNA': 4.025993091370095, 'VMC': 3.760528405780273, 'DLR': 3.7399249528455716, 'SYK': 3.3717436960965075, 'CTVA': 3.073718748140415, 'CVX': 3.0130523103134266, 'WAB': 2.814529590480911, 'FFIV': 2.7939183276397968, 'MSFT': 2.7796778149821866, 'GLW': 2.6733353681153256, 'KMI': 2.4561708279272785, 'PKG': 2.3484109853009993, 'JCI': 2.2398957832862507, 'HUBB': 1.9234258790089125, 'ETN': 1.8231886768582914, 'FI': 1.781472771653772}}


In [47]:
high_risk_filtered = [
    stock for stock in high_risk
    if alpha.get(stock, 0) > 0 and
    beta.get(stock, 0) > 1 and
    not np.isnan(eps_growth_vol.get(stock, 0))
]

alpha_normalized = normalize({stock: {'Alpha': alpha[stock]} for stock in high_risk_filtered}, high_risk_filtered, 'Alpha')
beta_normalized = normalize({stock: {'Beta': beta[stock]} for stock in high_risk_filtered}, high_risk_filtered, 'Beta')
eps_growth_vol_normalized = normalize({stock: {'EPS Growth': eps_growth_vol[stock]} for stock in high_risk_filtered}, high_risk_filtered, 'EPS Growth')

weights = {
    'alpha': 0.2,
    'beta': 0.5,
    'eps_growth_vol': 0.3
}

high_risk_scores = {}
for stock in high_risk_filtered:
    high_risk_scores[stock] = (
        weights['alpha'] * alpha_normalized.get(stock, 0) +
        weights['beta'] * beta_normalized.get(stock, 0) +
        weights['eps_growth_vol'] * eps_growth_vol_normalized.get(stock, 0)
    )

high_risk_sorted = sorted(high_risk_scores.items(), key=lambda x: x[1])
high_risk_portfolio = {
    "Stocks": [stock for stock, score in high_risk_sorted],
    "Weights": {stock: 1 / high_risk_scores[stock] for stock, score in high_risk_sorted}
}

print("High-Risk Portfolio:", high_risk_portfolio)


High-Risk Portfolio: {'Stocks': ['EFX', 'POOL', 'CINF', 'FDX', 'HCA', 'NTAP', 'WELL', 'PTC', 'AMZN', 'ULTA', 'ODFL', 'HLT', 'NVR', 'IT', 'GDDY', 'IPG', 'MAR', 'PNR', 'MLM', 'IR', 'HPQ', 'GOOG', 'GOOGL', 'KIM', 'MSCI', 'GE', 'TRMB', 'RJF', 'BKNG', 'CBRE', 'ISRG', 'TDG', 'ADI', 'PFG', 'DRI', 'PWR', 'TXT', 'SPG', 'MS', 'SNPS', 'OKE', 'VTR', 'AXP', 'CDNS', 'PH', 'INTU', 'WFC', 'HWM', 'AVGO', 'AIG', 'AMP'], 'Weights': {'EFX': 31.186470360944675, 'POOL': 22.696342948542448, 'CINF': 22.171446530654766, 'FDX': 14.615040739248665, 'HCA': 12.080412506604963, 'NTAP': 9.452937392267327, 'WELL': 7.973310266269417, 'PTC': 7.902886974519319, 'AMZN': 7.684506922954736, 'ULTA': 7.348185350492125, 'ODFL': 7.222355130590109, 'HLT': 7.195523292404011, 'NVR': 6.955760746747836, 'IT': 6.643603534352826, 'GDDY': 6.330557992472789, 'IPG': 6.199043822011138, 'MAR': 6.18351233376272, 'PNR': 6.148593043841386, 'MLM': 5.865692696345388, 'IR': 5.731407583782211, 'HPQ': 5.705303352230057, 'GOOG': 5.671469339892927,

In [48]:
def compute_momentum(data, stocks, period=20): 
    momentum = {}
    for stock in stocks:
        if 'Adj Close' in data[stock]:
            prices = data[stock]['Adj Close']
            if len(prices) > period:
                momentum[stock] = (prices.iloc[-1] - prices.iloc[-period]) / prices.iloc[-period]
    return momentum


In [49]:
momentum = compute_momentum(loaded_data, very_high_risk)

very_high_risk_filtered = [
    stock for stock in very_high_risk
    if alpha.get(stock, 0) > 0 and
    beta.get(stock, 0) > 1.5 and
    not np.isnan(momentum.get(stock, 0))
]

alpha_normalized = normalize({stock: {'Alpha': alpha[stock]} for stock in very_high_risk_filtered}, very_high_risk_filtered, 'Alpha')
beta_normalized = normalize({stock: {'Beta': beta[stock]} for stock in very_high_risk_filtered}, very_high_risk_filtered, 'Beta')
momentum_normalized = normalize({stock: {'Momentum': momentum[stock]} for stock in very_high_risk_filtered}, very_high_risk_filtered, 'Momentum')

weights = {
    'alpha': 0.2,
    'beta': 0.5,
    'momentum': 0.3
}

very_high_risk_scores = {}
for stock in very_high_risk_filtered:
    very_high_risk_scores[stock] = (
        weights['alpha'] * alpha_normalized.get(stock, 0) +
        weights['beta'] * beta_normalized.get(stock, 0) +
        weights['momentum'] * momentum_normalized.get(stock, 0)
    )

very_high_risk_sorted = sorted(very_high_risk_scores.items(), key=lambda x: x[1])
very_high_risk_portfolio = {
    "Stocks": [stock for stock, score in very_high_risk_sorted],
    "Weights": {stock: 1 / very_high_risk_scores[stock] for stock, score in very_high_risk_sorted}
}

print("Very High-Risk Portfolio:", very_high_risk_portfolio)


Very High-Risk Portfolio: {'Stocks': ['NXPI', 'MCHP', 'TER', 'FCX', 'AMD', 'ENPH', 'UAL', 'SYF', 'MGM', 'AMAT', 'MPWR', 'KLAC', 'DFS', 'ON', 'RCL', 'NVDA', 'TSLA', 'CZR'], 'Weights': {'NXPI': 8.21563890546478, 'MCHP': 6.864308348784896, 'TER': 5.761356988359571, 'FCX': 4.968686341608956, 'AMD': 4.45411011855111, 'ENPH': 4.186375703035643, 'UAL': 4.041128749992507, 'SYF': 3.7788817375010684, 'MGM': 3.7502183890826433, 'AMAT': 3.719018756415534, 'MPWR': 3.6972007543112357, 'KLAC': 3.6947507134925726, 'DFS': 3.041678372805405, 'ON': 2.53734007538781, 'RCL': 2.108990304572877, 'NVDA': 1.8580738333253017, 'TSLA': 1.804915415723732, 'CZR': 1.6136989472809677}}


In [None]:
risk_free_rate = 0.04  

def evaluate_portfolio(portfolio, data):
    stocks = portfolio["Stocks"]
    weights = portfolio["Weights"]

    total_return = 0
    total_variance = 0
    total_beta = 0
    downside_risk = 0

    for stock in stocks:
        weight = weights[stock]
        stock_data = data[stock]

        annualized_return = stock_data['Return'].mean() * 252 
        total_return += weight * annualized_return
        
        variance = (stock_data['Return'].std() * np.sqrt(252))**2
        total_variance += weight**2 * variance

        total_beta += weight * beta[stock]
        
        negative_returns = np.minimum(0, stock_data['Return'])
        downside = np.sqrt(np.mean(negative_returns**2))
        downside_risk += weight * downside

    portfolio_std_dev = np.sqrt(total_variance)
    sharpe_ratio = (total_return - risk_free_rate) / portfolio_std_dev
    sortino_ratio = (total_return - risk_free_rate) / downside_risk
    treynor_ratio = (total_return - risk_free_rate) / total_beta

    return {
        "Annualized Return": total_return,
        "Standard Deviation": portfolio_std_dev,
        "Beta": total_beta,
        "Downside Deviation": downside_risk,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Treynor Ratio": treynor_ratio
    }

portfolio_evaluations = {
    "Low-Risk": evaluate_portfolio(low_risk_portfolio, loaded_data),
    "Moderate-Risk": evaluate_portfolio(mod_risk_portfolio, loaded_data),
    "High-Risk": evaluate_portfolio(high_risk_portfolio, loaded_data),
    "Very High-Risk": evaluate_portfolio(very_high_risk_portfolio, loaded_data)
}

portfolio_eval_df = pd.DataFrame(portfolio_evaluations).T
print(portfolio_eval_df)


                Annualized Return  Standard Deviation        Beta  \
Low-Risk                22.915939           13.364121  139.898053   
Moderate-Risk           15.197546            5.200960   71.340829   
High-Risk               72.571934           21.272275  347.544521   
Very High-Risk          21.935155            9.238252  113.437547   

                Downside Deviation  Sharpe Ratio  Sortino Ratio  Treynor Ratio  
Low-Risk                  2.444300      1.711743       9.358891       0.163519  
Moderate-Risk             1.015212      2.914375      14.930419       0.212467  
High-Risk                 4.977812      3.409693      14.571046       0.208698  
Very High-Risk            1.589623      2.370054      13.773804       0.193015  
