In [34]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
import yfinance as yf
import warnings
warnings.filterwarnings("ignore")

In [35]:
tickers = ["META", "NFLX", "GOOGL", "YELP", "COCO"]
ticker_data = yf.download(tickers = tickers, start = "2018-01-01", end = "", auto_adjust = True, progress = False, group_by = "Ticker")

rows = []
for sym in tickers:
    px = ticker_data[sym]["Close"].reset_index()
    px["firm_id"] = sym
    rows.append(px)
px_all = pd.concat(rows)
px_all

Unnamed: 0,Date,Close,firm_id
0,2018-01-02,180.161499,META
1,2018-01-03,183.388977,META
2,2018-01-04,183.051346,META
3,2018-01-05,185.553864,META
4,2018-01-08,186.973938,META
...,...,...,...
2018,2026-01-13,55.279999,COCO
2019,2026-01-14,51.990002,COCO
2020,2026-01-15,52.709999,COCO
2021,2026-01-16,53.540001,COCO


In [36]:
px_all["month"] = px_all["Date"].dt.to_period("M")
px_group = px_all.groupby(["firm_id", "month"])["Close"].agg(["first", "last"]).reset_index()
px_group["ret_1m"] = px_group["last"] / px_group["first"] - 1
###
px_all["ret_daily"] = px_all["Close"].pct_change()
vol_1m = px_all.groupby(["firm_id", "month"])["ret_daily"].std().rename("vol_1m").reset_index()
equity_m = px_group.merge(vol_1m, on = ["firm_id", "month"])
equity_m = equity_m[["firm_id", "month", "ret_1m", "vol_1m"]]

In [37]:
ret = equity_m.pivot(index="month", columns="firm_id", values="ret_1m")
vol = equity_m.pivot(index="month", columns="firm_id", values="vol_1m")

combined = pd.concat({"ret_1m": ret, "vol_1m": vol}, axis=1)

# NOTE: calling method, for example BAC -> bac_vol = combined[("vol_1m", "BAC")] -> remember this call method

combined

Unnamed: 0_level_0,ret_1m,ret_1m,ret_1m,ret_1m,ret_1m,vol_1m,vol_1m,vol_1m,vol_1m,vol_1m
firm_id,COCO,GOOGL,META,NFLX,YELP,COCO,GOOGL,META,NFLX,YELP
month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2018-01,,0.101574,0.030151,0.344308,0.013413,0.000000,0.086399,0.015685,0.216028,0.189839
2018-02,,-0.065733,-0.076493,0.099257,-0.048908,0.000000,0.025589,0.024881,0.028369,0.040960
2018-03,,-0.031986,-0.091793,0.017080,-0.029295,0.000000,0.021870,0.025404,0.033000,0.018040
2018-04,,0.005876,0.106892,0.114774,0.101695,0.000000,0.017595,0.026855,0.029001,0.013965
2018-05,,0.056930,0.103071,0.122247,-0.037079,0.000000,0.013249,0.010273,0.012735,0.028352
...,...,...,...,...,...,...,...,...,...,...
2025-09,0.156590,0.151253,-0.000319,-0.012511,-0.006053,0.030073,0.023575,0.011060,0.014413,0.010765
2025-10,-0.029003,0.148183,-0.096175,-0.044444,0.048315,0.031947,0.016920,0.028338,0.025906,0.030779
2025-11,0.308427,0.128507,0.016057,-0.022080,-0.125529,0.023962,0.024689,0.018255,0.021580,0.030414
2025-12,-0.021414,-0.005352,0.030831,-0.140841,0.061845,0.014790,0.013799,0.013409,0.017757,0.012687


In [38]:
equity_m

Unnamed: 0,firm_id,month,ret_1m,vol_1m
0,COCO,2018-01,,0.000000
1,COCO,2018-02,,0.000000
2,COCO,2018-03,,0.000000
3,COCO,2018-04,,0.000000
4,COCO,2018-05,,0.000000
...,...,...,...,...
480,YELP,2025-09,-0.006053,0.010765
481,YELP,2025-10,0.048315,0.030779
482,YELP,2025-11,-0.125529,0.030414
483,YELP,2025-12,0.061845,0.012687


In [39]:
def fundamentals(ticker):
    output = []
    for sym in ticker:
        info = yf.Ticker(sym).get_info()
        rows = {"firm_id": sym, "Sector": info.get("sector"), "Industry": info.get("industry"), "ROE": info.get("returnOnEquity"), 
                "ROA": info.get("returnOnAssets"), "Profit Margin": info.get("profitMargins"), "debtToEquity": info.get("debtToEquity"), "Beta": info.get("beta"), "EBITDA Margins": info.get("ebitdaMargins"), "GrossMargins": info.get("grossMargins")}
        output.append(rows)
    out = pd.DataFrame(output)
    return out
fund_tab = fundamentals(tickers)      
fund_tab

Unnamed: 0,firm_id,Sector,Industry,ROE,ROA,Profit Margin,debtToEquity,Beta,EBITDA Margins,GrossMargins
0,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013
1,NFLX,Communication Services,Entertainment,0.42861,0.1474,0.24047,65.822,1.711,0.29899,0.48085
2,GOOGL,Communication Services,Internet Content & Information,0.3545,0.16276,0.32233,11.424,1.086,0.37661,0.59172
3,YELP,Communication Services,Internet Content & Information,0.20374,0.12425,0.10227,3.737,0.553,0.14727,0.90468
4,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.1176,0.11351,4.566,0.37,0.12714,0.35997


In [40]:
fund_tab_px = fund_tab.merge(equity_m, on = "firm_id")
fund_tab_px

Unnamed: 0,firm_id,Sector,Industry,ROE,ROA,Profit Margin,debtToEquity,Beta,EBITDA Margins,GrossMargins,month,ret_1m,vol_1m
0,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-01,0.030151,0.015685
1,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-02,-0.076493,0.024881
2,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-03,-0.091793,0.025404
3,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-04,0.106892,0.026855
4,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-05,0.103071,0.010273
...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-09,0.156590,0.030073
481,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-10,-0.029003,0.031947
482,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-11,0.308427,0.023962
483,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-12,-0.021414,0.014790


In [41]:
rf_annual = 0.4
rf_monthly = rf_annual / 12

In [42]:

bench_data = yf.download(tickers = "SPY", start  = "2018-01-01", end = "", progress = False, auto_adjust = True, group_by = "Ticker")

px_b = bench_data["SPY"]["Close"].reset_index()
px_b["Bench"] = "SPY"
px_b["month"] = px_b["Date"].dt.to_period("M")
px_b_group = px_b.groupby(["Bench", "month"])["Close"].agg(["first", "last"]).reset_index()
px_b_group["ret_1m"] = px_b_group["last"] / px_b_group["first"] - 1

###
exp_firms = combined["ret_1m"] - rf_monthly # We must set up a combined pivot to use this formula -> without a 3D df,  we would simply return a series (which we can't use cov and var => no indexes)
exp_b = px_b_group.set_index("month")["ret_1m"] - rf_monthly #Set up a month index

###
beta = {}
for sym in tickers:
    beta[sym] = exp_firms[sym].cov(exp_b) / exp_b.var()
beta_series = pd.DataFrame(beta, index = ["Beta"])
beta_series_T = beta_series.T
beta_series_T = beta_series_T.sort_values(by = "Beta")

beta_series_complete = beta_series_T.T
beta_series_complete

Unnamed: 0,COCO,GOOGL,YELP,NFLX,META
Beta,0.177473,1.096137,1.29717,1.304938,1.352242


In [43]:
fund_tab_px

Unnamed: 0,firm_id,Sector,Industry,ROE,ROA,Profit Margin,debtToEquity,Beta,EBITDA Margins,GrossMargins,month,ret_1m,vol_1m
0,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-01,0.030151,0.015685
1,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-02,-0.076493,0.024881
2,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-03,-0.091793,0.025404
3,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-04,0.106892,0.026855
4,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-05,0.103071,0.010273
...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-09,0.156590,0.030073
481,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-10,-0.029003,0.031947
482,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-11,0.308427,0.023962
483,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-12,-0.021414,0.014790


In [44]:


fund_tab_px["mom_ret_6M"] = fund_tab_px.groupby("firm_id")["ret_1m"].transform(lambda s: (1 + s).rolling(6).apply(np.prod) - 1)
fund_tab_px["vol_6M_ret"] = fund_tab_px.groupby("firm_id")["ret_1m"].transform(lambda s: s.rolling(6).std())
fund_tab_px["vol_6M_spikes"] = fund_tab_px.groupby("firm_id")["vol_1m"].transform(lambda s: s.rolling(6).std())

def maxdd(x):
    cumret = (1+x).cumprod()
    peak = cumret.cummax()
    dd = cumret/peak - 1
    maxdd = dd.min()
    return maxdd
    
fund_tab_px["Max_dd_6M"] = fund_tab_px.groupby("firm_id")["ret_1m"].transform(lambda s: s.rolling(6).apply(maxdd))
fund_tab_px

Unnamed: 0,firm_id,Sector,Industry,ROE,ROA,Profit Margin,debtToEquity,Beta,EBITDA Margins,GrossMargins,month,ret_1m,vol_1m,mom_ret_6M,vol_6M_ret,vol_6M_spikes,Max_dd_6M
0,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-01,0.030151,0.015685,,,,
1,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-02,-0.076493,0.024881,,,,
2,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-03,-0.091793,0.025404,,,,
3,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-04,0.106892,0.026855,,,,
4,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,2018-05,0.103071,0.010273,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-09,0.156590,0.030073,0.374201,0.068102,0.009748,-0.042108
481,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-10,-0.029003,0.031947,0.212418,0.071683,0.006343,-0.042108
482,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-11,0.308427,0.023962,0.491865,0.134027,0.006044,-0.042108
483,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.11760,0.11351,4.566,0.370,0.12714,0.35997,2025-12,-0.021414,0.014790,0.416242,0.139110,0.006928,-0.029003


In [53]:
df_new = fund_tab_px

fwd = df_new.sort_values(["firm_id", "month"]).groupby("firm_id")
fwd_dd_6M = fwd['Max_dd_6M'].shift(-6)
fwd_ret_mom_6M = fwd["mom_ret_6M"].shift(-6)
fwd_vol_6M_ret = fwd["vol_6M_ret"].shift(-6)


df_new["Market Stress"] = ((fwd_dd_6M <= -0.30) 
                      | ((fwd_ret_mom_6M <= -0.25) & (fwd_vol_6M_ret > fwd_vol_6M_ret.groupby(df_new["month"]).transform(lambda s: s.quantile(0.8)))))

Signal_columns = ["ret_1m", "vol_1m", "mom_ret_6M", "vol_6M_spikes", "vol_6M_ret", "Max_dd_6M", 
                  "ROA", "ROE", "Profit Margin", "EBITDA Margins", "GrossMargins"] # No debt_to_equity since the current deflation of the AI bubble had made this data rather unreliable.

def z_score_cal(s,m):
    g = s.groupby(m) # Group by month to conduct a Z_score -> since we are doing monthly returns
    Z = s - g.transform(lambda s: s.mean()) / g.transform(lambda s: s.std())
    return Z
    
for c in Signal_columns: 
    df_new[f"z_{c}"] = z_score_cal(df_new[c], df_new["month"])

X_cols = ["z_ret_1m", "z_vol_1m", "z_mom_ret_6M", "z_vol_6M_ret", "vol_6M_spikes", "z_Max_dd_6M", "z_ROA", "z_ROE", "z_Profit Margin", "z_EBITDA Margins", "z_GrossMargins"]

model_df = df_new.dropna(subset = X_cols + ["Market Stress"]) # MAKE SURE TO CLEAN THE DF

train_indx = model_df[model_df["month"] <= "2022-01-01"]
test_indx = model_df[model_df["month"] >= "2022-01-01"]

model = LogisticRegression(class_weight="balanced")
##
cal = CalibratedClassifierCV(model, cv = 3)
cal.fit(train_indx[X_cols], train_indx["Market Stress"])
##
Probability_Market_Stress = cal.predict_proba(test_indx[X_cols])
Probability_Market_Stress

array([[0.90927684, 0.09072316],
       [0.89985994, 0.10014006],
       [0.91085156, 0.08914844],
       [0.87572172, 0.12427828],
       [0.92381951, 0.07618049],
       [0.94535471, 0.05464529],
       [0.94494015, 0.05505985],
       [0.92426834, 0.07573166],
       [0.93009368, 0.06990632],
       [0.86205515, 0.13794485],
       [0.92041331, 0.07958669],
       [0.93743964, 0.06256036],
       [0.92914864, 0.07085136],
       [0.89842565, 0.10157435],
       [0.91789239, 0.08210761],
       [0.87468479, 0.12531521],
       [0.83061062, 0.16938938],
       [0.92750498, 0.07249502],
       [0.94298858, 0.05701142],
       [0.87842265, 0.12157735],
       [0.8806033 , 0.1193967 ],
       [0.92626396, 0.07373604],
       [0.93768984, 0.06231016],
       [0.94269649, 0.05730351],
       [0.90440918, 0.09559082],
       [0.87197744, 0.12802256],
       [0.92158569, 0.07841431],
       [0.92951724, 0.07048276],
       [0.92145159, 0.07854841],
       [0.91731746, 0.08268254],
       [0.

In [54]:
present_df = model_df.copy()
present_time = present_df["month"].max()
present_df = present_df[present_df["month"] == present_time]

Probability_Market_Stress_present = cal.predict_proba(present_df[X_cols])[:,1]
present_df["Probability of Market Stress"] = Probability_Market_Stress_present
###
present_df["Probability of Market Stress"] = (present_df["Probability of Market Stress"] * 100).map("{:.2f}%".format)
present_df = present_df.drop(columns = "Market Stress")
present_df

Unnamed: 0,firm_id,Sector,Industry,ROE,ROA,Profit Margin,debtToEquity,Beta,EBITDA Margins,GrossMargins,...,z_vol_6M_spikes,z_vol_6M_ret,z_Max_dd_6M,z_ROA,z_ROE,z_Profit Margin,z_debtToEquity,z_EBITDA Margins,z_GrossMargins,Probability of Market Stress
96,META,Communication Services,Internet Content & Information,0.32643,0.18013,0.30892,26.311,1.287,0.51937,0.82013,...,-4.022117,-2.322746,1.015971,-5.428511,-3.121626,-1.765652,25.447916,-1.277333,-1.947619,9.54%
193,NFLX,Communication Services,Entertainment,0.42861,0.1474,0.24047,65.822,1.711,0.29899,0.48085,...,-4.022708,-2.313009,0.897276,-5.461241,-3.019446,-1.834102,64.958916,-1.497713,-2.286899,11.15%
290,GOOGL,Communication Services,Internet Content & Information,0.3545,0.16276,0.32233,11.424,1.086,0.37661,0.59172,...,-4.022887,-2.304539,1.131621,-5.445881,-3.093556,-1.752242,10.560916,-1.420093,-2.176029,9.23%
387,YELP,Communication Services,Internet Content & Information,0.20374,0.12425,0.10227,3.737,0.553,0.14727,0.90468,...,-4.018955,-2.301164,0.998805,-5.484391,-3.244316,-1.972302,2.873916,-1.649433,-1.863069,13.31%
484,COCO,Consumer Defensive,Beverages - Non-Alcoholic,0.24021,0.1176,0.11351,4.566,0.37,0.12714,0.35997,...,-4.022164,-2.240556,1.107971,-5.491041,-3.207846,-1.961062,3.702916,-1.669563,-2.407779,11.57%


In [47]:
# from sklearn.metrics import (
#     roc_auc_score,
#     average_precision_score,
#     precision_recall_fscore_support,
#     confusion_matrix,
#     brier_score_loss
# )

# p = Probability_distress[:, 1]
# y = test_indx["Distress"].astype(int)

# # core metrics
# print("ROC-AUC :", round(roc_auc_score(y, p), 4))
# print("PR-AUC  :", round(average_precision_score(y, p), 4))
# print("Brier   :", round(brier_score_loss(y, p), 4))

# # threshold metrics
# th = 0.5
# pred = (p >= th).astype(int)
# prec, rec, f1, _ = precision_recall_fscore_support(y, pred, average="binary", zero_division=0)

# print("\n(threshold =", th, ")")
# print("Precision:", round(prec, 4))
# print("Recall   :", round(rec, 4))
# print("F1       :", round(f1, 4))
# print("CM       :\n", confusion_matrix(y, pred))

KeyError: 'Distress'

In [62]:
present_df.sort_values("Probability of Market Stress", ascending=False)[
    ["firm_id","Probability of Market Stress","debtToEquity","ROA","Profit Margin","Max_dd_6M"]
].head(15)

Unnamed: 0,firm_id,Probability of Market Stress,debtToEquity,ROA,Profit Margin,Max_dd_6M
96,META,9.54%,26.311,0.18013,0.30892,-0.121003
290,GOOGL,9.23%,11.424,0.16276,0.32233,-0.005352
387,YELP,13.31%,3.737,0.12425,0.10227,-0.138168
484,COCO,11.57%,4.566,0.1176,0.11351,-0.029003
193,NFLX,11.15%,65.822,0.1474,0.24047,-0.239697
