<a href="https://colab.research.google.com/github/csciulla/stress-test-dashboard/blob/main/stresstest_ntbk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [460]:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.optimize import minimize
from hmmlearn.hmm import GaussianHMM

In [None]:
class Portfolio:
  def __init__(self, portfolio:list,  lower_bound:float, upper_bound:float):
    try:
      if lower_bound >= upper_bound:
        raise ValueError("Lower bound must be less than upper bound.")

      self.portfolio = portfolio
      self.weights = None
      self.dfclose = None
      self.lower_bound = lower_bound
      self.upper_bound = upper_bound

    except Exception as e:
      print(f"Error in intializer function: {e}")
      return None

  def get_data(self, period:str=None, start_date:str=None, end_date:str=None):
    """
    Downloads the portfolios adjusted closes either by 'period' or 'start_date' and 'end_date'.
    Only one method of date input should be provided.
    Data downloaded should be big enough to handle calculations.
    """
    try:
      if period and (start_date or end_date): #checks if both methods of date input are used
        raise ValueError("Provide either 'period' OR both 'start_date' and 'end_date' -- not both.")

      if period:
        period = period.strip()
        self.dfclose = yf.download(self.portfolio, period=period, progress=False, auto_adjust=False)["Adj Close"]
      elif start_date and end_date:
        start_date = start_date.strip()
        end_date = end_date.strip()
        self.dfclose = yf.download(self.portfolio, start=start_date, end=end_date, progress=False, auto_adjust=False)["Adj Close"]
      else:
        raise ValueError("You must provide either a 'period' or both 'start_date' and 'end_date'.")

      if self.dfclose.empty or self.dfclose is None:
        raise ValueError("Downloaded price data is empty or unavailable.")
      elif len(self.dfclose) <= 2:
        raise ValueError("Downloaded price data is too short.")
      elif len(self.dfclose) < 21: #average trading days in a month
        print("Warning: Limited price history may lead to unreliable metrics.")

      return self.dfclose

    except Exception as e:
      print(f"Error in get_data: {e}")
      return None

  def get_weights(self, type_weight:str):
    """
    Returns a list of weights for the portfolio.

    type_weight: Input 'eq' for equal-weighted portfolio or 'opt' for optimized weights based on sharpe-ratio
    """
    try:
      dfclose = self.dfclose
      if dfclose is None or dfclose.empty:
        raise ValueError("The portfolio's price data is missing. Please properly run 'get_data' first.")
      elif len(dfclose) <= 2:
        raise ValueError("Downloaded price data is too short.")

      #Get log returns of each asset
      log_returns = np.log(dfclose/dfclose.shift()).dropna()

      #Calculate initial portfolio metrics
      weights = np.repeat(1/len(self.portfolio), len(self.portfolio))
      expected_returns = log_returns.mean()*252
      port_returns = weights.T @ expected_returns
      cov_matrix = log_returns.cov()*252
      port_vol = np.sqrt(weights.T @ cov_matrix @ weights)
      rf = 0.045

      #Set bounds and constraints for objective function
      bounds = [(self.lower_bound, self.upper_bound) for _ in range(len(self.portfolio))]
      constraints = {"type": "eq", "fun": lambda w: np.sum(w)-1}
      def neg_sharpe(w):
        port_ret = w.T @ expected_returns
        port_std = np.sqrt(w.T @ cov_matrix @ w)
        return -((port_ret - rf)/port_std)

      if type_weight.strip().lower() == "eq":
        self.weights = [float(i) for i in weights]
      elif type_weight.strip().lower() == "opt":
        optimized_weights = minimize(neg_sharpe, weights, method="SLSQP", bounds=bounds, constraints=constraints)
        self.weights = [round(float(i),4) for i in optimized_weights.x]
      else:
        raise ValueError("Select a valid input for 'type_weight' -- either 'eq' or 'opt'.")

      return self.weights

    except Exception as e:
      print(f"Error in get_weights: {e}")
      return None


In [595]:
#test weights
test = Portfolio(["AAPL", "MSFT", "GOOG", "JNJ", "XOM"], 0.0, 0.5)
test.get_data('5y')
test.get_weights("opt")

[0.0, 0.0501, 0.0, 0.4499, 0.5]

In [604]:
def monte_carlo(T:int, sims:int, weights:list, df:pd.DataFrame, regime:str, level:str, factorReturns:np.array=None, rand:bool=None ):
  """
  Returns simulated portfolio returns using Monte Carlo Simulation.

  T: number of days in a path
  sims: number of paths
  weights: list of asset weights
  df: dataframe of the assets adjusted closes
  regime: determines how much or how little the portfolio is affected by the crisis event    
  level: scale of the crisis event
  rand: input the boolean True to return a random path, otherwise ignore

  regime options: 'Low', 'Medium', 'High'
  level options: 'Mild', 'Moderate', 'Severe', 'Tail Risk', 'Regulatory'
  """
  try:
    if T <= 2:
      raise ValueError("The length of each simulated path is too short.")
    elif T < 21:
      print("Warning: Limited price data may lead to unreliable metrics.")

    #Intialize dictionary to store simulated paths of T days for each ticker
    tickers = list(df.columns) + ['SPY']
    weights = weights + [0.000]
    sims_returns = {ticker: np.full(shape=(T, sims), fill_value=0.0) for ticker in tickers}
    
    #Correspond regime with scaling factor
    regime = regime.strip().capitalize()
    level = level.strip().capitalize()
    factorDict = {"Mild": 1.0,
                  "Moderate": 1.3,
                  "Severe": 1.7,
                  "Tail risk": 2.0,
                  "Regulatory": 2.5}
    scaling_factor = factorDict[level]

    #Calculate log returns and align with market returns
    start_date = pd.to_datetime(df.index[0])
    end_date = pd.to_datetime(df.index[-1])
    market = yf.download('SPY', start=start_date, end=end_date, progress=False, auto_adjust=False)['Adj Close']
    market_returns = np.log(market/market.shift()).dropna()
    log_returns = np.log(df/df.shift()).dropna()
    aligned_index = log_returns.index.intersection(market_returns.index)
    market_returns = market_returns.loc[aligned_index]
    log_returns = log_returns.loc[aligned_index]
    log_returns['SPY'] = market_returns

    #Create mean matrix 
    if factorReturns is not None:
      meanM = np.full(shape=(T, len(tickers)), fill_value=factorReturns)
    else:
      expected_return = log_returns.mean()
      meanM = np.full(shape=(T, len(tickers)), fill_value=expected_return)

    #Initalize HMM
    port_returns = (log_returns @ weights).values.reshape(-1,1) #HMM requires 2D array
    historical_port_vol = np.std(port_returns)
    model = GaussianHMM(n_components=3, covariance_type="full", n_iter=1000, random_state=42)
    model.fit(port_returns)

    #Gather the volatility regimes established by the HMM and correspond them with their respective state
    vol_states = ["Low","Medium","High"]
    vol_regimes = np.sqrt([var[0][0] for var in model.covars_])
    vol_regimes = np.sort(vol_regimes)
    vol_dict = {state: vol for state, vol in zip(vol_states, vol_regimes)}

    #Calculate the scale factor needed for the historical data to reach the desired volatility and then apply it to L
    desired_vol = vol_dict[regime]*scaling_factor
    vol_scale_factor = desired_vol / historical_port_vol
    cov_matrix = log_returns.cov()* (vol_scale_factor**2)
    L = np.linalg.cholesky(cov_matrix)

    #Generate paths
    for m in range(sims):
      Z = np.random.normal(size=(T, len(tickers)))
      dailyReturns = meanM + Z @ L.T
      for i, ticker in enumerate(tickers):
        sims_returns[ticker][:,m] = dailyReturns[:,i]

    #Get a random path
    if rand:
      random_int = np.random.randint(0,sims)
      random_sims_returns = {ticker: sims_returns[ticker][:,random_int] for ticker in tickers}
      random_sims_df = pd.DataFrame(random_sims_returns)
      return random_sims_df
    elif rand != None:
      raise ValueError("Invaild input for 'rand'. Input the string 'yes' to return a random path, otherwise ignore.")
    else:
        return sims_returns

  except Exception as e:
    print(f"Error in monte_carlo: {e}")
    return None

In [None]:
df = test.get_data('5y')
monte_carlo(T=25, sims=5, weights=[0.0, 0.5, 0.1916, 0.3084], df=df, regime="Low", level='Mild', rand=True)

Unnamed: 0,AAPL,AMZN,GOOG,META,SPY
0,-0.00432,0.009421,0.004009,0.022391,8.5e-05
1,0.001009,-0.00666,0.003171,0.003998,-0.001092
2,-0.01069,0.007693,0.005137,0.008174,0.001524
3,0.025747,0.006577,0.004364,0.017804,0.009827
4,-0.004544,-0.006421,-0.005521,0.00021,-0.005997
5,0.00336,0.01514,-0.005773,-0.002457,0.006004
6,0.029339,0.005541,0.025235,0.019597,0.009682
7,-0.014329,-0.025169,0.001756,-0.003922,-0.001783
8,-0.003135,-0.001377,0.017513,0.023794,0.002498
9,0.011489,0.001355,-0.004409,0.004214,0.004298


In [420]:
def calculate_metrics(weights:list, df:pd.DataFrame):
  """
  Calculates annual portfolio volatilty, Sharpe Ratio, 95% VaR, Max Drawdown, and Beta.
  weights: list of each assets weight in the portfolio
  df: dataframe of the assets adjusted closes or simulated log_returns
  """
  try:
    if df is None or df.empty:
      raise ValueError("Price data is empty or unavailable. Make sure historical/simulated data is properly downloaded.")

    #Core calculations
    if df.iloc[0,0] < 1:
      log_returns = df
      weights = weights + [0.000]
    else:
      log_returns = np.log(df/df.shift()).dropna()

    tickers = list(df.columns)
    weights = np.array(weights)
    expected_returns = log_returns.mean()*252
    cov_matrix = log_returns.cov()*252
    rf = 0.045
    port_returns = weights.T @ expected_returns
    port_returns_series = log_returns @ weights

    #Metrics
    port_vol = np.sqrt(weights.T @ cov_matrix @ weights)
    sharpe = (port_returns - rf)/port_vol
    VaR_95 = np.percentile(port_returns_series, 5)
    CVaR_95 = port_returns_series[port_returns_series <= VaR_95].mean()

    #Max Drawdown
    cum_returns = (1+port_returns_series).cumprod()
    cum_max = np.maximum.accumulate(cum_returns)
    drawdown = cum_returns/cum_max - 1
    mdd = drawdown.min() #drawdown values are negative

    #Beta
    if pd.api.types.is_integer_dtype(port_returns_series.index):
      #Simulated case: align by length
      market_returns = df['SPY']
    else:
      market = yf.download("SPY", period='max', progress=False, auto_adjust=False)["Adj Close"]
      market_returns = (np.log(market/market.shift()).dropna()).squeeze() #convert to series so that it works properly with port_returns_series
      
      #Simulated historical case: align by date
      start_date = pd.to_datetime(port_returns_series.index[0])
      end_date = pd.to_datetime(port_returns_series.index[-1])
      if start_date and end_date not in market_returns.index: #first make sure that market data contains crisis event
        market = yf.download("SPY", start=start_date, end=end_date, progress=False, auto_adjust=False)["Adj Close"]
        market_returns = (np.log(market/market.shift()).dropna()).squeeze()

      #align by date for either simulated historical or historical case
      aligned_index = port_returns_series.index.intersection(market_returns.index)
      market_returns = market_returns.loc[aligned_index]
      port_returns_series = port_returns_series.loc[aligned_index]
    beta = port_returns_series.cov(market_returns) / market_returns.var()

    #Calculate PCR
    PCRdict = {}
    if 'SPY' in tickers:
      tickers = tickers[:-1] #Remove 'SPY' 
      weights = weights[:-1]
    for i, ticker in enumerate(tickers):
      ticker_vol = np.std(log_returns[ticker]) * np.sqrt(252)
      ticker_corr = log_returns[ticker].corr(port_returns_series)
      MRC = ticker_vol*ticker_corr
      PCR = (weights[i]*MRC)/port_vol
      PCRdict[ticker] = (f"{PCR*100:.2f}%")
    PCRframe = pd.DataFrame(data=PCRdict, index=["PCR"])

    metrics = pd.DataFrame(data=[[port_vol, sharpe, VaR_95, CVaR_95, mdd, beta]] ,columns=["Annual Volatilty", "Sharpe","95% VaR", "95% CVaR", "Max DD", "Beta"], index=["Portfolio"])
    return metrics, PCRframe

  except Exception as e:
    print(f"Error in calculate_metrics: {e}")
    return None


In [417]:
df = test.get_data('5y')
rand = monte_carlo(100,100,[0.353, 0.0, 0.4469, 0.2001], df, 'Medium', 'Tail risk', rand=True)
calculate_metrics([0.353, 0.0, 0.4469, 0.2001], rand)

(           Annual Volatilty    Sharpe   95% VaR  95% CVaR    Max DD     Beta
 Portfolio          0.470834 -2.919566 -0.047641 -0.076213 -0.527386  1.19959,
        AAPL   AMZN    GOOG    META
 PCR  32.16%  0.00%  46.77%  20.58%)

In [415]:
#test the simulated max and min sharpe metrics
df = test.get_data('10y')
mc = monte_carlo(504,50,[0.353, 0.0, 0.4469, 0.2001], df, regime="High", level='Moderate')
tickers = list(df.columns) + ['SPY']
sims = len(mc[tickers[0]][0])
all_metrics = []
all_PCR = []
for m in range(sims):
  mth_df = pd.DataFrame({ticker: mc[ticker][:,m] for ticker in tickers})
  metrics = calculate_metrics([0.353, 0.0, 0.4469, 0.2001], mth_df)
  all_metrics.append(metrics[0])
  all_PCR.append(metrics[1])
sharpes = [df.loc["Portfolio", "Sharpe"] for df in all_metrics]
min_idx = np.argmin(sharpes)
max_idx = np.argmax(sharpes)
all_metrics[min_idx].index = ["Worst Portfolio"]
all_metrics[max_idx].index = ["Best Portfolio"]
print(all_metrics[min_idx])
print(all_PCR[min_idx])
print(all_metrics[max_idx])
print(all_PCR[max_idx])

                 Annual Volatilty    Sharpe  95% VaR    Max DD      Beta
Worst Portfolio          0.579095 -1.586592 -0.06329 -0.887424  1.176545
       AAPL   AMZN    GOOG    META
PCR  31.98%  0.00%  44.12%  23.81%
                Annual Volatilty    Sharpe   95% VaR    Max DD      Beta
Best Portfolio           0.56331  2.447176 -0.053075 -0.367143  1.172476
       AAPL   AMZN    GOOG    META
PCR  33.08%  0.00%  42.89%  23.94%


In [421]:
def historical(df:pd.DataFrame, crisis:str):
  """
  Simulates the prices of your portfolio if a historical event were to happen again.

  df: dataframe of the assets adjusted closes
  crisis: string of the event you want to simulate

  Crisis Options:
  "DOT-COM" -- The Dot-Com bubble
  "2008 GFC" -- 2008 Global Financial Crisis
  "2011 Euro" -- 2011 Eurozone Crisis
  "COVID" -- COVID-19 Pandemic
  "2022 Inf" -- 2022 Inflation Crash
  """
  try:
    crisis_periods = {"DOT-COM": ("2000-03-01", "2002-10-01"),
                      "2008 GFC": ("2007-10-01", "2009-03-01"),
                      "2011 Euro": ("2011-07-01", "2011-12-01"),
                      "COVID": ("2020-02-14", "2020-04-15"),
                      "2022 Inf": ("2022-01-01", "2022-10-01")
                      }
    crisis = crisis.strip()
    if crisis not in crisis_periods.keys():
      raise ValueError("Input a valid crisis event.")

    tickers = list(df.columns)
    start_date = pd.to_datetime(crisis_periods[crisis][0])
    end_date = pd.to_datetime(crisis_periods[crisis][1])

    if start_date not in df.index: #check if crisis event does not exist in existing df
      dfcrisis = yf.download(tickers, start=start_date, end=end_date, progress=False, auto_adjust=False)["Adj Close"]
    else:
      dfcrisis = df.loc[start_date:end_date]

    for ticker in tickers:
      if dfcrisis[ticker].isna().sum() >= len(dfcrisis[ticker])//3: #checks if any ticker reaches NA threshold
        raise ValueError(f"{ticker} price data does not exist for crisis period.")

    last_price = df.iloc[-1]
    crisisReturns = np.log(dfcrisis/dfcrisis.shift()).dropna()
    cumReturns = (1+crisisReturns).cumprod()
    crisisPrices = last_price.mul(cumReturns)
    return crisisPrices

  except Exception as e:
    print(f" \n Error in historical: {e}")
    return None

In [422]:
df = yf.download(["AAPL", "MSFT", "GOOG", "JNJ", "XOM"], period='10y', auto_adjust=False)["Adj Close"]
hist = historical(df, "COVID")
calculate_metrics([0.0, 0.056, 0.0, 0.444, 0.5], hist)

[*********************100%***********************]  5 of 5 completed


(           Annual Volatilty    Sharpe   95% VaR  95% CVaR    Max DD      Beta
 Portfolio          0.829948 -2.356408 -0.106502 -0.126618 -0.439156  1.097892,
       AAPL   GOOG    JNJ    MSFT     XOM
 PCR  0.00%  4.41%  0.00%  44.53%  49.81%)

In [587]:
def factor_stress(df:pd.DataFrame, factors:list, shocks:list):
    """
    df: dataframe of the assets historical adjusted closes
    factors: list of strings of the factors to include in the multi-factor model
    shocks: list of floats the determine the shock to each factor; corresponds to the order of the factors parameter.
            (e.g., [0.4, -0.2] = 40% shock to SMB, -20% to HML)
    """
    try:
        if factors == ['FF3']:
            factors = ['Mkt-RF', 'SMB', 'HML']
        elif factors == ['FF5']:
            factors = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']

        log_returns = np.log(df/df.shift()).dropna()
        FFdf = pd.read_csv('../data/F-F_Research_Data_5_Factors_2x3_daily.csv', skiprows=3, index_col=0)
        FFdf = FFdf.iloc[:-1]
        FFdf.index = pd.to_datetime(FFdf.index)
        aligned_index = FFdf.index.intersection(log_returns.index) #FF data only goes up to 05-30-2025 currently
        factors_df = FFdf.loc[aligned_index, factors]
        log_returns = log_returns.loc[aligned_index]
        rf = FFdf.loc[aligned_index, 'Mkt-RF'].copy()
        rf = rf/100

        results = {}
        stressed_returns = {}
        tickers = list(df.columns)
        for ticker in tickers:
            excess_returns = log_returns[ticker] - rf
            X = factors_df/100
            X = sm.add_constant(X)
            y = excess_returns

            model = sm.OLS(y, X).fit()
            results[ticker] = {
                'alpha': model.params['const'],
                'betas': model.params.drop('const').to_dict(),
                'r-squared': model.rsquared
            }
            stressed_betas = [beta*(1+shock) for beta,shock in zip(list(results[ticker]['betas'].values()), shocks)]
            stressed_returns[ticker] = results[ticker]['alpha'] + sum(stressed_betas)
            stressed_returns_array = np.array(list(stressed_returns.values()))
                    
        return stressed_returns_array

    except Exception as e:
        print(f"Error in factor_stress: {e}")
        return None



In [606]:
df = test.get_data('5y')
factor = factor_stress(df, ['FF5'], [0.4, 0, 0, 0.2, -0.2])
rand = monte_carlo(100, 25, ["AAPL", "MSFT", "GOOG", "JNJ", "XOM"], df, 'Medium', 'Moderate', factorReturns=factor, rand=True)
rand
#calculate_metrics([0.0, 0.0501, 0.0, 0.4499, 0.5], rand)

Error in monte_carlo: could not broadcast input array from shape (5,) into shape (100,6)
