In [13]:
import numpy as np
import pandas as pd
from scipy.stats import norm, t
from scipy.integrate import quad

In [14]:
from risk_management.measurements import compute_correlation
from risk_management.risk_metrics import *

In [15]:
from risk_management.distributions import fit_normal_dist_from_data, fit_t_dist
from risk_management.risk_metrics import univariate_normal_VaR, univariate_t_VaR

In [209]:
def check_matrix_correctness(input_matrix, test_matrix, error_epsilon = 1e-12):

    input_matrix = np.asarray(input_matrix)
    test_matrix = np.asarray(test_matrix)
    assert input_matrix.shape == test_matrix.shape, "Input matrix and test matrix have different shapes"
    for i in range(input_matrix.shape[0]):
        for j in range(input_matrix.shape[1]):
            assert abs(input_matrix[i,j] - test_matrix[i,j]) < error_epsilon, f"row {i} and column {j} of the data do not match"
    
    return True

## Test 8.4 ES From Normal Distribtution

In [16]:
def expected_shortfall_normal(x: pd.DataFrame, alpha=0.05):

    abs_VaR, rel_VaR = univariate_normal_VaR(x, alpha = alpha)

    mu_vector, cov = fit_normal_dist_from_data(x)
    mu = mu_vector.iloc[0]
    sigma = np.sqrt(cov.iloc[0,0])

    def ev(x, mu, sigma):
        return x * norm.pdf(x, loc=mu, scale=sigma)
    result, error = quad(lambda x: ev(x, mu, sigma), -np.inf, -abs_VaR)

    abs_ES = -1/alpha * result
    diff_ES = -(-abs_ES - mu)

    # delta VaR es
    # quantile = norm.ppf(alpha, loc = mu, scale = sigma)
    # expected_shortfall = -mu + sigma * norm.pdf(quantile, loc=mu, scale=sigma)

    return abs_ES, diff_ES

In [17]:
data_8_4 = pd.read_csv("testfiles/data/test7_1.csv")
abs_ES, diff_ES = expected_shortfall_normal(data_8_4, alpha=0.05)

test_8_4 = pd.read_csv("testfiles/data/testout8_4.csv")
test_abs_ES = test_8_4.loc[0, "ES Absolute"]
test_diff_ES = test_8_4.loc[0, "ES Diff from Mean"]

error_epsilon = 1e-11
assert abs(abs_ES - test_abs_ES) < error_epsilon, "Abs Expected shortfall and test abs expected shortfall don't match"
assert abs(diff_ES - diff_ES) < error_epsilon, "Diff Expected shortfall and test diff expected shortfall don't match"

## Test 8.5 ES From T Distribution

In [18]:
def expected_shortfall_t(x: pd.DataFrame, alpha=0.05):

    abs_VaR, rel_VaR = univariate_t_VaR(x, alpha = alpha)

    mu, sigma, nu = fit_t_dist(x)

    def ev(x, mu, sigma, nu):
        return x * t.pdf(x, loc=mu, scale=sigma, df=nu)
    result, error = quad(lambda x: ev(x, mu, sigma, nu), -np.inf, -abs_VaR)

    abs_ES = -1/alpha * result
    diff_ES = -(-abs_ES - mu)

    # delta VaR es
    # quantile = norm.ppf(alpha, loc = mu, scale = sigma)
    # expected_shortfall = -mu + sigma * norm.pdf(quantile, loc=mu, scale=sigma)

    return abs_ES, diff_ES

In [19]:
data_8_5 = pd.read_csv("testfiles/data/test7_2.csv")
abs_ES, diff_ES = expected_shortfall_t(data_8_5, alpha=0.05)
test_8_5 = pd.read_csv("testfiles/data/testout8_5.csv")
test_abs_ES = test_8_5.loc[0, "ES Absolute"]
test_diff_ES = test_8_5.loc[0, "ES Diff from Mean"]

error_epsilon = 1e-6
assert abs(abs_ES - test_abs_ES) < error_epsilon, "Abs Expected shortfall and test abs expected shortfall don't match"
assert abs(diff_ES - diff_ES) < error_epsilon, "Diff Expected shortfall and test diff expected shortfall don't match"

## Test 8.6 ES From Simulation

In [20]:
data_8_6 = pd.read_csv("testfiles/data/test7_2.csv")

## take the mean from a vector of simulation results to get the expected shortfall
def expected_shortfall_sim(x:pd.DataFrame, alpha=0.05):
    pass
    # x = sim_portfolio_vals.sorted()
    # x = x[:alpha_index]
    # ES = mean(x)
    # return ES, diff_es

In [53]:
test_8_6

Unnamed: 0,ES Absolute,ES Diff from Mean
0,0.076906,0.122426


In [94]:
sorted_sims.iloc[:6].mean()

x1   -0.075512
dtype: float64

In [91]:
index

5

In [133]:
# NOTE -> The answer is not using the data as the sample
sorted_sims = data_8_6.sort_values(by="x1").reset_index(drop=True)
alpha = 0.05
index = int(np.floor(alpha*sorted_sims.shape[0])) # index of VaR
abs_ES = -sorted_sims.iloc[:index+1, 0].mean()
rel_ES = -(-abs_ES - sorted_sims.iloc[:, 0].mean())
abs_ES, rel_ES

(np.float64(0.07551216317584007), np.float64(0.12142106916980894))

In [21]:
from risk_management import normal_monte_carlo_simulation, near_psd, fit_normal_dist_from_data, fit_t_dist

In [62]:
# mean_vector, cov = fit_normal_dist_from_data(pd.concat([data_8_6, data_8_6], axis=1))

mean_vector, cov = fit_normal_dist_from_data(data_8_6)
# simulation_data = normal_monte_carlo_simulation(mean_vector=mean_vector, covariance_matrix=cov, n_sims = 100_000, fix_method=near_psd)
# mu, sigma, nu = fit_t_dist(data_8_6)
# mean_vector = np.array([mu])
# cov = np.array([[sigma**2]])
simulation_data = normal_monte_carlo_simulation(mean_vector=mean_vector, covariance_matrix=cov, n_sims = 10_000_000, fix_method=near_psd, seed=42)

In [63]:
simulation_data.shape

(1, 10000000)

In [64]:
# simulation_data.mean(axis=1)
holdings = np.ones(len(mean_vector))
prices = np.ones(len(mean_vector))
portfolio_value = prices.dot(holdings)


In [65]:
new_prices = prices[:, np.newaxis] + prices[:, np.newaxis] * simulation_data
new_prices.shape

(1, 10000000)

In [70]:
portfolio_values = (holdings[np.newaxis, :].dot(new_prices) - portfolio_value)/portfolio_value

In [81]:
sorted_portfolios = np.sort(portfolio_values)

In [83]:
sorted_portfolios.shape[1]*alpha

500000.0

In [79]:
alpha = 0.05
# np.percentile(sorted_portfolios, 100 * alpha)

index = int(np.floor(alpha*sorted_portfolios.shape[1]))

# VaR = sorted_portfolios[:, index]
shortfall_portfolios = sorted_portfolios[:,:index]
abs_ES = - np.mean(shortfall_portfolios)
diff_ES = -(-abs_ES - sorted_portfolios.mean())

In [80]:
abs_ES, diff_ES

(np.float64(0.06700405962203071), np.float64(0.11290946423885989))

In [33]:
test_8_6 = pd.read_csv("testfiles/data/testout8_6.csv")
test_8_6

Unnamed: 0,ES Absolute,ES Diff from Mean
0,0.076906,0.122426


In [30]:
abs_ES, diff_ES = expected_shortfall_sim(data_8_5, alpha=0.05)
test_8_6 = pd.read_csv("testfiles/data/testout8_6.csv")
test_abs_ES = test_8_5.loc[0, "ES Absolute"]
test_diff_ES = test_8_5.loc[0, "ES Diff from Mean"]

error_epsilon = 1e-6
assert abs(abs_ES - test_abs_ES) < error_epsilon, "Abs Expected shortfall and test abs expected shortfall don't match"
assert abs(diff_ES - diff_ES) < error_epsilon, "Diff Expected shortfall and test diff expected shortfall don't match"

TypeError: expected_shortfall_sim() got an unexpected keyword argument 'alpha'

In [None]:
test_8_6

Unnamed: 0,ES Absolute,ES Diff from Mean
0,0.076906,0.122426


## Test 9.1 VaR/ES on 2 levels from simulated values - Copula

In [241]:
def VaR_ES_2_level_sim_from_copula(sample_data: pd.DataFrame, holdings: np.array, prices: np.array, fix_method, n_sims = 100_000, alpha=0.05, seed=1234):

    if prices.shape != holdings.shape or len(sample_data.columns) != prices.shape[0]:
        raise ValueError("Data columns, holdings, and prices must all contain data for the sam amount of assets")

    means, cov = fit_normal_dist_from_data(sample_data)

    quantile_vectors = pd.DataFrame()

    for i, column in enumerate(sample_data.columns):
        mean = means.iloc[i]
        sigma = np.sqrt(cov.iloc[i,i])
        # print(i, column, mean, sigma)
        col_data = sample_data.loc[:, column]
        U_vector = norm.cdf(col_data, loc=mean, scale=sigma) # get U from observations
        Z_vector = norm.ppf(U_vector, loc=0, scale = 1) # get Z from U vectors (not needed if using spearman correlation)
        quantile_vectors[column] = Z_vector

    corr = compute_correlation(quantile_vectors, method="spearman")
    # print("CORR", corr)
    simulated_Zs = normal_monte_carlo_simulation(mean_vector=np.zeros(len(means)), covariance_matrix=corr, n_sims=n_sims, fix_method=fix_method, seed=seed)
    simulated_results = pd.DataFrame()

    for i, column in enumerate(sample_data.columns):

        col_data = simulated_Zs.T[:, i]
        U_vector = norm.cdf(col_data, loc=0, scale=1)

        # get original distribution back
        mean = means.iloc[i]
        sigma = np.sqrt(cov.iloc[i,i])
        F_vector = norm.ppf(U_vector, loc=mean, scale = sigma)

        simulated_results[column] = F_vector

    simulated_results["Total"] = ((simulated_results.dot(holdings*prices)) / holdings.dot(prices))

    risk_results = []
    for i, column in enumerate(simulated_results.columns):
        if column == "Total":
            invest_value = prices.dot(holdings)
        else:
            invest_value = prices[i]*holdings[i]

        sim_size = len(simulated_results[column])
        index = int(np.floor(alpha*sim_size))
        sorted_col = simulated_results[column].sort_values().reset_index(drop=True)

        # VaR 95% is var expressed as a percentage of initial investment
        # VaR95 / initial portfolio value = VaR 95_Pct

        VaR_95pct = -sorted_col.iloc[index]
        VaR95 = VaR_95pct * invest_value
        ES_95pct = -sorted_col[:index+1].mean()
        ES95 = ES_95pct * invest_value
        risk_results.append({
                    "Stock": column,
                    "VaR95": VaR95,
                    "ES95": ES95,
                    "VaR95_Pct": VaR_95pct,
                    "ES95_Pct": ES_95pct
                }
        )
    
    return pd.DataFrame(risk_results)
        

In [242]:
data_9_1 = pd.read_csv("testfiles/data/test9_1_returns.csv")
test_9_1 = pd.read_csv("testfiles/data/testout9_1.csv")

# Back out from results how much value is in each asset
A_value = test_9_1[test_9_1["Stock"] == "A"]["VaR95"].iloc[0]/test_9_1[test_9_1["Stock"] == "A"]["VaR95_Pct"].iloc[0]
B_value = test_9_1[test_9_1["Stock"] == "B"]["VaR95"].iloc[0]/test_9_1[test_9_1["Stock"] == "B"]["VaR95_Pct"].iloc[0]
total_value = test_9_1[test_9_1["Stock"] == "Total"]["VaR95"].iloc[0]/test_9_1[test_9_1["Stock"] == "Total"]["VaR95_Pct"].iloc[0]

holdings = np.ones(2)
price = np.array([A_value, B_value])
risk_outputs = VaR_ES_2_level_sim_from_copula(sample_data=data_9_1, holdings=holdings, prices=prices, fix_method=near_psd, n_sims = 1_000_000, alpha=0.05, seed=1234)

check_matrix_correctness(risk_outputs[["VaR95_Pct",	"ES95_Pct"]], test_9_1[["VaR95_Pct", "ES95_Pct"]], error_epsilon=1e-2)
check_matrix_correctness(risk_outputs[["VaR95",	"ES95"]], test_9_1[["VaR95", "ES95"]], error_epsilon=1e2) # NOTE -> simulation is off by a bit here and portfolio values increase size of deviation

True

In [240]:
risk_outputs

Unnamed: 0,Stock,VaR95,ES95,VaR95_Pct,ES95_Pct
0,A,94.175215,117.916311,0.047088,0.058958
1,B,113.308137,140.740947,0.037769,0.046914
2,Total,155.919658,194.246296,0.031184,0.038849


In [239]:
test_9_1

Unnamed: 0,Stock,VaR95,ES95,VaR95_Pct,ES95_Pct
0,A,94.460376,118.289371,0.04723,0.059145
1,B,107.880427,151.218174,0.03596,0.050406
2,Total,152.565684,199.704532,0.030513,0.039941
