In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from numpy.linalg import cholesky

In [20]:
def check_matrix_correctness(input_matrix, test_matrix, error_epsilon = 1e-12):
    """Check that two 2-D matrices agree element-wise within an absolute tolerance.

    Parameters
    ----------
    input_matrix : array-like
        Matrix under test; converted with ``np.asarray``.
    test_matrix : array-like
        Reference matrix; must have the same shape as ``input_matrix``.
    error_epsilon : float, optional
        Every absolute difference must be strictly smaller than this value.

    Returns
    -------
    bool
        ``True`` when every element matches.

    Raises
    ------
    AssertionError
        If the shapes differ, or if any element differs by ``error_epsilon``
        or more. The message names the first offending position in row-major
        order.
    """
    input_matrix = np.asarray(input_matrix)
    test_matrix = np.asarray(test_matrix)
    if input_matrix.shape != test_matrix.shape:
        raise AssertionError("Input matrix and test matrix have different shapes")

    # Negating `<` (instead of testing `>=`) makes NaN differences count as
    # mismatches, because any comparison against NaN is False.
    mismatches = ~(np.abs(input_matrix - test_matrix) < error_epsilon)
    if mismatches.any():
        # argwhere lists positions in row-major order, so the first entry is
        # the same element a nested row/column loop would have reported.
        i, j = np.argwhere(mismatches)[0]
        raise AssertionError(f"row {i} and column {j} of the data do not match")

    return True

## Test 5.1 Normal Simulation PD Input 0 mean - 100,000 simulations, compare input vs output covariance

In [21]:
# Input for test 5.1; the test cell below passes this frame to the simulation as the covariance matrix.
data_5_1 = pd.read_csv("testfiles/data/test5_1.csv")

def normal_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims, fix_method, seed=1234):
    """Draw multivariate normal samples and return their sample covariance.

    Parameters
    ----------
    mean_vector : array-like
        Mean of the distribution to sample from.
    covariance_matrix : array-like
        Target covariance. If it has any negative eigenvalue it is first
        passed through ``fix_method``.
    n_sims : int
        Number of draws.
    fix_method : callable
        Receives the covariance matrix and returns a repaired one; only
        called when a negative eigenvalue is found.
    seed : int, optional
        Seed for NumPy's global random state (reseeded on every call).

    Returns
    -------
    numpy.ndarray
        Unbiased sample covariance of the simulated draws.
    """
    np.random.seed(seed=seed)

    # A negative eigenvalue means the matrix is not positive semi-definite,
    # so hand it to the caller-supplied repair routine before sampling.
    has_negative_eigenvalue = np.any(np.linalg.eigvalsh(covariance_matrix) < 0)
    if has_negative_eigenvalue:
        covariance_matrix = fix_method(covariance_matrix)

    draws = np.random.multivariate_normal(mean_vector, covariance_matrix, n_sims)

    # np.cov treats rows as variables, hence the transpose to (n_vars, n_sims).
    return np.cov(draws.T, bias=False)

In [22]:
# Inputs: the loaded covariance matrix and a zero mean of matching length.
n_sims = 100_000
covariance_matrix = data_5_1
mean_vector = np.zeros(len(covariance_matrix))

# The identity lambda is a no-op repair step for the fix_method argument.
sim_cov = normal_monte_carlo_simulation(
    mean_vector,
    covariance_matrix,
    n_sims=n_sims,
    fix_method=lambda x: x,
    seed=42,
)

# Compare the simulated covariance with the expected output to within 1e-3.
test_5_1 = pd.read_csv("testfiles/data/testout_5.1.csv")
check_matrix_correctness(sim_cov, test_5_1, error_epsilon=1e-3)

True

## Test 5.2 Normal Simulation PSD Input 0 mean - 100,000 simulations, compare input vs output covariance

In [23]:
from covariance_comp import higham_covariance, near_psd

In [24]:
# Input for test 5.2; later cells use this frame as the covariance matrix.
data_5_2 = pd.read_csv("testfiles/data/test5_2.csv")

In [25]:
# Inputs: the loaded covariance matrix and a zero mean of matching length.
n_sims = 100_000
covariance_matrix = data_5_2
mean_vector = np.zeros(len(covariance_matrix))

# near_psd is only invoked if the matrix has a negative eigenvalue;
# the simulation's default seed is used here.
sim_cov = normal_monte_carlo_simulation(
    mean_vector,
    covariance_matrix,
    n_sims=n_sims,
    fix_method=near_psd,
)

# Compare the simulated covariance with the expected output to within 1e-3.
test_5_2 = pd.read_csv("testfiles/data/testout_5.2.csv")
check_matrix_correctness(sim_cov, test_5_2, error_epsilon=1e-3)

True

## Test 5.3 Normal Simulation nonPSD Input, 0 mean, near_psd fix - 100,000 simulations, compare input vs output covariance

In [26]:
# Load the test 5.3 input and use it as the covariance matrix, with a zero
# mean of matching length.
data_5_3 = pd.read_csv("testfiles/data/test5_3.csv")
n_sims = 100_000
covariance_matrix = data_5_3
mean_vector = np.zeros(len(covariance_matrix))

# near_psd repairs the matrix if it has a negative eigenvalue; the
# simulation's default seed is used here.
sim_cov = normal_monte_carlo_simulation(
    mean_vector,
    covariance_matrix,
    n_sims=n_sims,
    fix_method=near_psd,
)

# Compare the simulated covariance with the expected output to within 1e-3.
test_5_3 = pd.read_csv("testfiles/data/testout_5.3.csv")
check_matrix_correctness(sim_cov, test_5_3, error_epsilon=1e-3)

True

## Test 5.4 Normal Simulation nonPSD Input, 0 mean, higham fix - 100,000 simulations, compare input vs output covariance

In [27]:
# Test 5.4 reads the same input file as test 5.3 (test5_3.csv) and uses it as
# the covariance matrix, with a zero mean of matching length.
data_5_4 = pd.read_csv("testfiles/data/test5_3.csv")
n_sims = 100_000
covariance_matrix = data_5_4
mean_vector = np.zeros(len(covariance_matrix))

# higham_covariance repairs the matrix if it has a negative eigenvalue; the
# simulation's default seed is used here.
sim_cov = normal_monte_carlo_simulation(
    mean_vector,
    covariance_matrix,
    n_sims=n_sims,
    fix_method=higham_covariance,
)

# Compare the simulated covariance with the expected output to within 1e-3.
test_5_4 = pd.read_csv("testfiles/data/testout_5.4.csv")
check_matrix_correctness(sim_cov, test_5_4, error_epsilon=1e-3)

True

## Test 5.5 PCA Simulation, 99% explained, 0 mean - 100,000 simulations compare input vs output covariance

In [28]:
# Test 5.5 reads the same input file as test 5.2. Build the inputs from the
# frame loaded here so this cell does not rely on `data_5_2` still being
# defined by an earlier cell.
data_5_5 = pd.read_csv("testfiles/data/test5_2.csv")
covariance_matrix = data_5_5
mean_vector = np.zeros(shape=len(data_5_5))
def pca_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims, explained_threshold = 0.99, seed=1234):
    """Simulate normal draws through a truncated PCA factorisation and return
    the sample covariance of the simulated data.

    The covariance matrix is eigen-decomposed, negative eigenvalues are
    clipped to zero, and only the leading ``k`` components are kept, where
    ``k`` is the smallest count whose cumulative share of the total eigenvalue
    sum is strictly greater than ``explained_threshold`` (all components if no
    such count exists).

    Parameters
    ----------
    mean_vector : array-like
        Mean added to every simulated draw; one entry per variable.
    covariance_matrix : array-like
        Symmetric covariance matrix to simulate from.
    n_sims : int
        Number of draws.
    explained_threshold : float, optional
        Share of total variance the kept components must exceed. Values at or
        below zero keep a single component; values that cannot be exceeded
        keep every component.
    seed : int, optional
        Seed for NumPy's global random state (reseeded on every call).

    Returns
    -------
    numpy.ndarray
        Unbiased sample covariance of the simulated data.
    """
    np.random.seed(seed)

    # Accept lists / pandas objects for the mean as well as ndarrays.
    mean_vector = np.asarray(mean_vector)

    eigvals, eigvecs = np.linalg.eigh(covariance_matrix)
    eigvals = np.clip(eigvals, 0, None)

    # eigh returns ascending eigenvalues; reorder so the largest come first.
    idx = np.argsort(eigvals)[::-1]
    eigvals = eigvals[idx]
    eigvecs = eigvecs[:, idx]

    n_components = len(eigvals)
    total_variance = eigvals.sum()
    if total_variance > 0:
        cumulative_explained = np.cumsum(eigvals) / total_variance
        # side="right" counts the prefixes whose explained share is <= the
        # threshold; one more component is the first to exceed it. The result
        # is always >= 1, so a non-positive threshold can no longer loop
        # forever, and it is capped at the number of components available.
        n_not_above = int(np.searchsorted(cumulative_explained, explained_threshold, side="right"))
        k = min(n_not_above + 1, n_components)
    else:
        # Degenerate all-zero spectrum: keep everything rather than divide 0/0.
        k = n_components

    L = eigvecs[:,:k] @ np.diag(np.sqrt(eigvals[:k]))
    simulation_data = np.random.multivariate_normal(np.zeros(k), np.identity(k), n_sims).T # k, n
    transformed_data = L @ simulation_data + mean_vector[:,np.newaxis]

    sim_cov = np.cov(transformed_data, bias=False)

    return sim_cov

In [29]:
# Test 5.5 reads the same input file as test 5.2. Use the frame loaded here
# rather than `data_5_2`, so the cell does not depend on an earlier cell.
data_5_5 = pd.read_csv("testfiles/data/test5_2.csv")
covariance_matrix = data_5_5
mean_vector = np.zeros(shape=len(covariance_matrix))
n_sims = 100_000
sim_cov = pca_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims, explained_threshold = 0.99, seed=42)

# Compare the simulated covariance with the expected output to within 1e-3.
test_5_5 = pd.read_csv("testfiles/data/testout_5.5.csv")
check_matrix_correctness(sim_cov, test_5_5, error_epsilon=1e-3)

True

## Test 8.1 Normal VaR

In [30]:
# Sample data for the normal VaR test (8.1); note that the input file is named test7_1.csv.
data_8_1 = pd.read_csv("testfiles/data/test7_1.csv")

In [31]:
# function from homework 1
def fit_normal_dist_from_data(x: pd.DataFrame):
    """Estimate normal-distribution parameters from the columns of ``x``.

    Parameters
    ----------
    x : pd.DataFrame
        Observations, one column per variable.

    Returns
    -------
    tuple
        ``(x.mean(), x.cov())``: the per-column means and the covariance
        matrix of the columns, as computed by pandas.
    """
    return x.mean(), x.cov()

# Fit the normal parameters for the 8.1 data and pull out the first column's
# mean and standard deviation.
mu_vector, covariance_matrix = fit_normal_dist_from_data(data_8_1)
mean = mu_vector.iloc[0]
std = np.sqrt(covariance_matrix.iloc[0, 0])


def univariate_normal_VaR(x: pd.DataFrame, alpha = 0.05):
    """Value at risk of the first column of ``x`` under a fitted normal.

    Parameters
    ----------
    x : pd.DataFrame
        Observations; only the first column's mean and variance are used.
    alpha : float, optional
        Tail probability at which the quantile is taken.

    Returns
    -------
    tuple
        ``(abs_VaR, rel_VaR)``: the negated ``alpha`` quantile of the fitted
        normal, and the fitted mean minus that quantile.
    """
    fitted_means, fitted_cov = fit_normal_dist_from_data(x)
    loc = fitted_means.iloc[0]
    scale = np.sqrt(fitted_cov.iloc[0, 0])

    # Both VaR figures are built from the same lower-tail quantile.
    lower_quantile = norm.ppf(alpha, loc = loc, scale = scale)

    return -lower_quantile, loc - lower_quantile

In [32]:
# Compute both VaR figures and compare them with the expected output file.
abs_VaR, rel_VaR = univariate_normal_VaR(data_8_1, alpha = 0.05)
test_8_1 = pd.read_csv("testfiles/data/testout8_1.csv")
error_epsilon = 1e-12
assert abs(abs_VaR - test_8_1.loc[0, "VaR Absolute"]) < error_epsilon, "Absolute value at risk does not match"
# This assert checks the relative figure, so its message must say so.
assert abs(rel_VaR - test_8_1.loc[0, "VaR Diff from Mean"]) < error_epsilon, "Relative value at risk does not match"

## Test 8.2 VaR T Distribution

In [33]:
data_8_2 = pd.read_csv("testfiles/data/test7_2.csv")

# function from homework 1
from scipy.stats import t

# Exploratory Student-t fit of the test data; the result is unpacked as
# degrees of freedom, location, scale (the order passed to t.ppf below).
nu, mu, sigma = t.fit(data_8_2)
alpha = 0.05

def univariate_t_VaR(x: pd.DataFrame, alpha = 0.05):
    """Value at risk of ``x`` under a fitted Student-t distribution.

    Parameters
    ----------
    x : pd.DataFrame
        Observations passed directly to ``scipy.stats.t.fit``.
    alpha : float, optional
        Tail probability at which the quantile is taken.

    Returns
    -------
    tuple
        ``(abs_VaR, rel_VaR)``: the negated ``alpha`` quantile of the fitted
        distribution, and the fitted location minus that quantile.
    """
    dof, location, scale = t.fit(x)

    # Both VaR figures are built from the same lower-tail quantile.
    lower_quantile = t.ppf(alpha, df = dof, loc = location, scale = scale)

    return -lower_quantile, location - lower_quantile

In [34]:
# Compute both VaR figures from the Student-t fit and compare them with the
# expected output file.
data_8_2 = pd.read_csv("testfiles/data/test7_2.csv")
abs_VaR, rel_VaR = univariate_t_VaR(data_8_2, alpha = 0.05)

test_8_2 = pd.read_csv("testfiles/data/testout8_2.csv")
error_epsilon = 1e-7
assert abs(abs_VaR - test_8_2.loc[0, "VaR Absolute"]) < error_epsilon, "Absolute value at risk does not match"
# This assert checks the relative figure, so its message must say so.
assert abs(rel_VaR - test_8_2.loc[0, "VaR Diff from Mean"]) < error_epsilon, "Relative value at risk does not match"

## Test 8.3 VaR from Simulation

In [35]:
# Sample data for the simulation VaR test (8.3); this is the same file (test7_2.csv) that test 8.2 reads.
data_8_3 = pd.read_csv("testfiles/data/test7_2.csv")

In [77]:
def monte_carlo_VaR_sim(mean_vector, covariance_matrix, current_prices, holdings, n_draws, return_type = "arithmetic", alpha = 0.05, seed = 1234):
    """Estimate portfolio value at risk from simulated multivariate normal returns.

    Parameters
    ----------
    mean_vector : array-like
        Mean return per asset.
    covariance_matrix : array-like
        Square covariance matrix of the asset returns.
    current_prices : array-like
        Current price per asset.
    holdings : array-like
        Quantity held per asset.
    n_draws : int
        Number of simulated return vectors.
    return_type : {"arithmetic", "geometric", "brownian"}, optional
        How a simulated return ``r`` maps to a simulated price ``p``:
        ``p0 * (1 + r)``, ``p0 * exp(r)`` or ``p0 + r`` respectively.
    alpha : float, optional
        Tail probability; the ``100 * alpha`` percentile of the simulated
        portfolio values is used.
    seed : int, optional
        Seed for a local ``np.random.default_rng`` generator.

    Returns
    -------
    tuple
        ``(abs_VaR, rel_VaR)``: the current portfolio value minus the
        percentile, and the mean simulated portfolio value minus the
        percentile.

    Raises
    ------
    ValueError
        If ``return_type`` is not recognised, the covariance matrix is not
        square, or the mean vector length does not match it.
    """
    if return_type not in ["arithmetic", "geometric", "brownian"]:
        raise ValueError("Returns must be one of arithmetic, geometric, brownian")

    # Coerce once so plain lists and pandas objects work everywhere below
    # (`.shape` and `.dot` need array semantics).
    mean_vector = np.asarray(mean_vector, dtype=float)
    covariance_matrix = np.asarray(covariance_matrix, dtype=float)
    current_prices = np.asarray(current_prices, dtype=float)
    holdings = np.asarray(holdings, dtype=float)

    if covariance_matrix.ndim != 2 or covariance_matrix.shape[0] != covariance_matrix.shape[1]:
        raise ValueError("Covariance matrix must be square")

    if len(mean_vector) != covariance_matrix.shape[0]:
        raise ValueError("Mean matrix length must match Covariance matrix dimensions")

    rng = np.random.default_rng(seed)

    portfolio_value = current_prices.dot(holdings)

    # Shape (n_draws, n_assets): one simulated return vector per row.
    simulated_returns = rng.multivariate_normal(mean_vector, covariance_matrix, n_draws)

    if return_type == "arithmetic":
        simulated_prices = (1 + simulated_returns) * current_prices
    elif return_type == "geometric":
        simulated_prices = current_prices * np.exp(simulated_returns)
    elif return_type == "brownian":
        simulated_prices = current_prices + simulated_returns

    sim_portfolio_values = simulated_prices.dot(holdings)

    # np.percentile does its own ordering, so no separate sort is needed.
    percentile_portfolio = np.percentile(sim_portfolio_values, 100 * alpha)
    abs_VaR = portfolio_value - percentile_portfolio
    rel_VaR = np.mean(sim_portfolio_values) - percentile_portfolio
    return abs_VaR, rel_VaR

# nu, mu, sigma = t.fit(data_8_3)
# random_returns = t.rvs(df=nu, loc=mu, scale=sigma, size=n_draws, random_state=seed)



In [None]:

# Fit normal parameters to the 8.3 data and simulate a single-asset portfolio
# holding one unit at a price of 1.
mu, cov = fit_normal_dist_from_data(data_8_3)
mean_vector = mu
current_prices=np.array([1])
holdings = np.array([1])
# cov = np.array([[sigma**2]])
abs_VaR, rel_VaR = monte_carlo_VaR_sim(mean_vector=mean_vector, covariance_matrix = cov, current_prices=current_prices, holdings=holdings, n_draws=100_000, return_type = "arithmetic", alpha = 0.05, seed = 42)

test_8_3 = pd.read_csv("testfiles/data/testout8_3.csv")
error_epsilon = 1e-2 # NOTE this simulation only gets somewhat close even when doing 100M simulations, but the implementation seems correct
# NOTE(review): data_8_3 comes from test7_2.csv, the file the Student-t test (8.2)
# also reads, while this cell simulates from a normal fit. That may be why the
# match is loose; confirm how the expected output was generated.
assert abs(abs_VaR - test_8_3.loc[0, "VaR Absolute"]) < error_epsilon, "Absolute value at risk does not match"
# This assert checks the relative figure, so its message must say so.
assert abs(rel_VaR - test_8_3.loc[0, "VaR Diff from Mean"]) < error_epsilon, "Relative value at risk does not match"