In [2]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from numpy.linalg import cholesky

In [30]:
# Display the Test 5.1 input as a plain NumPy array.
# NOTE(review): data_5_1 is only assigned in a later cell (In [39]), so this
# cell fails on a fresh Restart & Run All.
np.asarray(data_5_1)

array([[0.08497905, 0.08758581, 0.04230441, 0.00898354, 0.00387595],
       [0.08758581, 0.16048451, 0.05813615, 0.01234549, 0.00532646],
       [0.04230441, 0.05813615, 0.03744009, 0.00596294, 0.00257271],
       [0.00898354, 0.01234549, 0.00596294, 0.00168834, 0.00054633],
       [0.00387595, 0.00532646, 0.00257271, 0.00054633, 0.00031428]])

In [31]:
def check_matrix_correctness(input_matrix, test_matrix, error_epsilon = 1e-12):
    """Assert that two 2-D matrices agree entry by entry.

    Both arguments are converted with ``np.asarray`` first, so arrays,
    DataFrames and nested lists are all accepted.

    Args:
        input_matrix: Matrix under test.
        test_matrix: Expected matrix.
        error_epsilon: Exclusive upper bound on the absolute difference
            allowed for each entry.

    Returns:
        True when the shapes match and every entry is within tolerance.

    Raises:
        AssertionError: If the shapes differ, or at the first entry (scanning
            row by row) whose absolute difference is not below the tolerance.
    """
    actual = np.asarray(input_matrix)
    expected = np.asarray(test_matrix)
    assert actual.shape == expected.shape, "Input matrix and test matrix have different shapes"

    # Walk the cells lazily in row-major order so the first mismatch found is
    # the one named in the assertion message.
    cells = (
        (row, col)
        for row in range(actual.shape[0])
        for col in range(actual.shape[1])
    )
    for row, col in cells:
        gap = abs(actual[row, col] - expected[row, col])
        assert gap < error_epsilon, f"row {row} and column {col} of the data do not match"

    return True

In [25]:
# Number of rows in the Test 5.1 input.
data_5_1.shape[0]

5

## Test 5.1 Normal Simulation PD Input 0 mean - 100,000 simulations, compare input vs output covariance

In [36]:
# Scratch check of the `np.any(values < 0)` pattern that
# normal_monte_carlo_simulation applies to the eigenvalues.
np.any(np.array([1,2,-3]) < 0)

np.True_

In [39]:
# Load the Test 5.1 input; it is passed below as the covariance matrix.
data_5_1 = pd.read_csv("testfiles/data/test5_1.csv")

def normal_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims, fix_method, seed=1234):
    """Draw multivariate-normal samples and return their sample covariance.

    Args:
        mean_vector: Mean of the distribution to sample from.
        covariance_matrix: Covariance of the distribution to sample from.
        n_sims: Number of draws.
        fix_method: Callable applied to ``covariance_matrix`` when any of its
            eigenvalues (from ``np.linalg.eigvalsh``) is negative; its return
            value is used as the covariance for sampling.
        seed: Value passed to ``np.random.seed`` (this reseeds NumPy's global
            random state).

    Returns:
        The unbiased (``bias=False``) sample covariance of the draws.
    """
    np.random.seed(seed=seed)

    # A negative eigenvalue means the input is not positive semi-definite, so
    # hand it to the caller-supplied repair routine before sampling.
    has_negative_eigenvalue = np.any(np.linalg.eigvalsh(covariance_matrix) < 0)
    if has_negative_eigenvalue:
        covariance_matrix = fix_method(covariance_matrix)

    draws = np.random.multivariate_normal(mean_vector, covariance_matrix, n_sims)
    # np.cov treats rows as variables, so transpose the (n_sims, dim) draws.
    return np.cov(draws.T, bias=False)

In [40]:
# Test 5.1: simulate from the data_5_1 covariance with a zero mean vector.
covariance_matrix = data_5_1
mean_vector = np.zeros(shape=(len(covariance_matrix)))
n_sims = 100_000
# fix_method is the identity here, so the matrix would be used unchanged even
# if a negative eigenvalue were detected.
sim_cov = normal_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims = n_sims, fix_method=lambda x: x)
test_5_1 = pd.read_csv("testfiles/data/testout_5.1.csv")
# Compare against the expected output with a 1e-3 absolute tolerance per entry.
check_matrix_correctness(sim_cov, test_5_1, error_epsilon=1e-3)

True

## Test 5.2 Normal Simulation PSD Input 0 mean - 100,000 simulations, compare input vs output covariance

In [41]:
from covariance_comp import higham_covariance, near_psd

In [44]:
# Load the Test 5.2 input; it is passed below as the covariance matrix.
data_5_2 = pd.read_csv("testfiles/data/test5_2.csv")

In [56]:
# Test 5.2: simulate from the data_5_2 covariance with a zero mean vector.
covariance_matrix = data_5_2
mean_vector = np.zeros(shape=(len(covariance_matrix)))
n_sims = 100_000
# near_psd is only invoked if the simulation function finds a negative eigenvalue.
sim_cov = normal_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims = n_sims, fix_method=near_psd)
test_5_2 = pd.read_csv("testfiles/data/testout_5.2.csv")
# Compare against the expected output with a 1e-3 absolute tolerance per entry.
check_matrix_correctness(sim_cov, test_5_2, error_epsilon=1e-3)

True

In [57]:
# Display the simulated covariance left in `sim_cov` by the last simulation cell that ran.
sim_cov

array([[0.08510358, 0.11695222, 0.04232675, 0.00897542, 0.00388869],
       [0.11695222, 0.16071969, 0.05816685, 0.01233432, 0.00534397],
       [0.04232675, 0.05816685, 0.03748337, 0.00595592, 0.00258017],
       [0.00897542, 0.01233432, 0.00595592, 0.00168238, 0.00054672],
       [0.00388869, 0.00534397, 0.00258017, 0.00054672, 0.00031543]])

## Test 5.3 Normal Simulation nonPSD Input, 0 mean, near_psd fix - 100,000 simulations, compare input vs output covariance

In [64]:
# Test 5.3: load the input, simulate with a zero mean vector, and let near_psd
# repair the matrix if a negative eigenvalue is detected.
data_5_3 = pd.read_csv("testfiles/data/test5_3.csv")
covariance_matrix = data_5_3
mean_vector = np.zeros(shape=(len(covariance_matrix)))
n_sims = 100_000
sim_cov = normal_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims = n_sims, fix_method=near_psd)
test_5_3 = pd.read_csv("testfiles/data/testout_5.3.csv")
# Compare against the expected output with a 1e-3 absolute tolerance per entry.
check_matrix_correctness(sim_cov, test_5_3, error_epsilon=1e-3)

True

## Test 5.4 Normal Simulation nonPSD Input, 0 mean, higham fix - 100,000 simulations, compare input vs output covariance

In [65]:
# Test 5.4: same flow as Test 5.3 but with higham_covariance as the repair routine.
# NOTE(review): this reads test5_3.csv, the same input file as Test 5.3 —
# presumably intentional (same input, different fix); confirm.
data_5_4 = pd.read_csv("testfiles/data/test5_3.csv")
covariance_matrix = data_5_4
mean_vector = np.zeros(shape=(len(covariance_matrix)))
n_sims = 100_000
sim_cov = normal_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims = n_sims, fix_method=higham_covariance)
test_5_4 = pd.read_csv("testfiles/data/testout_5.4.csv")
# Compare against the expected output with a 1e-3 absolute tolerance per entry.
check_matrix_correctness(sim_cov, test_5_4, error_epsilon=1e-3)

True

## Test 5.5 PCA Simulation, 99% explained, 0 mean - 100,000 simulations compare input vs output covariance

In [203]:
# Zero mean vector sized from data_5_2 (assigned in an earlier cell).
mean_vector = np.zeros(shape=len(data_5_2))

# NOTE(review): data_5_5 is loaded from the same test5_2.csv file, but the
# covariance below is taken from data_5_2, so data_5_5 is unused here.
data_5_5 = pd.read_csv("testfiles/data/test5_2.csv")
covariance_matrix = data_5_2
def pca_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims, explained_threshold = 0.99, seed=1234):
    """Simulate from a covariance matrix using its leading principal components.

    The covariance is eigendecomposed, negative eigenvalues are clipped to
    zero, and components are sorted by decreasing eigenvalue. Trailing
    components are dropped while the remaining leading ones still explain
    more than ``explained_threshold`` of the total variance. Standard-normal
    draws for the retained components are then mapped back to the original
    space and shifted by the mean.

    Args:
        mean_vector: 1-D array-like mean added to every simulated draw
            (converted with ``np.asarray``, so lists and Series work too).
        covariance_matrix: Symmetric covariance matrix.
        n_sims: Number of draws.
        explained_threshold: Fraction of total variance to retain. At least
            one component is always kept, so values at or below 0 keep
            exactly one and values of 1 or more keep all of them.
        seed: Value passed to ``np.random.seed`` (this reseeds NumPy's global
            random state).

    Returns:
        The unbiased (``bias=False``) sample covariance of the simulated data.
    """
    np.random.seed(seed)
    mean_vector = np.asarray(mean_vector)

    eigvals, eigvecs = np.linalg.eigh(covariance_matrix)
    eigvals = np.clip(eigvals, 0, None)
    # eigh returns ascending eigenvalues; reorder so the largest come first.
    order = np.argsort(eigvals)[::-1]
    eigvals = eigvals[order]
    eigvecs = eigvecs[:, order]

    total_variance = eigvals.sum()
    k = len(eigvals)
    # Drop the last retained component while the k-1 before it already exceed
    # the threshold. The `k > 1` guard keeps at least one component; without
    # it a negative threshold pushes k below 1, the slice wraps around, and
    # the loop never ends.
    while k > 1 and eigvals[:k-1].sum() / total_variance > explained_threshold:
        k -= 1

    # Factor loading matrix: retained eigenvectors scaled by sqrt(eigenvalue).
    L = eigvecs[:, :k] @ np.diag(np.sqrt(eigvals[:k]))
    simulation_data = np.random.multivariate_normal(np.zeros(k), np.identity(k), n_sims).T  # (k, n_sims)
    transformed_data = L @ simulation_data + mean_vector[:, np.newaxis]

    return np.cov(transformed_data, bias=False)

In [204]:
# Test 5.5: PCA simulation keeping 99% of the variance, zero mean vector.
data_5_5 = pd.read_csv("testfiles/data/test5_2.csv")
# Use the frame loaded in this cell so the test does not depend on data_5_2
# still being defined by an earlier cell (both read the same file).
covariance_matrix = data_5_5
mean_vector = np.zeros(shape=len(covariance_matrix))
n_sims = 100_000
sim_cov = pca_monte_carlo_simulation(mean_vector, covariance_matrix, n_sims, explained_threshold = 0.99)
test_5_5 = pd.read_csv("testfiles/data/testout_5.5.csv")
# Compare against the expected output with a 1e-3 absolute tolerance per entry.
check_matrix_correctness(sim_cov, test_5_5, error_epsilon=1e-3)

True

## Test 8.1 Normal VaR

In [3]:
# Load the Test 8.1 input data (read from the test7_1.csv file).
data_8_1 = pd.read_csv("testfiles/data/test7_1.csv")

In [4]:
# function from homework 1
def fit_normal_dist_from_data(x: pd.DataFrame):
    """Estimate normal-distribution parameters from the columns of ``x``.

    Args:
        x: Data frame whose columns are the variables.

    Returns:
        A ``(mean, covariance)`` pair: the result of ``x.mean()`` followed by
        the result of ``x.cov()``.
    """
    return x.mean(), x.cov()

# Notebook-level fit of data_8_1: mean and standard deviation of the first
# column. univariate_normal_VaR repeats this computation internally.
mu_vector, covariance_matrix = fit_normal_dist_from_data(data_8_1)
mean, std = mu_vector.iloc[0], np.sqrt(covariance_matrix.iloc[0,0])


def univariate_normal_VaR(x: pd.DataFrame, alpha = 0.05):
    """Value at Risk of the first column of ``x`` under a fitted normal.

    The mean and standard deviation of the first column are estimated with
    ``fit_normal_dist_from_data`` and the ``alpha`` quantile of that normal
    is taken.

    VaR is reported as a loss, i.e. the negated quantile, rather than its
    absolute value: when the quantile is positive the result is negative
    (no loss at that confidence level) instead of being flipped into a
    positive loss.

    Args:
        x: Data frame; only its first column is used.
        alpha: Tail probability of the quantile.

    Returns:
        ``(abs_VaR, rel_VaR)`` where ``abs_VaR`` is minus the quantile and
        ``rel_VaR`` is the distance of the quantile below the fitted mean.
    """
    mu_vector, covariance_matrix = fit_normal_dist_from_data(x)
    mean, std = mu_vector.iloc[0], np.sqrt(covariance_matrix.iloc[0,0])

    # Evaluate the quantile once and derive both measures from it.
    quantile = norm.ppf(alpha, loc = mean, scale = std)
    abs_VaR = -quantile
    rel_VaR = mean - quantile

    return abs_VaR, rel_VaR

In [8]:
# Test 8.1: normal VaR of data_8_1 compared with the expected output file.
abs_VaR, rel_VaR = univariate_normal_VaR(data_8_1, alpha = 0.05)
test_8_1 = pd.read_csv("testfiles/data/testout8_1.csv")
error_epsilon = 1e-12
assert abs(abs_VaR - test_8_1.loc[0, "VaR Absolute"]) < error_epsilon, "Absolute value at risk does not match"
# Distinct message so a failure identifies which of the two measures is off.
assert abs(rel_VaR - test_8_1.loc[0, "VaR Diff from Mean"]) < error_epsilon, "Relative value at risk does not match"

## Test 8.2 VaR T Distribution

In [6]:
# Load the Test 8.2 input data (read from the test7_2.csv file).
data_8_2 = pd.read_csv("testfiles/data/test7_2.csv")

# function from homework 1
# `t` imported here is the name univariate_t_VaR below relies on.
from scipy.stats import t
# Exploratory fit; t.fit's result is unpacked as (nu, mu, sigma).
nu, mu, sigma = t.fit(data_8_2)
# Bare tuple in the middle of a cell: it is evaluated but not displayed.
mu, sigma, nu
alpha = 0.05
# print(t.ppf(alpha, df = nu, loc = mu, scale = sigma))

def univariate_t_VaR(x: pd.DataFrame, alpha = 0.05):
    """Value at Risk of ``x`` under a fitted Student-t distribution.

    ``scipy.stats.t.fit`` estimates the degrees of freedom, location and
    scale, and the ``alpha`` quantile of that distribution is taken.

    VaR is reported as a loss, i.e. the negated quantile, rather than its
    absolute value: when the quantile is positive the result is negative
    (no loss at that confidence level) instead of being flipped into a
    positive loss.

    Args:
        x: Data passed unchanged to ``t.fit``.
        alpha: Tail probability of the quantile.

    Returns:
        ``(abs_VaR, rel_VaR)`` where ``abs_VaR`` is minus the quantile and
        ``rel_VaR`` is the distance of the quantile below the fitted location.
    """
    nu, mu, sigma = t.fit(x)

    # Evaluate the quantile once and derive both measures from it.
    quantile = t.ppf(alpha, df = nu, loc = mu, scale = sigma)
    abs_VaR = -quantile
    rel_VaR = mu - quantile

    return abs_VaR, rel_VaR

In [9]:
# Test 8.2: Student-t VaR compared with the expected output file.
data_8_2 = pd.read_csv("testfiles/data/test7_2.csv")
abs_VaR, rel_VaR = univariate_t_VaR(data_8_2, alpha = 0.05)

test_8_2 = pd.read_csv("testfiles/data/testout8_2.csv")
error_epsilon = 1e-7
assert abs(abs_VaR - test_8_2.loc[0, "VaR Absolute"]) < error_epsilon, "Absolute value at risk does not match"
# Distinct message so a failure identifies which of the two measures is off.
assert abs(rel_VaR - test_8_2.loc[0, "VaR Diff from Mean"]) < error_epsilon, "Relative value at risk does not match"

## Test 8.3 VaR from Simulation

In [16]:
# Load the Test 8.3 input data (the same test7_2.csv file used for Test 8.2).
data_8_3 = pd.read_csv("testfiles/data/test7_2.csv")

Unnamed: 0,x1
0,0.062695
1,-0.001343
2,0.058816
3,0.074756
4,0.014312
...,...
95,0.083073
96,0.125152
97,0.046132
98,0.036900


In [12]:
# Scratch: ten sorted Student-t draws (df=3, loc=0, scale=1) with a fixed random_state.
np.sort(t.rvs(df=3, loc=0, scale=1, size=10, random_state=0))
# np.floor(3.1)

array([-2.19746122, -1.53369098, -0.92363499, -0.15447375,  0.42911921,
        0.81651664,  1.1574403 ,  1.67969022,  1.85192399,  1.868763  ])

In [86]:
# Scratch arithmetic — presumably the simple return of a move from 1.00 to 0.99.
0.99/1.00 - 1

-0.010000000000000009

In [90]:
# Scratch arithmetic — presumably a -1% return applied to a unit price.
(-.01)* 1

-0.01

In [57]:
# Scratch check that `*` on two arrays multiplies element by element.
np.array([1,2]) * np.array([1,3])

array([1, 6])

In [58]:
# Scratch check that np.exp is applied element by element.
np.exp(np.array([1,2]))

array([2.71828183, 7.3890561 ])

In [93]:
def monte_carlo_VaR_sim(mean_vector, covariance_matrix, current_prices, holdings, n_draws, return_type = "arithmetic", alpha = 0.05, seed = 1234):
    """Monte Carlo Value at Risk of a portfolio under multivariate-normal returns.

    Returns are drawn from a multivariate normal, turned into simulated
    prices according to ``return_type``, and aggregated into simulated
    portfolio values. VaR is measured at the ``alpha`` percentile of those
    values.

    Args:
        mean_vector: Array-like mean of the simulated returns.
        covariance_matrix: Square, 2-D array-like covariance of the returns.
        current_prices: Array-like current price per asset.
        holdings: Array-like position size per asset.
        n_draws: Number of simulated scenarios.
        return_type: ``"arithmetic"`` (price * (1 + r)), ``"geometric"``
            (price * exp(r)) or ``"brownian"`` (price + r).
        alpha: Tail probability; the ``100 * alpha`` percentile is used.
        seed: Value passed to ``np.random.seed`` (this reseeds NumPy's global
            random state).

    Returns:
        ``(abs_VaR, rel_VaR)``: the current portfolio value minus the
        percentile value, and the mean simulated value minus the percentile
        value.

    Raises:
        ValueError: If ``return_type`` is not recognised, the covariance is
            not a square 2-D matrix, or the mean length does not match it.
    """
    if return_type not in ["arithmetic", "geometric", "brownian"]:
        raise ValueError("Returns must be one of arithmetic, geometric, brownian")

    # Normalise inputs so lists, Series and DataFrames behave like arrays.
    mean_vector = np.asarray(mean_vector)
    covariance_matrix = np.asarray(covariance_matrix)
    current_prices = np.asarray(current_prices)
    holdings = np.asarray(holdings)

    if covariance_matrix.ndim != 2 or covariance_matrix.shape[0] != covariance_matrix.shape[1]:
        raise ValueError("Covariance matrix must be square")

    if len(mean_vector) != covariance_matrix.shape[0]:
        raise ValueError("Mean matrix length must match Covariance matrix dimensions")

    np.random.seed(seed)

    portfolio_value = current_prices.dot(holdings)

    # One row per scenario, one column per asset.
    simulated_returns = np.random.multivariate_normal(mean_vector, covariance_matrix, n_draws)
    if return_type == "arithmetic":
        simulated_prices = (1 + simulated_returns) * current_prices
    elif return_type == "geometric":
        simulated_prices = current_prices * np.exp(simulated_returns)
    elif return_type == "brownian":
        simulated_prices = current_prices + simulated_returns

    sim_portfolio_values = simulated_prices.dot(holdings)

    # np.percentile orders the values itself, so no separate sort is needed.
    percentile_portfolio = np.percentile(sim_portfolio_values, 100 * alpha)
    abs_VaR = portfolio_value - percentile_portfolio
    rel_VaR = np.mean(sim_portfolio_values) - percentile_portfolio
    return abs_VaR, rel_VaR

# Disabled alternative: fit a Student-t to data_8_3 and draw returns from it.
# nu, mu, sigma = t.fit(data_8_3)
# random_returns = t.rvs(df=nu, loc=mu, scale=sigma, size=n_draws, random_state=seed)

# Active path: fit a normal to data_8_3 and simulate a single asset with
# price 1 and holding 1.
mu, cov = fit_normal_dist_from_data(data_8_3)
mean_vector = mu
# cov = np.array([[sigma**2]])
current_prices=np.array([1])
holdings = np.array([1])
# NOTE(review): this result is only displayed, never asserted against
# test_8_3, and the displayed values differ from the expected ones shown
# further down — confirm which distribution the expected output assumes.
monte_carlo_VaR_sim(mean_vector=mean_vector, covariance_matrix = cov, current_prices=current_prices, holdings=holdings, n_draws=100_000, return_type = "arithmetic", alpha = 0.05, seed = 0)

(np.float64(0.04396698284271605), np.float64(0.08996220587152681))

In [64]:
# Load the expected output for Test 8.3.
test_8_3 = pd.read_csv("testfiles/data/testout8_3.csv")

In [114]:
# Display the expected Test 8.3 values.
test_8_3

Unnamed: 0,VaR Absolute,VaR Diff from Mean
0,0.040212,0.086586
