In [2]:
import numpy as np, pandas as pd, scipy.stats as st

data = pd.read_csv('data.csv')
num_obvs = 30_000
# risks per sector
r = np.array([.295, .49, .41, .415, .338, .64, .403, .476])
#sec_loading maps sector to its risk, t is the threshold for defaults
data['sec_loading'], data['t'] = r[data['sector'].values], st.norm.ppf(data.p)
# 100k monte carlo simulations and len(r)+len(data) risk factors per one sample
num_dimensions = len(r) + len(data)
from scipy.stats import qmc

sobol_sampler = qmc.Sobol(d=num_dimensions, scramble=True)
quasi_random_samples = sobol_sampler.random_base2(m=int(np.log2(num_obvs)))
from scipy.stats import norm

#factors = st.norm.ppf(quasi_random_samples)
shift = 2.0  # Shift parameter to bias the sampling distribution towards the tail

# Simulate factors and initialize
factors = np.random.normal(shift, 1, (100_000, len(r) + len(data)))  # Shifted Gaussian

def process_obs(obs):
    m_factor, sec_factor, res_factor = obs[0], obs[:len(r)][data.sector.values], obs[len(r):]

    # Adjusted loss indicator threshold
    ind = np.sqrt(r[0]) * m_factor + np.sqrt(data.sec_loading - r[0]) * sec_factor \
          + np.sqrt(1 - data.sec_loading) * res_factor < (data.t - shift)

    # Compute losses
    loss = np.zeros((len(data),))
    loss[ind] = data[ind].m + data[ind].d * np.random.standard_t(3, size=sum(ind))

    # Compute weight for importance sampling correction
    weight = np.exp(-shift * obs[0] + 0.5 * shift**2)
    return np.sum(loss) * weight, weight

from joblib import Parallel, delayed
import time
from scipy.stats import t

sample = []
sobol_student = qmc.Sobol(d=1, scramble=True)

answers = Parallel(n_jobs=-1)(delayed(process_obs)(obs) for obs in factors)
answers = np.array(answers)

In [3]:
samples = answers[:, 0]
print(len(samples))

100000


In [4]:
weights = answers[:, 1]
print(len(weights))

100000


In [8]:
# Calculate VaR using weighted losses
weighted_losses = -np.array(samples)
sorted_losses = np.sort(weighted_losses)
VaR = sorted_losses[int(0.999 * len(sorted_losses))]  # Empirical 99.9% quantile
print(f"VaR with Importance Sampling: {VaR}")

VaR with Importance Sampling: -0.0


In [None]:
sorted_samples = np.argsort(samples)
sorted_weights = np.argsort(weights)
cumulative_weights = np.cumsum(sorted_samples) / np.sum(sorted_weights)

In [15]:
VaR_index = np.searchsorted(cumulative_weights, 0.0001)
VaR = sorted_samples[VaR_index]

In [17]:
sorted_samples[:100]

array([[   0, 3773, 3772, ...,  180, 3630, 2646],
       [2555, 3956, 5161, ..., 1884, 2127, 5657],
       [1415, 3810, 5563, ..., 1881, 1945, 5657],
       ...,
       [   0, 3746, 3745, ..., 3864, 3084, 1280],
       [   0, 3777, 3776, ..., 3198,  156, 1563],
       [   0, 3776, 3775, ..., 1880, 1944, 5657]], dtype=int64)

In [1]:
VaR = sorted([-s for s in sample])[int(0.01 * num_obvs)]
VaR

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [2]:
sample

[array([-6.92061804e+18, -6.92061804e+18, -6.92061804e+18, ...,
        -6.92061804e+18, -6.92061804e+18, -6.92061804e+18]),
 array([-1.26675731e+18, -1.26675731e+18, -1.26675731e+18, ...,
        -1.26675731e+18, -1.26675731e+18, -1.26675731e+18]),
 array([8.99471542e+18, 8.99471542e+18, 8.99471542e+18, ...,
        8.99471542e+18, 8.99471542e+18, 8.99471542e+18]),
 array([-4.28254282e+17, -4.28254282e+17, -4.28254282e+17, ...,
        -4.28254282e+17, -4.28254282e+17, -4.28254282e+17]),
 array([-2.90148673e+18, -2.90148673e+18, -2.90148673e+18, ...,
        -2.90148673e+18, -2.90148673e+18, -2.90148673e+18]),
 array([-6.68532385e+19, -6.68532385e+19, -6.68532385e+19, ...,
        -6.68532385e+19, -6.68532385e+19, -6.68532385e+19]),
 array([-4.18853468e+19, -4.18853468e+19, -4.18853468e+19, ...,
        -4.18853468e+19, -4.18853468e+19, -4.18853468e+19]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([2.60610591e+18, 2.60610591e+18, 2.60610591e+1

ANTH

In [1]:
import numpy as np, pandas as pd, scipy.stats as st

data = pd.read_csv('data.csv')
num_obvs = 30_000
# risks per sector
r = np.array([.295, .49, .41, .415, .338, .64, .403, .476])
#sec_loading maps sector to its risk, t is the threshold for defaults
sec_loading, datat = r[data['sector'].values], st.norm.ppf(data.p)


In [2]:
import numpy as np, pandas as pd
from scipy.stats import t

data = pd.read_csv('data.csv')

num_obvs = 30_000
r = np.array([.295, .49, .41, .415, .338, .64, .403, .476])
factors, sample = np.random.normal(0,1, (num_obvs, len(r)+len(data))), []

antithetic_factors = -factors

factors = np.vstack([factors, antithetic_factors])
sample_antithetic = []
def process_obs(obs):
    m_factor = obs[0]
    sec_factor = obs[:len(r)][data.sector.values]
    res_factor = obs[len(r):]

    ind = (
                  r[0]**0.5 * m_factor
                  + (sec_loading - r[0])**0.5 * sec_factor
                  + (1 - sec_loading)**0.5 * res_factor
          ) < datat
    loss = np.zeros((len(data),))
    loss[ind] = data[ind].m + data[ind].d * t.rvs(df=3,size=sum(ind))
    return sum(loss), np.var(loss)

In [3]:

from joblib import Parallel, delayed
import time
from scipy.stats import t

answers = Parallel(n_jobs=-1)(delayed(process_obs)(obs) for obs in factors)
answers = np.array(answers)

In [4]:
sample_antithetic = answers[:, 0]
vars = answers[:, 1]
Var_antithetic = sorted([-s for s in sample_antithetic])[100]
Var_antithetic

-14784.02781846971

In [5]:
np.mean(vars)

1377.2826951205109

NEW

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import t, norm
from joblib import Parallel, delayed

# Wczytanie danych
data = pd.read_csv('data.csv')

# Parametry symulacji
num_obvs = 30_000
r = np.array([.295, .49, .41, .415, .338, .64, .403, .476])
sector_indices = data['sector'].values
sec_loading = r[sector_indices]
datat = norm.ppf(data['p'])

# Parametry do stratyfikacji
num_strata = 10
strata_bounds = np.linspace(-3, 3, num_strata + 1)

# Funkcja generująca stratyfikowane czynniki
def stratified_factors(num_samples, dim, strata_bounds):
    factors = []
    samples_per_stratum = num_samples // len(strata_bounds)
    for i in range(len(strata_bounds) - 1):
        low, high = strata_bounds[i], strata_bounds[i + 1]
        stratum = np.random.uniform(low, high, (samples_per_stratum, dim))
        factors.append(stratum)
    return np.vstack(factors)

# Funkcja obliczająca straty
def process_obs(obs):
    m_factor = obs[0]  # Czynnik ogólny
    sec_factor = obs[:len(r)][sector_indices]  # Czynniki sektorowe
    res_factor = obs[len(r):]  # Czynniki resztowe

    # Zmienna kontrolna: przybliżenie straty jako wartość oczekiwana
    control_variate = (
            r[0]**0.5 * m_factor
            + (sec_loading - r[0])**0.5 * sec_factor
            + (1 - sec_loading)**0.5 * res_factor
    )

    # Ograniczanie wartości t-Studenta, aby zmniejszyć wpływ ekstremalnych strat
    ind = control_variate < datat
    loss = np.zeros((len(data),))
    if np.any(ind):
        loss[ind] = data.loc[ind, 'm'].values + data.loc[ind, 'd'].values * np.clip(t.rvs(df=3, size=sum(ind)), -5, 5)

    # Korekcja zmienną kontrolną
    E_control_variate = 0  # Oczekiwana wartość dla zmiennej kontrolnej
    c = np.cov(loss, control_variate)[0, 1] / np.var(control_variate) if np.var(control_variate) > 0 else 0
    corrected_loss = loss - c * (control_variate - E_control_variate)
    return np.sum(corrected_loss), np.var(corrected_loss)

# Generowanie stratyfikowanych próbek
factors = stratified_factors(num_obvs, len(r) + len(data), strata_bounds)
antithetic_factors = -factors
combined_factors = np.vstack([factors, antithetic_factors])

# Obliczenia w równoległych procesach
answers = Parallel(n_jobs=-1)(delayed(process_obs)(obs) for obs in combined_factors)
answers = np.array(answers)

# Wyniki
sample_antithetic = answers[:, 0]
vars = answers[:, 1]

In [8]:

# Symetryzacja wyników i redukcja wariancji
Var_antithetic = sorted(np.abs(sample_antithetic))[int(0.001*num_obvs)]

# Wyniki końcowe
print("Średnia wariancji:", np.mean(vars))
print("VaR antetyczny (100-ty wynik):", Var_antithetic)


Średnia wariancji: 58933.14500682461
VaR antetyczny (100-ty wynik): 0.0


In [10]:
import numpy as np
import pandas as pd
from scipy.stats import t, norm
from joblib import Parallel, delayed

# Wczytanie danych
data = pd.read_csv('data.csv')

# Parametry symulacji
num_obvs = 100_000
r = np.array([.295, .49, .41, .415, .338, .64, .403, .476])
sector_indices = data['sector'].values
sec_loading = r[sector_indices]
datat = norm.ppf(data['p'])

# Funkcja obliczająca straty dla jednej obserwacji
def process_obs(obs):
    m_factor = obs[0]  # Czynnik ogólny
    sec_factor = obs[:len(r)][sector_indices]  # Czynniki sektorowe
    res_factor = obs[len(r):]  # Czynniki resztowe

    # Obliczanie zmiennej kontrolnej dla indywidualnych strat
    control_variate = (
            r[0]**0.5 * m_factor
            + (sec_loading - r[0])**0.5 * sec_factor
            + (1 - sec_loading)**0.5 * res_factor
    )

    # Indeks dla obserwacji poniżej progu
    ind = control_variate < datat
    loss = np.zeros(len(data))

    # Ograniczenie wartości z rozkładu t-Studenta
    if np.any(ind):
        loss[ind] = data.loc[ind, 'm'].values + data.loc[ind, 'd'].values * np.clip(t.rvs(df=3, size=sum(ind)), -5, 5)

    # Zwróć sumę strat i wariancję strat
    return np.sum(loss), np.var(loss)

# Generowanie próbek antetycznych
factors = np.random.normal(0, 1, (num_obvs, len(r) + len(data)))
antithetic_factors = -factors
combined_factors = np.vstack([factors, antithetic_factors])

# Równoległe przetwarzanie próbek
answers = Parallel(n_jobs=-1, verbose=5)(delayed(process_obs)(obs) for obs in combined_factors)
answers = np.array(answers)

# Wyniki
sample_losses = answers[:, 0]
sample_vars = answers[:, 1]

# Obliczenie VaR dla antetycznych próbek
VaR_antithetic = sorted(sample_losses)[int(0.001*num_obvs)]

# Wyniki końcowe
print("Średnia wariancji strat:", np.mean(sample_vars))
print("VaR antetyczny (100-ty wynik):", VaR_antithetic)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:    5.8s
[Parallel(n_jobs=-1)]: Done 2080 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 7264 tasks      | elapsed:    8.9s
[Parallel(n_jobs=-1)]: Done 13600 tasks      | elapsed:   11.6s
[Parallel(n_jobs=-1)]: Done 21088 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done 29728 tasks      | elapsed:   18.0s
[Parallel(n_jobs=-1)]: Done 39520 tasks      | elapsed:   22.4s
[Parallel(n_jobs=-1)]: Done 50464 tasks      | elapsed:   27.5s
[Parallel(n_jobs=-1)]: Done 62560 tasks      | elapsed:   32.5s
[Parallel(n_jobs=-1)]: Done 75808 tasks      | elapsed:   37.4s
[Parallel(n_jobs=-1)]: Done 90208 tasks      | elapsed:   43.1s
[Parallel(n_jobs=-1)]: Done 105760 tasks      | elapsed:   49.4s
[Parallel(n_jobs=-1)]: Done 122464 tasks      | elapsed:   56.8s
[Parallel(n_jobs=-1)]: Done 140

Średnia wariancji strat: 1358.5380990544957
VaR antetyczny (100-ty wynik): -28057.32998597472


[Parallel(n_jobs=-1)]: Done 200000 out of 200000 | elapsed:  1.4min finished


In [11]:
losses = np.array(sample_losses)

In [12]:
quantile_99_9= np.percentile(losses, 0.001)
extreme_losses = losses[losses<quantile_99_9]
np.median(extreme_losses)

-57037.16083378078