# Stochastic Simulation: Variance Reduction

In [1]:
import math
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as st
import random as rd
from importlib import reload  
import statsmodels.api as sm
from typing import Union

from TidySimStat import *

In [75]:
def evaluate_integral(n):
    us = [rd.random() for i in range(n)]
    xs = [math.exp(u) for u in us]
    return xs

In [78]:
xs = evaluate_integral(10000)
cal_var_sample(xs)

0.24369775238350208

In [11]:
def eval_int_antithetic(n):
    us = [rd.random() for i in range(n)]
    ys = [(math.exp(u) + math.exp(1 - u)) / 2 for u in us]
    mean_ys = cal_mean_sample(ys)
    var_ys = cal_var_sample(ys)
    return [mean_ys, var_ys]

In [12]:
evaluate_integral_antithetic(1000)

[1.7203075556305543, 0.0038965827096504907]

In [24]:
def est_three_points(sample:list, alpha:float=0.05,
        var_pop:int=None, mute:bool=True) -> list:
    mean = cal_mean_sample(sample, var_pop, mute)
    interval = est_interval(sample, alpha, var_pop, mute)
    return [interval[0], mean, interval[1]]

In [25]:
def eval_int_control_variate(n):
    us = [rd.random() for i in range(n)]
    xs = [math.exp(u) for u in us]

    dat = np.array([xs, us])
    cov_xu = np.cov(dat)[0][0]
    # mean_xs = cal_mean_sample(xs)
    var_u = cal_var_sample(us)
    c = - cov_xu / var_u

    zs = [xs[i] + c * (us[i] - 0.5) for i in range(n)]
    return est_three_points(zs)

In [26]:
eval_int_control_variate(1000)

[1.695420080733904, 1.717361433723158, 1.739302786712412]

In [32]:
def eval_int_strata_sample(n, m:int=10):
    # us = [rd.random() for i in range(n)]
    # xs = [math.exp(u) for u in us]

    def stratify(m):
        us = [rd.random() for i in range(m)]
        w = sum([math.exp((i + us[i]) / m) for i in range(m)]) / m
        return w

    ws = [stratify(m) for i in range(n)]
    return ws

In [33]:
ws = eval_int_strata_sample(100)
est_three_points(ws)

[1.7157277719695654, 1.7193191103550132, 1.722910448740461]

$E[X| Y = y]$ is a function of $y$, while $E[X| Y]$ is a random variable.

In [60]:
test = pd.DataFrame({"x": [1, 2, 3, 4, 5, 6], "u": [rd.random() for i in range(6)]})
test

Unnamed: 0,x,u
0,1,0.733977
1,2,0.026808
2,3,0.523503
3,4,0.799387
4,5,0.483587
5,6,0.996781


In [155]:
test["y"] = [math.floor(i) + 1 for i in test["u"] * 10]
test.iloc[0]['x']

1.0

In [70]:
test["x"].tolist()

[1, 2, 3, 4, 5, 6]

In [94]:
test["u2"] = test.apply(lambda df: 1 - df["u"], axis=1)
test.shape[0]

6

In [71]:
math.exp(1) - 1

1.718281828459045

In [79]:
def analyse_stratified(sample:pd.core.frame.DataFrame):
    sample["y"] = [math.floor(i) + 1 for i in test["u"] * 10]
    xs_bar_y = [0 for i in range(10)]
    for i in range(10):
        xs_bar_y[i] = mean(test.loc[test["y"] == i+1])
    return xs_bar_y

In [84]:
np.var([1, 2, 3])

0.6666666666666666

In [112]:
test[['x', 'u']].cov()['x'][1]

0.29602407306451783

In [109]:
test['x'][1]

2

In [156]:
def cal_vcm(n:int):
    ## Calculate the theoretical variance of the condition expectation
    vcm = (sum([math.exp(i / 5) for i in range(1, 11)]) / 10 -
        (sum(math.exp(i / 10) for i in range(1, 11)) / 10)**2) * 100 * \
        (1 - math.exp(- 0.1))**2
    return vcm

In [130]:
cal_var_conditional_mean(10000) 

0.0006181177792205503

In [159]:
cal_vcm(10000)

0.23937616016051333

$$ \begin{aligned}
    E[X| Y] &= \int_{0.1 Y - 0.1}^{0.1 Y} e^u \mathrm{d} u \\
    &= 10 e^{0.1 Y} \left(1 - e^{- 0.1} \right)
\end{aligned} $$

The theoretical conditional expection of $X$ given $Y = k$ for $k = 1, 2, ... 10$:
$$ \begin{aligned}
    E[X| Y = k] = 10 e^{0.1 k} \left(1 - e^{- 0.1} \right) \quad \text{for } k = 1, 2, ... 10
\end{aligned} $$

The theoretical variance reduction because of stratified sampling should be:
$$ \begin{aligned}
    \frac{1}{n} \mathrm{Var} \left[E[X| Y] \right] & = \frac{1}{n} \mathrm{Var} \left[ 10 \left(1 - e^{- 0.1} \right) e^{0.1 Y} \right] \\
    &= \frac{100 \left(1 - e^{- 0.1} \right)^2}{n} \mathrm{Var} (e^{0.1 Y}) \\
    &= \frac{100 \left(1 - e^{- 0.1} \right)^2}{n} \left[E(e^{0.2 Y}) - E(e^{0.1 Y})^2 \right] \\
    &= 2.39376 E-5
\end{aligned} $$

The theoretical variance:
$$ \begin{aligned}
    \mathrm{Var} (X) = \frac{1}{2}\left(e^{2}-1\right)-(e-1)^{2}=0.2420
\end{aligned} $$


<!-- $$ \begin{aligned}
    \frac{1}{n} \mathrm{Var} \left[E[X| Y] \right] &= \frac{1}{n} E\left[(E[X | Y])^{2}\right] - \frac{1}{n} (E[X])^{2} \\
    &= \frac{100}{n} E \left[e^{0.2 Y} (1 - e^{- 0.1 Y})^2 \right] - \frac{(e - 1)^2}{n} \\
    &= \frac{100}{n} \sum_{y=1}^{10} e^{0.2 y} (1 - e^{- 0.1 y})^2 0.1 - \frac{(e - 1)^2}{n} \\
\end{aligned} $$ -->

In [133]:
[10 * math.exp(0.1 * y) * (1 - math.exp(- 0.1)) for y in range(1, 11)]

[1.051709180756477,
 1.1623184008452228,
 1.2845604941583335,
 1.419658900652673,
 1.5689657305885794,
 1.7339752969038094,
 1.9163390707996768,
 2.1178822102199124,
 2.340621826644822,
 2.586787173020957]

In [148]:
t = sum([math.exp(0.2 * y) for y in range(1, 11)]) / 10 - sum([math.exp(0.1 * y) for y in range(1, 11)])**2 / 100
t * 100 * (1 - math.exp(-0.1))**2 / 10000

2.393761601605129e-05

# test

In [9]:
cal_pvalue_chi2(45, 9)

9.226628711056506e-07

In [8]:
help(cal_pvalue_chi2)

Help on function cal_pvalue_chi2 in module TidySimStat.inference:

cal_pvalue_chi2(stat, df, mute: bool = True)
    Calculate the pvalue of two-sided t-test using Chi-Square distribution.

