# Stochastic Simulation: Variance Reduction

In [7]:
import math
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as st
import random as rd
from importlib import reload  
import statsmodels.api as sm
from typing import Union

from TidySimStat import *

In [75]:
def evaluate_integral(n):
    us = [rd.random() for i in range(n)]
    xs = [math.exp(u) for u in us]
    return xs

In [78]:
xs = evaluate_integral(10000)
cal_var_sample(xs)

0.24369775238350208

In [11]:
def eval_int_antithetic(n):
    us = [rd.random() for i in range(n)]
    ys = [(math.exp(u) + math.exp(1 - u)) / 2 for u in us]
    mean_ys = cal_mean_sample(ys)
    var_ys = cal_var_sample(ys)
    return [mean_ys, var_ys]

In [12]:
evaluate_integral_antithetic(1000)

[1.7203075556305543, 0.0038965827096504907]

In [24]:
def est_three_points(sample:list, alpha:float=0.05,
        var_pop:int=None, mute:bool=True) -> list:
    mean = cal_mean_sample(sample, var_pop, mute)
    interval = est_interval(sample, alpha, var_pop, mute)
    return [interval[0], mean, interval[1]]

In [25]:
def eval_int_control_variate(n):
    us = [rd.random() for i in range(n)]
    xs = [math.exp(u) for u in us]

    dat = np.array([xs, us])
    cov_xu = np.cov(dat)[0][0]
    # mean_xs = cal_mean_sample(xs)
    var_u = cal_var_sample(us)
    c = - cov_xu / var_u

    zs = [xs[i] + c * (us[i] - 0.5) for i in range(n)]
    return est_three_points(zs)

In [26]:
eval_int_control_variate(1000)

[1.695420080733904, 1.717361433723158, 1.739302786712412]

In [32]:
def eval_int_strata_sample(n, m:int=10):
    # us = [rd.random() for i in range(n)]
    # xs = [math.exp(u) for u in us]

    def stratify(m):
        us = [rd.random() for i in range(m)]
        w = sum([math.exp((i + us[i]) / m) for i in range(m)]) / m
        return w

    ws = [stratify(m) for i in range(n)]
    return ws

In [33]:
ws = eval_int_strata_sample(100)
est_three_points(ws)

[1.7157277719695654, 1.7193191103550132, 1.722910448740461]

$E[X| Y = y]$ is a function of $y$, while $E[X| Y]$ is a random variable.

In [60]:
test = pd.DataFrame({"x": [1, 2, 3, 4, 5, 6], "u": [rd.random() for i in range(6)]})
test

Unnamed: 0,x,u
0,1,0.733977
1,2,0.026808
2,3,0.523503
3,4,0.799387
4,5,0.483587
5,6,0.996781


In [66]:
test["y"] = [math.floor(i) + 1 for i in test["u"] * 10]
test

Unnamed: 0,x,u,y
0,1,0.733977,8
1,2,0.026808,1
2,3,0.523503,6
3,4,0.799387,8
4,5,0.483587,5
5,6,0.996781,10


In [70]:
test["x"].tolist()

[1, 2, 3, 4, 5, 6]

In [94]:
test["u2"] = test.apply(lambda df: 1 - df["u"], axis=1)
test.shape[0]

6

In [71]:
math.exp(1) - 1

1.718281828459045

In [79]:
def analyse_stratified(sample:pd.core.frame.DataFrame):
    sample["y"] = [math.floor(i) + 1 for i in test["u"] * 10]
    xs_bar_y = [0 for i in range(10)]
    for i in range(10):
        xs_bar_y[i] = mean(test.loc[test["y"] == i+1])
    return xs_bar_y

In [84]:
np.var([1, 2, 3])

0.6666666666666666