# Resampling and Monte Carlo Methods

Sources:

- [Scipy Resampling and Monte Carlo Methods](https://docs.scipy.org/doc/scipy/tutorial/stats/resampling.html)

In [None]:
# Manipulate data
import numpy as np
import pandas as pd

# Statistics
import scipy.stats
import statsmodels.api as sm
#import statsmodels.stats.api as sms
import statsmodels.formula.api as smf
from statsmodels.stats.stattools import jarque_bera

# Plot
import matplotlib.pyplot as plt
import seaborn as sns

# Adjust default figure size: keep the width, halve the height.
# The baseline size is captured only once per kernel session; reading it from
# plt.rcParams on every run would halve the height again each time this cell
# is re-executed.
try:
    fig_w, fig_h = _FIGSIZE_BASE
except NameError:
    _FIGSIZE_BASE = tuple(plt.rcParams.get('figure.figsize'))
    fig_w, fig_h = _FIGSIZE_BASE
plt.rcParams['figure.figsize'] = (fig_w, fig_h * .5)
%matplotlib inline

## Monte-Carlo simulation of Random Walk Process


### One-dimensional random walk

More information: [Random Walks, Central Limit Theorem](https://www.youtube.com/watch?v=BUJCF900I0A)

At each step $i$ the process moves by +1 or -1 with equal probability, i.e., $X_i \in \{+1, -1\}$ with $P(X_i=+1)=P(X_i=-1)=1/2$.
The steps $X_i$ are i.i.d.

Let $S_n = \sum_{i=1}^{n} X_i$ be the position after $n$ steps; equivalently, the position at time $i$ is $S_i = S_{i-1} + X_i$, with $S_0 = 0$.

Realizations of random walks are obtained by Monte Carlo simulation.
Plot a few random walks (trajectories), i.e., $S_n$ for $n=0$ to $200$.

In [None]:
np.random.seed(seed=42)  # make the example reproducible

n = 200  # trajectory depth (number of steps per walk)
nsamp = 50000  # number of trajectories

# Xn: one trajectory per row (axis 0), one +/-1 step per column (axis 1).
# All nsamp * n steps are drawn in a single vectorized call rather than with
# one Python-level np.random.choice call per trajectory.
Xn = np.random.choice(a=np.array([-1, +1]), size=(nsamp, n),
                      replace=True, p=np.ones(2) / 2)

# Final position S_n of each random walk: sum of its n steps
Sn = Xn.sum(axis=1)

# Reference values printed for comparison: mean 0 and standard deviation sqrt(n)
print("True Stat. Mean={:.03f}, Sd={:.02f}".format(0, np.sqrt(n) * 1))

# Monte Carlo estimates computed from the simulated final positions
print("Est. Stat. Mean={:.03f}, Sd={:.02f}".format(Sn.mean(), Sn.std()))



Plot cumulative sum of 100 random walks (trajectories)


In [None]:
# Running position of the first 100 walks: cumulative sum along the step axis
Sn_traj = np.cumsum(Xn[:100], axis=1)
# Transpose so that each DataFrame column, and hence each plotted line, is one walk
_ = pd.DataFrame(Sn_traj).T.plot(legend=False)

Distribution of $S_n$ vs. the normal distribution $\mathcal{N}(0, \sigma=\sqrt{n})$

In [None]:
# Compare the empirical distribution of Sn with a normal density that uses
# the sample mean and standard deviation of Sn.
Sn_mean, Sn_sd = Sn.mean(), Sn.std()

# Restrict the histogram and the curve to mean +/- 3 standard deviations
x_low, x_high = Sn_mean - 3 * Sn_sd, Sn_mean + 3 * Sn_sd
h_ = plt.hist(Sn, range=(x_low, x_high), density=True, bins=43, alpha=0.4, color='g',
             label="Histogram")

x_range = np.linspace(x_low, x_high, 30)
prob_x_range = scipy.stats.norm.pdf(x_range, loc=Sn_mean, scale=Sn_sd)
_ = plt.plot(x_range, prob_x_range, 'r-', label="PDF: P(X)")

# Without a legend the label= arguments above are never displayed
_ = plt.legend()

## Permutation Tests


## The Bootstrap