In [2]:
import pandas as pd
import numpy as np
import plotly.express as px

In [3]:
dist_df = pd.DataFrame({"x":[3,4,6,8],
  "P(X=x)":[0.1,0.2,0.4,0.3]})
dist_df

Unnamed: 0,x,P(X=x)
0,3,0.1
1,4,0.2
2,6,0.4
3,8,0.3


In [4]:
fig= px.bar(dist_df, x="x",y="P(X=x)", title="Distribution of X")
fig

In [5]:
N=80000
samples=np.random.choice(
    dist_df["x"],
    size=N,
    p=dist_df["P(X=x)"])

sim_df = pd.DataFrame({"X(s)":samples})
sim_df

Unnamed: 0,X(s)
0,3
1,8
2,8
3,8
4,6
...,...
79995,6
79996,6
79997,6
79998,6


In [6]:
fig = px.histogram(sim_df, x=("X(s)"), title="Empirical distribution of X",
                   histnorm="probability")
fig

In [7]:
print("Simulated E[X]:", sim_df['X(s)'].mean())
print("Simulated Var[X]:", sim_df['X(s)'].var())

Simulated E[X]: 5.9069
Simulated Var[X]: 2.8930185527336603


In [8]:
E_x = dist_df["x"] @ dist_df["P(X=x)"]
print("E[X]:", E_x)

E[X]: 5.9


In [9]:
Var_x = dist_df["x"]**2 @ dist_df["P(X=x)"] - E_x**2
print("Var[X]:", Var_x)

Var[X]: 2.8900000000000006


In [10]:
roll_df = pd.DataFrame({"x": [1,2,3,4,5,6],
                        "P(X=x)": np.ones(6)/6})
roll_df

Unnamed: 0,x,P(X=x)
0,1,0.166667
1,2,0.166667
2,3,0.166667
3,4,0.166667
4,5,0.166667
5,6,0.166667


In [11]:
fig = px.bar(roll_df, x="x", y="P(X=x)", title="Distribution of X")
fig

In [12]:
N = 80000

sim_rolls_df = pd.DataFrame({
    "X_1": np.random.choice(roll_df["x"], size=N, p=roll_df["P(X=x)"]),
    "X_2": np.random.choice(roll_df["x"], size=N, p=roll_df["P(X=x)"])
})

sim_rolls_df

Unnamed: 0,X_1,X_2
0,2,2
1,6,4
2,1,5
3,6,5
4,6,3
...,...,...
79995,3,3
79996,1,5
79997,1,3
79998,4,5


In [13]:
sim_rolls_df['Y'] = 2 * sim_rolls_df['X_1']
sim_rolls_df['Z'] = sim_rolls_df['X_1'] + sim_rolls_df['X_2']
sim_rolls_df

Unnamed: 0,X_1,X_2,Y,Z
0,2,2,4,4
1,6,4,12,10
2,1,5,2,6
3,6,5,12,11
4,6,3,12,9
...,...,...,...,...
79995,3,3,6,6
79996,1,5,2,6
79997,1,3,2,4
79998,4,5,8,9


In [14]:
px.histogram(sim_rolls_df[["Y", "Z"]].melt(), x="value", color="variable",
             barmode="overlay", histnorm="probability",
             title="Empirical Distributions")

In [15]:
pd.DataFrame([
    sim_rolls_df[["Y", "Z"]].mean().rename("Mean"),
    sim_rolls_df[["Y", "Z"]].var().rename("Var"),
    np.sqrt(sim_rolls_df[["Y", "Z"]].var()).rename("SD")
])

Unnamed: 0,Y,Z
Mean,7.011775,7.011838
Var,11.719508,5.843895
SD,3.423377,2.417415


In [16]:
p = 0.5
coin_df = pd.DataFrame({"x": [1,0],
                        "P(X=x)": [p,1-p]})
coin_df

Unnamed: 0,x,P(X=x)
0,1,0.5
1,0,0.5


In [17]:
N = 10000

np.random.rand(N,2) < p

array([[ True,  True],
       [False,  True],
       [ True,  True],
       ...,
       [ True,  True],
       [ True,  True],
       [False, False]])

In [18]:
sim_flips = pd.DataFrame(
    {"Choice A": np.sum((np.random.rand(N,2) < p) * 10, axis = 1)})
sim_flips

Unnamed: 0,Choice A
0,10
1,20
2,0
3,10
4,0
...,...
9995,10
9996,10
9997,10
9998,10


In [19]:
sim_flips["Choice B"] = np.sum((np.random.rand(N,20) < p), axis = 1)
sim_flips

Unnamed: 0,Choice A,Choice B
0,10,9
1,20,12
2,0,11
3,10,12
4,0,8
...,...,...
9995,10,12
9996,10,15
9997,10,8
9998,10,10


In [20]:
sim_flips["Choice C"] = 20 * (np.random.rand(N,1) < p)
sim_flips

Unnamed: 0,Choice A,Choice B,Choice C
0,10,9,0
1,20,12,0
2,0,11,20
3,10,12,20
4,0,8,0
...,...,...,...
9995,10,12,0
9996,10,15,20
9997,10,8,20
9998,10,10,0


In [21]:
px.histogram(sim_flips.melt(), x="value", facet_row="variable",
             barmode="overlay", histnorm="probability",
             title="Empirical Distributions",
             width=600, height=600)

In [22]:
pd.DataFrame([
    sim_flips.mean().rename("Mean"),
    sim_flips.var().rename("Var"),
    np.sqrt(sim_flips.var()).rename("SD")
])

Unnamed: 0,Choice A,Choice B,Choice C
Mean,10.067,9.9857,10.002
Var,49.850496,4.996195,100.009997
SD,7.060488,2.235217,10.0005


In [23]:
dist_df

Unnamed: 0,x,P(X=x)
0,3,0.1
1,4,0.2
2,6,0.4
3,8,0.3


In [24]:
N = 100000
all_samples = np.random.choice(dist_df["x"], N, p=dist_df["P(X=x)"])
sim_pop_df = pd.DataFrame({"X(s)":all_samples})
sim_pop_df

Unnamed: 0,X(s)
0,8
1,8
2,6
3,8
4,8
...,...
99995,6
99996,3
99997,3
99998,6


In [25]:
n = 100
sample_df = (
    sim_pop_df.sample(n, replace=True)
    .reset_index(drop=True)
    .rename(columns={"X(s)": "X"})
)
sample_df

Unnamed: 0,X
0,4
1,8
2,6
3,8
4,8
...,...
95,4
96,4
97,8
98,4


In [26]:
px.histogram(sample_df, x="X", histnorm="probability", title="Sample (n=100)")

In [27]:
px.histogram(sim_df, x="X(s)", histnorm="probability", title="Population of X")

In [29]:
pd.DataFrame(
    {"Sample": [sample_df["X"].mean(), sample_df["X"].var(), np.sqrt(sample_df["X"].var())],
     "Population": [sim_df["X(s)"].mean(), sim_df["X(s)"].var(), np.sqrt(sim_df["X(s)"].var())]}
)

Unnamed: 0,Sample,Population
0,5.99,5.9069
1,2.979697,2.893019
2,1.72618,1.700888
