In [None]:
import random, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
!pip install koreanize-matplotlib
import koreanize_matplotlib

# Notebook-wide plot defaults: 6x4 inch figures with the axes grid switched on
plt.rcParams.update({
    "figure.figsize": (6, 4),
    "axes.grid": True,
})


In [16]:
def monty_hall_once(strategy="switch"):
    """Play a single round of the three-door Monty Hall game.

    The prize door and the contestant's first pick are drawn independently
    and uniformly from doors 0-2. The host then opens one door that is
    neither the contestant's pick nor the prize door (chosen at random when
    two such doors exist).

    Args:
        strategy: "stay" to keep the first pick, "switch" to move to the
            one remaining closed door.

    Returns:
        True if the contestant's final door hides the prize, else False.

    Raises:
        ValueError: If ``strategy`` is neither "switch" nor "stay". Rejecting
            unknown values prevents a typo such as "Stay" from being
            silently simulated as the switch strategy.
    """
    if strategy not in ("switch", "stay"):
        raise ValueError(
            f"strategy must be 'switch' or 'stay', got {strategy!r}"
        )

    doors = (0, 1, 2)
    prize = random.randint(0, 2)       # door hiding the car
    choice = random.randint(0, 2)      # contestant's first pick

    # The host may only open a door that is neither picked nor the prize.
    opened = random.choice([d for d in doors if d != choice and d != prize])

    if strategy == "stay":
        return choice == prize

    # Exactly one door is left once the pick and the opened door are excluded.
    switched = next(d for d in doors if d != choice and d != opened)
    return switched == prize

In [None]:
def simulate(n=1000, strategy="switch"):
    """Run ``n`` Monty Hall rounds, print a summary and draw a win/lose bar chart.

    Args:
        n: Number of rounds to play; must be at least 1.
        strategy: Strategy string forwarded to ``monty_hall_once``.

    Returns:
        A tuple ``(wins, loses, win_rate)`` where ``win_rate`` is ``wins / n``.

    Raises:
        ValueError: If ``n`` is smaller than 1. Without this check ``n == 0``
            fails with ZeroDivisionError and a negative ``n`` would plot a
            negative loss count.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    wins = sum(monty_hall_once(strategy) for _ in range(n))
    loses = n - wins
    win_rate = wins / n
    print(f"{strategy.upper()} — n={n}, wins={wins}, loses={loses}, win_rate={win_rate:.3f}")

    # Two-row frame so pandas can draw one bar per outcome.
    df = pd.DataFrame({
        "Category": ["Win", "Lose"],
        "Count": [wins, loses],
    })
    ax = df.plot.bar(x="Category", y="Count", legend=False)
    ax.set_title(f"{strategy.upper()} (n={n}) – wins={wins} ({win_rate*100:.1f}%)")
    ax.set_xlabel("")
    ax.set_ylabel("Count")
    plt.show()
    return wins, loses, win_rate

# Example run: 300 rounds for each strategy; the returned tuple is discarded
for _strategy in ("switch", "stay"):
    _ = simulate(n=300, strategy=_strategy)

In [None]:
def cumulative_plot(n=200, strategy="switch"):
    """Plot the running win rate over ``n`` consecutive Monty Hall rounds.

    After every round the share of wins so far is recorded; the resulting
    curve is drawn together with a dashed horizontal reference line at 2/3
    when ``strategy`` is "switch" and at 1/3 otherwise.

    Args:
        n: Number of rounds to play.
        strategy: Strategy string forwarded to ``monty_hall_once``.
    """
    # 1 for a win, 0 for a loss, in the order the rounds were played.
    hits = [1 if monty_hall_once(strategy) else 0 for _ in range(n)]

    trial_index = np.arange(1, n + 1)
    running_rate = np.cumsum(hits) / trial_index

    if strategy == "switch":
        theory = 2 / 3
    else:
        theory = 1 / 3

    plt.plot(trial_index, running_rate, marker=".", linestyle="-", alpha=0.8)
    plt.axhline(theory, color="red", linestyle="--", label="theory")
    plt.title(f"Cumulative Win Rate – {strategy.upper()} (n={n})")
    plt.xlabel("Trials")
    plt.ylabel("Win rate")
    plt.legend()
    plt.show()

# Example: running win rate over 200 rounds with the "switch" strategy
cumulative_plot(n=200, strategy="switch")

# 정규분포

랜덤 확률 실험을 여러 번 반복했을 때, 그 결과의 표본평균(혹은 비율)의 분포는 반복 횟수가 커질수록 중심극한정리에 의해 정규분포에 가까워진다.


In [None]:
# Standard normal parameters: mean (mu) = 0, standard deviation (sigma) = 1
mu, sigma = 0, 1

# Evaluation grid: 400 evenly spaced points covering [-4, 4]
x = np.linspace(-4, 4, 400)

# Normal probability density function (PDF) evaluated on the grid
y = np.exp(-0.5*((x-mu)/sigma)**2) * (1/(np.sqrt(2*np.pi)*sigma))

# Draw the density curve and mark the mean with a dashed vertical line
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x, y, color="blue", label="N(0,1)")   # N(0,1): standard normal
ax.axvline(mu, color="red", linestyle="--", label="Mean (0)")
ax.set_title("Standard Normal Distribution (mean=0, std=1)")
ax.set_xlabel("x")
ax.set_ylabel("Probability Density")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Normal approximation of the observed win rate for several sample sizes.
p = 2/3   # theoretical Monty Hall win rate for the "switch" strategy

# Three sample sizes to compare
n_values = [10, 100, 1000]

x = np.linspace(0.4, 0.9, 400)  # win-rate grid, as a proportion

plt.figure(figsize=(10,6))

# The centre of every curve is the same, so it is set once outside the loop.
mu = p
for n in n_values:
    sigma = np.sqrt(p*(1-p)/n)  # standard error of the win rate for n trials
    y = (1/(np.sqrt(2*np.pi)*sigma)) * np.exp(-0.5*((x-mu)/sigma)**2)
    plt.plot(x*100, y, label=f"n={n}")  # x is converted to percent for display

# The label is derived from p so it stays correct if p is edited above.
plt.axvline(p*100, color="red", linestyle="--", label=f"True mean {p*100:.1f}%")
plt.title("Monty Hall Win Rate Distribution by Sample Size")
plt.xlabel("Winning rate (%)")
plt.ylabel("Probability Density")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Theoretical Monty Hall (switch) win rate and the sample size to illustrate
p = 2/3        # theoretical win rate = 0.666...
n = 100
z = 1.96       # z-score for a 95% confidence level; the "95%" labels below assume this value

# Normal-approximation parameters for the distribution of the win rate
mu = p
se = np.sqrt(p*(1-p)/n)

# 95% confidence interval
ci_low  = mu - z*se
ci_high = mu + z*se

# Curve data: x is a proportion spanning mu ± 4 SE; it is converted to percent only when plotted
x = np.linspace(mu - 4*se, mu + 4*se, 600)
y = (1/(se*np.sqrt(2*np.pi))) * np.exp(-0.5*((x-mu)/se)**2)

plt.figure(figsize=(9,5))
plt.plot(x*100, y, label=f"Normal approx (n={n})")
plt.axvline(mu*100, color="red", linestyle="--", label=f"mean = {mu*100:.1f}%")

# Shade the part of the curve that lies inside the 95% confidence interval
mask = (x >= ci_low) & (x <= ci_high)
plt.fill_between((x*100)[mask], y[mask], alpha=0.25, label=f"95% CI [{ci_low*100:.1f}%, {ci_high*100:.1f}%]")

# The title is built from n so it cannot drift from the value set above.
plt.title(f"Monty Hall (Switch) — Win Rate Distribution, n={n} with 95% CI")
plt.xlabel("Winning rate (%)")
plt.ylabel("Density")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

print(f"SE = {se:.4f}")
print(f"95% CI = [{ci_low*100:.1f}%, {ci_high*100:.1f}%]")

# 신뢰수준 95%

95% 신뢰수준은 “안전성과 실용성의 균형점”이라서 통계학·과학 연구에서 기본값처럼 쓰인다.

In [None]:
def margin_and_ci(n: int, p: float, z: float = 1.96) -> tuple:
    """Return the standard error, margin of error and interval for a proportion.

    Uses the normal approximation ``p ± z * sqrt(p * (1 - p) / n)``. The
    interval bounds are returned as computed and are not clipped to [0, 1].

    Args:
        n: Number of trials; must be positive.
        p: Proportion, between 0 and 1 inclusive.
        z: Critical value, e.g. 1.96 for 95% or 2.576 for 99%.

    Returns:
        A tuple ``(se, e, (low, high))`` with the standard error, the margin
        of error ``z * se`` and the interval bounds ``p - e`` and ``p + e``.

    Raises:
        ValueError: If ``n`` is not positive or ``p`` lies outside [0, 1].
    """
    # Validate up front so callers get a clear message instead of a
    # ZeroDivisionError or an opaque "math domain error" from sqrt.
    if n <= 0:
        raise ValueError(f"n must be positive, got {n!r}")
    if not 0 <= p <= 1:
        raise ValueError(f"p must be within [0, 1], got {p!r}")

    se = math.sqrt(p*(1-p)/n)
    e = z * se
    return se, e, (p-e, p+e)

def pretty_ci(n, p=2/3):
    """Print the 95% and 99% normal-approximation intervals for ``n`` trials.

    One line is printed per confidence level, showing the standard error,
    the margin of error in percentage points and the interval in percent.

    Args:
        n: Number of trials.
        p: Proportion the interval is centred on (defaults to 2/3).
    """
    levels = (("95%", 1.96), ("99%", 2.576))
    for label, critical in levels:
        std_err, margin, bounds = margin_and_ci(n, p, critical)
        lower, upper = bounds
        print(f"{label}: n={n}, SE={std_err:.4f}, ±{margin*100:.1f}%p → [{lower*100:.1f}%, {upper*100:.1f}%]")

# Example: print the intervals for 100, 300 and 1000 trials
for _trials in (100, 300, 1000):
    pretty_ci(_trials)

In [None]:
# 실행 후 셀 출력 맨 아래에 슬라이더 나타남
from ipywidgets import interact, IntSlider, Dropdown

def live(n=300, strategy="switch"):
    """Widget callback: run the simulation, the cumulative plot and the interval printout.

    Args:
        n: Number of rounds to simulate.
        strategy: "switch" or "stay"; forwarded to ``simulate`` and
            ``cumulative_plot``.
    """
    simulate(n=n, strategy=strategy)
    cumulative_plot(n=n, strategy=strategy)
    # Centre the interval on the theoretical rate of the selected strategy;
    # pretty_ci defaults to 2/3, which is only right for "switch".
    theoretical_p = 1/3 if strategy == "stay" else 2/3
    pretty_ci(n, p=theoretical_p)

# Controls for the interactive view: a trial-count slider and a strategy picker
trials_slider = IntSlider(value=300, min=10, max=5000, step=10, description="trials")
strategy_dropdown = Dropdown(options=[("Switch","switch"),("Stay","stay")], value="switch")

# Trailing semicolon keeps the notebook from echoing the returned object
interact(live, n=trials_slider, strategy=strategy_dropdown);