In [2]:
import numpy as np
from scipy.stats import norm, binom
import math
import matplotlib.pyplot as plt
import pandas as pd
import os

### Task 1: Conditional probability with cards
Let W = event of drawing a white card on the first draw, and B = event of drawing a black card on the second draw. Two cards are drawn without replacement.

- Input: x = P(W ∩ B) and y = P(W) (two lines)
- Output: P(B | W) = x / y (print as a float, no rounding)

In [None]:
def q1(x, y):
    """Return the conditional probability P(B | W) = P(W ∩ B) / P(W).

    Parameters
    ----------
    x : float
        Joint probability P(W ∩ B).
    y : float
        Marginal probability P(W); used as the divisor.

    Returns
    -------
    float
        The unrounded quotient ``x / y``.
    """
    joint, marginal = x, y
    return joint / marginal

def task1_io():
    """Read P(W ∩ B) and P(W) from stdin (one per line) and print P(B | W).

    If either value cannot be read or parsed, the example inputs
    0.03 and 0.8 are used for both values instead.
    """
    try:
        # Two consecutive lines: joint probability first, then the marginal.
        joint, marginal = (float(input().strip()) for _ in range(2))
    except Exception:
        joint, marginal = 0.03, 0.8
    print(q1(joint, marginal))

In [4]:
# Worked example: P(W ∩ B) = 0.03 and P(W) = 0.8 give P(B | W) = 0.0375.
print(q1(0.03, 0.8))

0.0375


### Task 2: Normal distribution cutoff probability
Scores are N(m, s^2). Cutoff = 90. Compute P(X ≥ 90).
- Input: m then s (two lines)
- Output: probability rounded to two decimals.

In [5]:
def q2(m, s):
    """Return P(X >= 90) for X ~ N(m, s^2).

    Parameters
    ----------
    m : float
        Mean of the score distribution.
    s : float
        Standard deviation of the score distribution; must be non-zero
        (and should be positive for the result to be meaningful).

    Returns
    -------
    float
        Upper-tail probability at the cutoff 90, unrounded.
    """
    z = (90 - m) / s
    # Use the survival function rather than 1 - cdf(z): for large z the
    # cdf rounds to exactly 1.0 and the subtraction would return 0.0,
    # whereas sf keeps the small tail probability.
    return norm.sf(z)


def task2_io():
    """Read the mean and standard deviation from stdin and print P(X >= 90).

    The two values are read one per line (mean first). If reading or
    parsing fails, the example values 80.0 and 10.0 are used. The
    probability is printed rounded to two decimals.
    """
    try:
        mean, std = (float(input().strip()) for _ in range(2))
    except Exception:
        mean, std = 80.0, 10.0
    probability = q2(mean, std)
    print(f"{probability:.2f}")

In [6]:
# Worked example with m=80, s=10: print the raw probability first, then the
# two-decimal form that the task asks for.
p = q2(80, 10)
print(p)
print(f"{p:.2f}")

0.15865525393145707
0.16


### Task 3: Seating around a circular table
N people sit around a circle. Find probability that two particular people are not adjacent.
- Total circular permutations: (N-1)!
- Complement (adjacent) cases: treat the pair as a single block, which gives 2*(N-2)! arrangements in which they sit together. Hence P(not adjacent) = 1 - [2*(N-2)! / (N-1)!] = 1 - 2/(N-1) = (N-3)/(N-1), valid for N ≥ 3 (the code returns 0 for smaller N).
- Input: N (one line). Output: probability rounded to 4 decimals.

In [7]:
def q3(n):
    """Return the probability that two particular people are not adjacent.

    Uses the closed form (n - 3) / (n - 1) for n people seated around a
    circular table. For fewer than three people the function returns 0.0.

    Parameters
    ----------
    n : int
        Number of people at the table.

    Returns
    -------
    float
        Probability that the two chosen people do not sit next to each other.
    """
    return 0.0 if n < 3 else (n - 3) / (n - 1)


def task3_io():
    """Read N from stdin and print P(not adjacent) rounded to 4 decimals.

    If the input cannot be read or parsed as an integer, N defaults to 5.
    """
    try:
        people = int(input().strip())
    except Exception:
        people = 5
    probability = q3(people)
    print(f"{probability:.4f}")
# Spot check of the closed form: N=5 gives (5-3)/(5-1) = 0.5.
print(f"q3(5) -> {q3(5):.4f}")

q3(5) -> 0.5000


### Task 4: Dice-sum simulations and histograms
Roll counts r in [1..50], trials T in [100, 500, 1000, 5000, 10000, 50000, 100000].
For each r and T: simulate T experiments of summing r fair dice (values 1..6), plot and save histogram.
Also compute mean and variance for each (r, T).
Note: running the full grid is expensive (50 × 7 = 350 histograms), so a small demo grid (r in {1, 2, 3}, T in {100, 1000, 10000}) runs by default. Call `task4(run_full=True)` to run the full grid.

In [8]:
def simulate_dice_sums(rolls: int, trials: int, rng=None):
    """Simulate `trials` experiments, each summing `rolls` fair six-sided dice.

    Parameters
    ----------
    rolls : int
        Number of dice summed in each experiment.
    trials : int
        Number of independent experiments.
    rng : numpy.random.Generator, optional
        Random generator to draw from; a fresh unseeded generator is
        created when omitted.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of length `trials` holding the dice sums.
    """
    generator = np.random.default_rng() if rng is None else rng
    # One row per experiment, one column per die; the upper bound 7 is exclusive.
    faces = generator.integers(1, 7, size=(trials, rolls))
    return faces.sum(axis=1)


def plot_hist(data, bins, title, out_path):
    """Draw a histogram of `data` and save it to `out_path`.

    Parameters
    ----------
    data : array-like
        Values to histogram.
    bins : int or sequence
        Bin specification forwarded to matplotlib's ``hist``.
    title : str
        Figure title.
    out_path : str
        Destination image path. Its parent directory is created if it
        does not exist; a bare filename saves into the current directory.
    """
    # os.path.dirname returns '' for a bare filename, and os.makedirs('')
    # raises FileNotFoundError, so only create the directory when there is one.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.hist(data, bins=bins, color='#4C78A8', edgecolor='white', alpha=0.9)
        ax.set_title(title)
        ax.set_xlabel('Sum')
        ax.set_ylabel('Frequency')
        fig.tight_layout()
        fig.savefig(out_path)
    finally:
        # Always release the figure, even if plotting or saving fails, so
        # repeated calls in a loop do not accumulate open figures.
        plt.close(fig)


def task4(run_full=False, out_dir='lab4_outputs', seed=303):
    """Simulate dice sums over a grid of roll counts and trial counts.

    For every (rolls, trials) pair a histogram PNG is written to `out_dir`
    and the sample mean and population variance (ddof=0) are recorded. The
    collected statistics are also saved as ``task4_stats.csv`` in `out_dir`.

    Parameters
    ----------
    run_full : bool, default False
        When True, use rolls 1..50 and trials
        [100, 500, 1000, 5000, 10000, 50000, 100000]; otherwise use the
        small demo grid rolls [1, 2, 3] and trials [100, 1000, 10000].
    out_dir : str, default 'lab4_outputs'
        Directory that receives the histograms and the CSV file.
    seed : int, default 303
        Seed for the random generator shared by all simulations.

    Returns
    -------
    pandas.DataFrame
        One row per (rolls, trials) pair with columns
        ``rolls``, ``trials``, ``mean`` and ``variance``.
    """
    roll_range = range(1, 51) if run_full else [1, 2, 3]
    trial_list = [100, 500, 1000, 5000, 10000, 50000, 100000] if run_full else [100, 1000, 10000]

    # An empty directory means "current directory"; '.' keeps every path
    # below well-formed with a non-empty parent component.
    out_dir = out_dir or '.'
    os.makedirs(out_dir, exist_ok=True)

    rng = np.random.default_rng(seed)
    stats = []
    for r in roll_range:
        # Integer-aligned edges r, r+1, ..., 6r+1: one bin per attainable sum.
        # They depend only on r, so compute them once per roll count.
        bins = np.arange(r, 6 * r + 2)
        for T in trial_list:
            data = simulate_dice_sums(r, T, rng)
            stats.append({
                'rolls': r,
                'trials': T,
                'mean': float(np.mean(data)),
                'variance': float(np.var(data, ddof=0)),
            })
            out_file = f"{out_dir}/task4_hist_r{r}_T{T}.png"
            plot_hist(data, bins=bins, title=f"Dice sums: r={r}, T={T}", out_path=out_file)

    df = pd.DataFrame(stats)
    df.to_csv(f"{out_dir}/task4_stats.csv", index=False)
    return df

# Run the reduced demo grid (rolls 1-3, trials 100/1000/10000). This writes the
# histograms and task4_stats.csv under lab4_outputs/; preview the first rows.
task4_demo_df = task4(run_full=False)
task4_demo_df.head()

Unnamed: 0,rolls,trials,mean,variance
0,1,100,3.48,3.1096
1,1,1000,3.552,2.853296
2,1,10000,3.4942,2.906566
3,2,100,6.85,6.5875
4,2,1000,7.043,5.683151


### Task 5: Binomial distribution and estimated success rate
We observed 97 positives out of 100 (n=100, k=97).
- Construct binomial models for a range of success rates p and visualize likelihood via histograms of sampled counts.
- Estimate the success rate and support the estimate with plots. We also compute a simple 95% normal-approximation (Wald) confidence interval for the proportion, clipped to [0, 1].

In [9]:
def task5(n=100, k=97, p_grid=None, samples=20000):
    """Estimate a binomial success rate from k successes in n trials.

    The function evaluates the binomial log-likelihood of the observation
    on a grid of candidate rates, computes the sample proportion with a 95%
    normal-approximation interval (clipped to [0, 1]), and simulates
    binomial counts for a fixed set of rates. All artefacts are written
    under ``lab4_outputs/task5``: one histogram per simulated rate,
    ``sim_stats.csv`` and ``loglik_curve.png``.

    Parameters
    ----------
    n : int, default 100
        Number of trials; must be positive.
    k : int, default 97
        Observed number of successes; must satisfy 0 <= k <= n.
    p_grid : array-like, optional
        Candidate success rates for the likelihood grid. Values are clipped
        to [1e-6, 1 - 1e-6]. Defaults to a grid from 0.80 to 0.995 that is
        denser above 0.95.
    samples : int, default 20000
        Number of simulated counts drawn for each histogram.

    Returns
    -------
    dict
        ``phat`` (k / n), ``ci95`` (lower, upper), ``mle_p_grid`` (grid point
        with the highest log-likelihood) and ``p_grid`` (the clipped grid as
        a list).

    Raises
    ------
    ValueError
        If `n` is not positive or `k` lies outside [0, n].
    """
    # Validate before touching the filesystem: otherwise n == 0 fails with a
    # ZeroDivisionError and k outside [0, n] with an opaque "math domain error".
    if n <= 0:
        raise ValueError(f"n must be positive, got {n}")
    if not 0 <= k <= n:
        raise ValueError(f"k must satisfy 0 <= k <= n, got k={k}, n={n}")

    if p_grid is None:
        p_grid = np.concatenate([np.linspace(0.80, 0.95, 4, endpoint=False), np.linspace(0.95, 0.995, 10)])

    out_dir = 'lab4_outputs/task5'
    rng = np.random.default_rng(42)
    os.makedirs(out_dir, exist_ok=True)

    # Grid likelihood; clipping keeps logpmf finite at the endpoints 0 and 1.
    ps = np.clip(p_grid, 1e-6, 1-1e-6)
    logL = binom.logpmf(k, n, ps)
    mle_p = ps[np.argmax(logL)]

    # Sample proportion with a normal-approximation 95% interval.
    phat = k / n
    se = math.sqrt(phat*(1-phat)/n)
    ci95 = (max(0.0, phat - 1.96*se), min(1.0, phat + 1.96*se))

    chosen_ps = [0.90, 0.95, 0.97, 0.98, 0.99]
    # Half-integer edges centre each bar on an integer count 0..n.
    count_bins = np.arange(0, n+2)-0.5
    records = []
    for p in chosen_ps:
        counts = rng.binomial(n=n, p=p, size=samples)
        records.append({'p': p, 'mean': float(np.mean(counts)), 'var': float(np.var(counts, ddof=0))})
        p_tag = str(p).replace(".", "p")
        fig, ax = plt.subplots(figsize=(6, 4))
        try:
            ax.hist(counts, bins=count_bins, color='#72B7B2', edgecolor='white')
            ax.axvline(k, color='red', linestyle='--', label=f'observed k={k}')
            ax.set_title(f'Binomial counts, n={n}, p={p:.3f}')
            ax.set_xlabel('Positive count out of n')
            ax.set_ylabel('Frequency')
            ax.legend()
            fig.tight_layout()
            fig.savefig(f'{out_dir}/binom_hist_n{n}_p{p_tag}.png')
        finally:
            # Release the figure even if saving fails.
            plt.close(fig)

    df = pd.DataFrame(records)
    df.to_csv(f'{out_dir}/sim_stats.csv', index=False)

    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        # Shift by the maximum so the best grid point sits at 0.
        ax.plot(ps, logL - np.max(logL), marker='o')
        ax.axvline(phat, color='green', linestyle='--', label=f'phat={phat:.3f}')
        ax.axvline(mle_p, color='orange', linestyle=':', label=f'mle≈{mle_p:.3f}')
        ax.set_title('Log-likelihood (shifted) for p')
        ax.set_xlabel('p')
        ax.set_ylabel('logL - max(logL)')
        ax.legend()
        fig.tight_layout()
        fig.savefig(f'{out_dir}/loglik_curve.png')
    finally:
        plt.close(fig)

    return {'phat': phat, 'ci95': ci95, 'mle_p_grid': float(mle_p), 'p_grid': ps.tolist()}

# Run with the defaults (n=100, k=97) and display the summary dict; the plots
# and sim_stats.csv are written under lab4_outputs/task5/.
task5_summary = task5()
task5_summary

{'phat': 0.97,
 'ci95': (0.9365649046659053, 1.0),
 'mle_p_grid': 0.97,
 'p_grid': [0.8,
  0.8375,
  0.875,
  0.9125,
  0.95,
  0.955,
  0.96,
  0.965,
  0.97,
  0.975,
  0.98,
  0.985,
  0.99,
  0.995]}