In [None]:
# ──────────────────────────────
# Day 5 — Probability & Statistics for Machine Learning
# ──────────────────────────────

"""
# Day 5 — Probability & Statistics for Machine Learning

**Author:** Dhairya Patel  

This notebook covers:
1. Random variables & distributions
2. Mean, median, mode
3. Variance & standard deviation
4. Probability basics
5. Visualization of distributions
"""


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, bernoulli, binom


In [None]:
"""
## 1) Random Variables & Distributions
We can simulate random variables using NumPy.
"""


In [None]:
# Uniform random variable: 1000 draws from the interval [0, 1).
# A locally seeded generator makes this cell produce the same samples (and
# therefore the same figure) on every Restart & Run All.
uniform_rv = np.random.default_rng(42).uniform(0, 1, 1000)

# 20 equal-width bins; for a uniform variable the bars should be roughly level.
plt.hist(uniform_rv, bins=20, edgecolor='black')
plt.title("Uniform Distribution (0,1)")
plt.xlabel("Value")
plt.ylabel("Count")
plt.show()


In [None]:
"""
## 2) Mean, Median, Mode
"""


In [None]:
# Small sample; the single large value (100) pulls the mean well above the median.
data = [1, 2, 2, 3, 4, 5, 100]

mean, median = np.mean(data), np.median(data)

# Mode computed by hand: np.unique yields the sorted distinct values together
# with how often each occurs, and argmax selects the first (smallest) value
# that reaches the highest count.
values, counts = np.unique(data, return_counts=True)
mode = values[counts.argmax()]

mean, median, mode


In [None]:
"""
## 3) Variance & Standard Deviation
Variance and standard deviation measure how spread out the data is around its mean.
"""


In [None]:
# Population variance and standard deviation of `data`: ddof=0 (NumPy's
# default) divides by N rather than N - 1.
variance, std_dev = np.var(data, ddof=0), np.std(data, ddof=0)

variance, std_dev


In [None]:
"""
## 4) Probability Basics
Simple probability experiments using the Bernoulli, Binomial, and Normal distributions.
"""


In [None]:
# 1 = success, 0 = failure
# random_state pins the draw so the figure is identical on every run.
bernoulli_trials = bernoulli.rvs(p=0.5, size=1000, random_state=42)

# Bin edges at -0.5 / 0.5 / 1.5 centre one bar on each outcome (0 and 1);
# bins=2 would instead split the [0, 1] range at 0.5, leaving the bars
# off-centre from the values they represent.
plt.hist(bernoulli_trials, bins=[-0.5, 0.5, 1.5], edgecolor='black')
plt.xticks([0, 1])
plt.xlabel("Outcome")
plt.ylabel("Count")
plt.title("Bernoulli Distribution (p=0.5)")
plt.show()


In [None]:
# Number of successes in n=10 trials with p=0.5, repeated 1000 times.
# random_state pins the draw so the figure is identical on every run.
binom_data = binom.rvs(n=10, p=0.5, size=1000, random_state=42)

# The outcome is an integer in 0..10, i.e. 11 possible values. bins=10 spreads
# ten equal-width bins over the observed range, which either merges two
# outcomes into one bar or produces bins that do not line up with the integers.
# Unit-wide bins centred on each integer give exactly one bar per outcome.
plt.hist(binom_data, bins=np.arange(-0.5, 11.5, 1), edgecolor='black')
plt.xticks(range(11))
plt.xlabel("Number of successes")
plt.ylabel("Count")
plt.title("Binomial Distribution (n=10, p=0.5)")
plt.show()


In [None]:
# 1000 draws from the standard normal distribution (mean 0, std 1).
# A locally seeded generator keeps the samples, and the figure, reproducible.
normal_data = np.random.default_rng(42).normal(0, 1, 1000)

# density=True rescales the bars to unit total area, so the histogram is
# directly comparable with the probability density curve drawn on top of it.
plt.hist(normal_data, bins=30, density=True, edgecolor='black', alpha=0.6,
         label='Samples')

# PDF curve
x = np.linspace(-4, 4, 100)
plt.plot(x, norm.pdf(x, 0, 1), 'r', label='PDF')
plt.xlabel("Value")
plt.ylabel("Density")
plt.legend()
plt.title("Normal Distribution (mean=0, std=1)")
plt.show()


In [None]:
"""
---

### Notes
- Probability helps model uncertainty in ML.
- Statistics (mean, variance) summarize data.
- Distributions (Normal, Bernoulli, Binomial) appear everywhere in ML.

**End of Day 5.**
"""
