# Implementing Maximum Likelihood Estimation (MLE) for Different Distributions

## 📚 Learning Objectives

By completing this notebook, you will:
- Implement Maximum Likelihood Estimation for different distributions
- Understand the likelihood function and log-likelihood
- Estimate parameters using MLE for Gaussian, Poisson, and Bernoulli distributions
- Compare MLE estimates with true parameters

## 🔗 Prerequisites

- ✅ Understanding of probability distributions
- ✅ Understanding of optimization concepts
- ✅ Python, NumPy, SciPy knowledge

---

## Official Structure Reference

This notebook covers practical activities from **Course 03, Unit 5**:
- Implementing Maximum Likelihood Estimation (MLE) for different distributions
- **Source:** `DETAILED_UNIT_DESCRIPTIONS.md` - Unit 5 Practical Content

---

## Introduction

**Maximum Likelihood Estimation (MLE)** is a method for estimating parameters of a probability distribution by maximizing the likelihood function, which measures how likely the observed data is given the parameter values.

In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.optimize import minimize, minimize_scalar
from scipy.special import gammaln
warnings.filterwarnings('ignore')

# Confirm that the imports succeeded and print the notebook banner.
# The check mark was stored as mis-encoded UTF-8 and is restored here.
print("✅ Libraries imported!")
print("\nImplementing Maximum Likelihood Estimation (MLE)")
print("=" * 60)

## Part 1: MLE for Gaussian Distribution


In [None]:
print("=" * 60)
print("Part 1: MLE for Gaussian Distribution")
print("=" * 60)

# Generate data from a Gaussian distribution with known parameters so the
# estimates below can be compared against the truth.
np.random.seed(42)  # fixed seed keeps the sample reproducible
true_mu = 5.0
true_sigma = 2.0
data = np.random.normal(true_mu, true_sigma, 100)

print("\nTrue parameters:")
print(f" μ (mean): {true_mu}")
print(f" σ (std): {true_sigma}")
print(f" Sample size: {len(data)}")

# Closed-form MLE for a Gaussian: the sample mean and the standard deviation
# normalised by N (ddof=0), not by N-1.
mle_mu = np.mean(data)
mle_sigma = np.std(data, ddof=0)

print("\nMLE estimates:")
print(f" μ_MLE (sample mean): {mle_mu:.4f}")
print(f" σ_MLE (sample std): {mle_sigma:.4f}")
# Negative log-likelihood function (we minimize this)
def neg_log_likelihood_gaussian(params, data):
    """Return the negative log-likelihood of i.i.d. Gaussian observations.

    Args:
        params: Pair ``(mu, sigma)`` holding the mean and standard deviation.
        data: Observations; any array-like of numbers.

    Returns:
        ``-log L(mu, sigma | data)``, or ``np.inf`` when ``sigma <= 0`` so
        that a minimizer treats an invalid scale as infinitely bad.
    """
    mu, sigma = params
    if sigma <= 0:
        return np.inf
    # Coerce so that plain Python sequences work as well as NumPy arrays.
    data = np.asarray(data, dtype=float)
    n = data.size
    # log L = -n * log(sigma * sqrt(2*pi)) - sum((x - mu)^2) / (2 * sigma^2)
    log_likelihood = (
        -n * np.log(sigma * np.sqrt(2 * np.pi))
        - np.sum((data - mu) ** 2) / (2 * sigma**2)
    )
    return -log_likelihood  # Return negative for minimization
# Optimize using scipy
# L-BFGS-B accepts box constraints, so sigma > 0 is enforced by the optimizer
# itself. An unconstrained method such as BFGS would rely solely on the
# objective returning inf for sigma <= 0, which gives it no usable gradient.
result = minimize(
    neg_log_likelihood_gaussian,
    x0=[mle_mu, mle_sigma],
    args=(data,),
    method='L-BFGS-B',
    bounds=[(None, None), (1e-8, None)],  # mu unbounded, sigma strictly positive
)
if not result.success:
    # Warnings are silenced at import time, so report optimizer failure explicitly.
    print(f"\nWarning: optimizer did not converge: {result.message}")
mle_mu_opt = result.x[0]
mle_sigma_opt = result.x[1]
print("\nMLE from optimization:")
print(f" μ_MLE: {mle_mu_opt:.4f}")
print(f" σ_MLE: {mle_sigma_opt:.4f}")
# Visualize
plt.figure(figsize=(12, 5))

# Left panel: histogram of the sample with the true and fitted densities.
plt.subplot(1, 2, 1)
plt.hist(data, bins=20, density=True, alpha=0.7, color='blue', label='Data')
x = np.linspace(data.min(), data.max(), 100)
plt.plot(x, stats.norm.pdf(x, true_mu, true_sigma), 'r-', linewidth=2,
         label=f'True (μ={true_mu}, σ={true_sigma})')
plt.plot(x, stats.norm.pdf(x, mle_mu, mle_sigma), 'g--', linewidth=2,
         label=f'MLE (μ={mle_mu:.2f}, σ={mle_sigma:.2f})')
plt.xlabel('Value')
plt.ylabel('Density')
plt.title('Gaussian Distribution MLE')
plt.legend()
plt.grid(True, alpha=0.3)

# Right panel: log-likelihood as a function of mu with sigma held at its MLE.
plt.subplot(1, 2, 2)
mu_range = np.linspace(3, 7, 100)
# The helper returns the *negative* log-likelihood, so flip the sign back.
log_likelihoods = [-neg_log_likelihood_gaussian([mu, mle_sigma], data) for mu in mu_range]
plt.plot(mu_range, log_likelihoods, 'b-', linewidth=2)
plt.axvline(true_mu, color='r', linestyle='--', label=f'True μ={true_mu}')
plt.axvline(mle_mu, color='g', linestyle='--', label=f'MLE μ={mle_mu:.2f}')
plt.xlabel('μ (mean)')
plt.ylabel('Log-Likelihood')
plt.title('Log-Likelihood vs Mean')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
print("\n✅ MLE for Gaussian distribution implemented!")


## Part 2: MLE for Poisson Distribution


In [None]:
print("\n" + "=" * 60)
print("Part 2: MLE for Poisson Distribution")
print("=" * 60)

# Generate data from a Poisson distribution with a known rate so the
# estimate below can be compared against the truth.
np.random.seed(42)  # fixed seed keeps the sample reproducible
true_lambda = 3.5
data_poisson = np.random.poisson(true_lambda, 100)

print("\nTrue parameter:")
print(f" λ (rate): {true_lambda}")
print(f" Sample size: {len(data_poisson)}")

# MLE for Poisson: sample mean
mle_lambda = np.mean(data_poisson)

print("\nMLE estimate:")
print(f" λ_MLE (sample mean): {mle_lambda:.4f}")
# Negative log-likelihood
def neg_log_likelihood_poisson(lambda_param, data):
    """Return the negative log-likelihood of i.i.d. Poisson observations.

    Args:
        lambda_param: Candidate rate parameter.
        data: Observed counts; any array-like of non-negative numbers.

    Returns:
        ``-log L(lambda | data)``, or ``np.inf`` when ``lambda_param <= 0``
        so that a minimizer treats an invalid rate as infinitely bad.
    """
    if lambda_param <= 0:
        return np.inf
    # Coerce so that plain Python sequences work as well as NumPy arrays.
    data = np.asarray(data)
    n = data.size
    # log L = sum(x) * log(lambda) - n * lambda - sum(log(x!)),
    # where gammaln(x + 1) evaluates log(x!) for the whole array at once.
    log_likelihood = (
        np.sum(data) * np.log(lambda_param)
        - n * lambda_param
        - np.sum(gammaln(data + 1))
    )
    return -log_likelihood

# Visualize
plt.figure(figsize=(12, 5))

# Left panel: empirical frequencies with the true and fitted PMFs.
plt.subplot(1, 2, 1)
unique, counts = np.unique(data_poisson, return_counts=True)
plt.bar(unique, counts/len(data_poisson), alpha=0.7, color='blue', label='Data (empirical)')
# Extend the support a little past the largest observed count.
x = np.arange(0, max(unique) + 3)
plt.plot(x, stats.poisson.pmf(x, true_lambda), 'ro-', markersize=8, label=f'True (λ={true_lambda})')
plt.plot(x, stats.poisson.pmf(x, mle_lambda), 'go--', markersize=8, label=f'MLE (λ={mle_lambda:.2f})')
plt.xlabel('Value')
plt.ylabel('Probability')
plt.title('Poisson Distribution MLE')
plt.legend()
plt.grid(True, alpha=0.3)

# Right panel: log-likelihood as a function of the rate.
plt.subplot(1, 2, 2)
lambda_range = np.linspace(2, 5, 100)
# The helper returns the *negative* log-likelihood, so flip the sign back.
log_likelihoods = [-neg_log_likelihood_poisson(lam, data_poisson) for lam in lambda_range]
plt.plot(lambda_range, log_likelihoods, 'b-', linewidth=2)
plt.axvline(true_lambda, color='r', linestyle='--', label=f'True λ={true_lambda}')
plt.axvline(mle_lambda, color='g', linestyle='--', label=f'MLE λ={mle_lambda:.2f}')
plt.xlabel('λ (rate)')
plt.ylabel('Log-Likelihood')
plt.title('Log-Likelihood vs λ')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n✅ MLE for Poisson distribution implemented!")

## Part 3: MLE for Bernoulli Distribution


In [None]:
# Section banner for Part 3.
print("\n" + "=" * 60)
print("Part 3: MLE for Bernoulli Distribution")
print("=" * 60)

# Simulate 100 Bernoulli trials as binomial draws with a single trial each.
# The seed is fixed so the sample is reproducible.
np.random.seed(42)
true_p = 0.6
data_bernoulli = np.random.binomial(n=1, p=true_p, size=100)

print("\nTrue parameter:")
print(f" p (success probability): {true_p}")
print(f" Sample size: {data_bernoulli.size}")
print(f" Number of successes: {np.sum(data_bernoulli)}")

# The Bernoulli MLE is the observed fraction of successes.
mle_p = data_bernoulli.mean()

print("\nMLE estimate:")
print(f" p_MLE (sample proportion): {mle_p:.4f}")

# Visualize
plt.figure(figsize=(12, 5))

# Left panel: empirical frequencies of 0/1 with the true and fitted PMFs.
plt.subplot(1, 2, 1)
unique, counts = np.unique(data_bernoulli, return_counts=True)
plt.bar(unique, counts/len(data_bernoulli), alpha=0.7, color='blue', label='Data (empirical)')
x = np.array([0, 1])
plt.plot(x, stats.bernoulli.pmf(x, true_p), 'ro-', markersize=15, label=f'True (p={true_p})')
plt.plot(x, stats.bernoulli.pmf(x, mle_p), 'go--', markersize=15, label=f'MLE (p={mle_p:.2f})')
plt.xlabel('Value')
plt.ylabel('Probability')
plt.title('Bernoulli Distribution MLE')
plt.xticks([0, 1])
plt.legend()
plt.grid(True, alpha=0.3)

# Right panel: log-likelihood as a function of p.
plt.subplot(1, 2, 2)
# Stay strictly inside (0, 1) so that log(p) and log(1 - p) remain finite.
p_range = np.linspace(0.01, 0.99, 100)
# Log-likelihood: k*log(p) + (n-k)*log(1-p) where k = sum(data)
k = data_bernoulli.sum()
n = len(data_bernoulli)
log_likelihoods = k * np.log(p_range) + (n - k) * np.log(1 - p_range)
plt.plot(p_range, log_likelihoods, 'b-', linewidth=2)
plt.axvline(true_p, color='r', linestyle='--', label=f'True p={true_p}')
plt.axvline(mle_p, color='g', linestyle='--', label=f'MLE p={mle_p:.2f}')
plt.xlabel('p (probability)')
plt.ylabel('Log-Likelihood')
plt.title('Log-Likelihood vs p')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n✅ MLE for Bernoulli distribution implemented!")

## Summary

### Key Concepts:
1. **Likelihood Function**: Measures probability of observing data given parameters
2. **Maximum Likelihood Estimation**: Finds parameters that maximize likelihood
3. **Log-Likelihood**: Often easier to work with (converts products to sums)
4. **MLE for Common Distributions**:
   - Gaussian: sample mean and sample standard deviation normalised by N (`ddof=0`), not N-1
   - Poisson: sample mean
   - Bernoulli: sample proportion

### Best Practices:
- Use log-likelihood to avoid numerical underflow
- Verify constraints (e.g., σ > 0, 0 < p < 1)
- Compare MLE estimates with true parameters
- Understand when MLE is appropriate

### Applications:
- Parameter estimation
- Model fitting
- Statistical inference
- Machine learning (loss functions)

**Reference:** Course 03, Unit 5: "Probability and Statistical Inference" - Maximum Likelihood Estimation practical content