<a href="https://colab.research.google.com/github/jburchfield76/datasharing/blob/master/MLE_Chi_Square.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.optimize import minimize
from scipy.special import gammaln  # Log gamma function

# Step 1: Draw a reproducible synthetic sample from a Chi-Square distribution
true_k = 5  # Degrees of freedom of the generating distribution
n = 100     # Number of observations to draw

# Seed NumPy's global generator so every run produces the same sample
np.random.seed(42)
data = np.random.chisquare(true_k, n)  # n draws from Chi-Square(true_k)

# Step 2: Define the negative log-likelihood function (the objective to minimize)
def neg_log_likelihood(k, data):
    """
    Compute the negative log-likelihood of a Chi-Square distribution.

    Optimizers minimize, so the log-likelihood is negated before returning.

    Parameters
    ----------
    k : float or one-element array-like
        Candidate degrees of freedom. A scalar, a one-element list, or a
        one-element array (the form an optimizer typically passes) is accepted.
    data : array-like
        Observed sample values. Lists and tuples are converted to a float
        array before use.

    Returns
    -------
    float
        The negative log-likelihood of ``data`` under Chi-Square(k), or
        ``np.inf`` when ``k`` is not positive.

    Raises
    ------
    ValueError
        If ``k`` does not contain exactly one value.
    """
    # Normalize k to a plain scalar so scalars, lists and arrays behave alike.
    k = np.asarray(k, dtype=float)
    if k.size != 1:
        raise ValueError("k must be a scalar or a one-element array")
    k = k.item()

    if k <= 0:  # k must be positive
        return np.inf

    # Coerce so that plain Python sequences support the arithmetic below.
    data = np.asarray(data, dtype=float)

    # Per-observation log-density:
    #   (k/2 - 1) * log(x) - x/2 - (k/2) * log(2) - log(Gamma(k/2))
    half_k = k / 2
    log_likelihood = np.sum(
        (half_k - 1) * np.log(data)
        - data / 2
        - half_k * np.log(2)
        - gammaln(half_k)
    )
    return -log_likelihood  # Negated so that minimizing maximizes the likelihood

# Step 3: Optimize to find MLE for k
# Start the search at k=2; the lower bound of 0.1 keeps k strictly positive.
result = minimize(neg_log_likelihood, x0=[2], args=(data,), bounds=[(0.1, None)])
if not result.success:
    # Surface a failed fit instead of silently reporting a misleading estimate
    print(f"Warning: MLE optimization did not converge: {result.message}")
mle_k = result.x[0]  # Extract MLE estimate for k

# Print results
print(f"True k: {true_k}, MLE estimated k: {mle_k:.4f}")

# Step 4: Visualizing the results
# Evaluate both densities on a common grid spanning the observed range
x = np.linspace(0, np.max(data), 1000)
true_pdf = stats.chi2.pdf(x, df=true_k)  # True Chi-Square PDF
mle_pdf = stats.chi2.pdf(x, df=mle_k)  # MLE estimated Chi-Square PDF

# density=True normalizes the histogram so it is comparable to the PDFs
plt.hist(data, bins=15, density=True, alpha=0.6, color='g', label="Sample Data")
plt.plot(x, true_pdf, 'r--', label="True Distribution")
plt.plot(x, mle_pdf, 'b-', label="MLE Estimated Distribution")
plt.xlabel("Value")
plt.ylabel("Density")
plt.title("MLE Estimation of Chi-Square Distribution")
plt.legend()
plt.show()
