## Maximum Likelihood Estimation

We begin with a random process -- a sequence of events that may influence each other non-deterministically. In such a process, the initial state can predict subsequent state(s) only to some extent and never perfectly.

In [1]:
import numpy as np; np.random.seed(666)
import matplotlib.pyplot as plt

class RandomProcess:
    """A stochastic source that produces one random value per ``sample()`` call.

    Parameters
    ----------
    prob_func : callable
        Sampler invoked as ``prob_func(**params, size=1)``; its result must be
        indexable, since only element ``0`` is returned.
    **params
        Keyword arguments forwarded unchanged to ``prob_func`` on every draw.
    """

    def __init__(self, prob_func, **params):
        self.params = params
        self.prob_func = prob_func

    def sample(self):
        """Return a single draw from the wrapped sampler."""
        # Request a batch of exactly one value, then unwrap it.
        draws = self.prob_func(size=1, **self.params)
        return draws[0]

        
def make_observations(N, process):
    """Collect ``N`` successive draws from ``process`` into a NumPy array.

    ``process`` only needs a zero-argument ``sample()`` method; it is called
    exactly ``N`` times, in order.
    """
    draws = []
    for _ in range(N):
        draws.append(process.sample())
    return np.array(draws)

## Collect data

In [2]:
# Ground-truth population parameters that the estimation below tries to recover.
MU, SIGMA = 5, 2   # population mean and population SD
n = 100            # sample size

# Wrap NumPy's normal sampler; loc/scale are forwarded on every draw.
Gaussian_process = RandomProcess(np.random.normal, loc=MU, scale=SIGMA)

# Draw the n observations that make up our data set.
X = make_observations(n, Gaussian_process)

## Assumptions

In [3]:
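# Assumption 1: observations are independent, so the probability of the whole
# sample is the product of the individual probabilities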
def p_sample(X):
    """Return the product of all values in ``X``.

    In this notebook ``X`` holds the per-observation densities produced by
    ``pdf_gauss``, so the product is the joint likelihood of the sample when
    the observations are treated as independent.
    """
    joint = np.prod(X)
    return joint

# Assumption 2: each observation follows a Gaussian (normal) distribution
def pdf_gauss(X, mu, sigma):
    """Evaluate the Gaussian density with mean ``mu`` and SD ``sigma`` at ``X``.

    Computes ``exp(-(X - mu)^2 / (2 sigma^2)) / (sigma * sqrt(2 pi))``.
    The arithmetic is element-wise, so ``X`` may be a scalar or a NumPy array.
    """
    # Constant in front of the exponential: sigma * sqrt(2*pi)
    scale = sigma*np.sqrt(np.pi*2)
    squared_dev = (X - mu)**2
    exponent = squared_dev/(2*(sigma**2))
    return np.exp(-exponent)/scale

## Approach 1: brute force parameter estimation

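We can try a brute-force method: iterate over a grid of candidate parameter values and keep the pair that maximizes the probability of our sample. However, this approach depends too much on luck: it can only return values that lie on the grid, so the estimate is only as good as the range and step size we happened to choose.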

In [4]:
# Grid of candidate population parameters to scan exhaustively
mus = np.arange(0,10,.1)
sigmas = np.arange(0.001,3,.1)

# Highest sample probability seen so far; -1 is below any product of densities,
# so the first valid candidate always replaces it.
running_max = -1
for mu in mus:
    for sigma in sigmas:
        # Probability of our sample given this (mu, sigma) pair
        prob_given_params = p_sample(pdf_gauss(X, mu, sigma))
        if not prob_given_params > running_max:
            continue
        running_max, best_mu, best_sigma = prob_given_params, mu, sigma

print(best_mu, best_sigma)

5.0 1.901
