In [5]:
import numpy as np

In [6]:
# Observed 1-D sample that the mixture model below is fitted to.
data = np.array([
    -0.39,  0.12,  0.94,  1.67,  1.76,  2.44,  3.72,  4.28,  4.92,  5.53,
     0.06,  0.48,  1.01,  1.68,  1.80,  3.25,  4.12,  4.60,  5.28,  6.22,
])
# Number of observations; for this 1-D array len() equals data.shape[0].
N = len(data)

In [7]:
# Seed NumPy's global (legacy) RNG so every later np.random.* call in this
# notebook draws the same values on each Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Randomly choose initial means from data

In [8]:
# Number of Gaussian components in the mixture
K = 2

# Initial means: K distinct observations drawn from the sample via the
# global NumPy RNG (replace=False means the same data point is not picked twice).
mu = np.random.choice(data, size=K, replace=False)

In [9]:
# Stopping criteria for the EM loop: an iteration cap and the minimum
# absolute change in log-likelihood that still counts as progress.
max_iters = 100
tol = 1e-6

# Every component starts with the variance of the whole sample
# (data.var() is NumPy's default population variance, ddof=0).
sigma2 = np.full(shape=K, fill_value=data.var())

# Every component starts with the same mixing weight, so the weights sum to 1.
pi = np.full(shape=K, fill_value=1.0 / K)

print("Initial means:", mu)
print("Initial variances:", sigma2)
print("Initial mixing coefficients:", pi)

Initial means: [-0.39  4.6 ]
Initial variances: [3.96777475 3.96777475]
Initial mixing coefficients: [0.5 0.5]


# Gaussian PDF

In [10]:
def gaussian(x, mu, var):
    """Evaluate the univariate normal density N(x | mu, var).

    All arguments may be scalars or NumPy arrays; they are combined with
    ordinary NumPy broadcasting, so the result has the broadcast shape.

    Parameters
    ----------
    x : point(s) at which the density is evaluated.
    mu : mean of the distribution.
    var : variance of the distribution (not the standard deviation).

    Returns
    -------
    The density value(s) at ``x``.
    """
    normaliser = 1 / np.sqrt(2 * np.pi * var)
    exponent = -0.5 * ((x - mu) ** 2) / var
    return normaliser * np.exp(exponent)

# EM Loop

In [11]:
# Log-likelihood seen on the previous iteration; -inf guarantees the first
# convergence test cannot succeed.
log_likelihood_prev = -np.inf

for iteration in range(max_iters):
    # --- E-step: responsibilities under the current parameters ---
    # Treating data as a column lets it broadcast against the K-length
    # parameter vectors, giving the (N, K) weighted component densities at once.
    resp_unnorm = pi * gaussian(data[:, None], mu, sigma2)
    # Mixture density of every observation (sum over components).
    density = resp_unnorm.sum(axis=1)
    # Normalise each row so the responsibilities of one point sum to 1.
    resp = resp_unnorm / density[:, None]

    # --- M-step: update parameters ---
    # Effective number of points assigned to each component.
    N_k = resp.sum(axis=0)
    # Responsibility-weighted mean of the data per component.
    mu = (resp * data[:, None]).sum(axis=0) / N_k
    # Responsibility-weighted variance around the freshly updated means.
    sigma2 = (resp * (data[:, None] - mu) ** 2).sum(axis=0) / N_k
    pi = N_k / N

    # --- Check convergence via log-likelihood ---
    # `density` was computed before the M-step, so this is the log-likelihood
    # of the parameters that entered this iteration.
    log_likelihood = np.log(density).sum()
    if abs(log_likelihood - log_likelihood_prev) < tol:
        print(f"Converged after {iteration+1} iterations")
        break
    log_likelihood_prev = log_likelihood
else:
    # for/else: reached only when the loop ran out of iterations without `break`.
    print("Reached maximum iterations without full convergence")

Converged after 23 iterations


# Results

In [12]:
# Report the fitted mixture parameters; labels are padded so the values line up.
for label, value in (
    ("Estimated means:     ", mu),
    ("Estimated variances: ", sigma2),
    ("Mixing coefficients: ", pi),
):
    print(label, value)

Estimated means:      [1.08300458 4.65573178]
Estimated variances:  [0.81111563 0.81905761]
Mixing coefficients:  [0.55454326 0.44545674]
