In [9]:
import numpy as np
import math
from matplotlib import pyplot as plt

### 4.1 K-means and K-medoids

In [4]:
# The four 2-D data points to be clustered, one (x, y) pair per row.
P = np.array([[0, -6], [4, 4], [0, 0], [-5, 2]])

In [5]:
def l1_norm(x, y):
    """Return the L1 (Manhattan) distance between points `x` and `y`.

    Computed as the sum of absolute coordinate differences, ||x - y||_1.
    Taking the difference of the two points' individual norms instead would
    report distance 0 for distinct points such as (1, 0) and (0, 1).

    Parameters
    ----------
    x, y : array-like
        Coordinates of the two points; they must be broadcastable together.

    Returns
    -------
    numpy scalar
        The non-negative L1 distance.
    """
    diff = np.asarray(x) - np.asarray(y)
    return np.sum(np.abs(diff))

def l2_norm(x, y):
    """Return the L2 (Euclidean) distance between points `x` and `y`.

    Computed as the square root of the summed squared coordinate
    differences, ||x - y||_2. Taking the difference of the two points'
    individual norms instead would report distance 0 for any two points
    that are equally far from the origin.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the two points; they must be broadcastable together.

    Returns
    -------
    numpy floating scalar
        The non-negative Euclidean distance.
    """
    diff = np.asarray(x) - np.asarray(y)
    return np.sqrt(np.sum(diff ** 2))

In [6]:
def kmedoids(a, k, centers, norm_fn=l1_norm):
    """Assign every point in `a` to its nearest center and print the clusters.

    Only the assignment step of the clustering is performed; the centers are
    not updated. The resulting list of clusters is printed, not returned.

    Parameters
    ----------
    a : iterable of points
        The points to cluster.
    k : int
        Number of clusters. The first `k` entries of `centers` are used.
    centers : sequence of points
        Current cluster centers; must contain at least `k` entries.
    norm_fn : callable, optional
        Distance function called as ``norm_fn(point, center)``.

    Raises
    ------
    ValueError
        If `k` is smaller than 1 or fewer than `k` centers are supplied.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(centers) < k:
        raise ValueError("need at least k centers, got %d for k=%d" % (len(centers), k))

    clusters = [[] for _ in range(k)]

    for a_i in a:
        distances = [norm_fn(a_i, centers[j]) for j in range(k)]
        # argmin returns the first minimum, so ties go to the lowest-index center.
        nearest = int(np.argmin(distances))
        clusters[nearest].append(a_i)

    print(clusters)

#### Clustering 1

l1 norm = [[array([4, 4]), array([-5,  2])], [array([ 0, -6]), array([0, 0])]]

Centers = [4,4] and [0,-6]

In [7]:
# Assignment step with the default (l1) distance, starting from centers (-5, 2) and (0, -6).
kmedoids(
    a=P,
    k=2,
    centers=np.array([[-5, 2], [0, -6]]),
)

[[array([4, 4]), array([-5,  2])], [array([ 0, -6]), array([0, 0])]]


#### Clustering 2

l2 norm = [[array([4, 4]), array([0, 0]), array([-5,  2])], [array([ 0, -6])]]

The centers remain the same.

In [8]:
# Same starting centers as before, but points are assigned using the l2 distance.
kmedoids(
    a=P,
    k=2,
    centers=np.array([[-5, 2], [0, -6]]),
    norm_fn=l2_norm,
)

[[array([4, 4]), array([0, 0]), array([-5,  2])], [array([ 0, -6])]]


#### Clustering 3

K-means with the l1 norm: the center update should have used the coordinate-wise median, `np.median(a, axis=0)`.

The centers will be points that are not necessarily in the original dataset.

### 4.2 Maximum Likelihood Estimation

theta_A = 0.42857
theta_B = 0.35714
theta_C = 0.2142857

ABC = 0.032798459
BBB = 0.045552842
ABB = 0.054663666
AAC = 0.039358335

### 4.3 EM Algorithm


In [4]:
# Initial parameters of the two-component Gaussian mixture.
pi_1, pi_2 = 0.5, 0.5   # mixing weights
mu_1, mu_2 = 6, 7       # component means
# NOTE(review): these are passed as the `variance` argument of pdf_gaussian
# in the cells that follow — confirm they are variances, not standard deviations.
sigma_1, sigma_2 = 1, 4

# Observed one-dimensional data points.
x = np.array([-1, 0, 4, 5, 6])

In [15]:
def pdf_gaussian(x, mean, variance):
    """Evaluate the univariate Gaussian density with the given mean and variance at `x`.

    `x` may be a scalar or a NumPy array; the density is evaluated elementwise.
    """
    normaliser = 1 / np.sqrt(2 * np.pi * variance)
    exponent = -((x - mean) ** 2 / (2 * variance))
    return normaliser * np.exp(exponent)


In [17]:
# Data log-likelihood: the per-point mixture density is
#   p(x_i; theta) = pi_1 * gaussian(params 1) + pi_2 * gaussian(params 2),
# and the log of that density is summed over all points.
component_1 = pi_1 * pdf_gaussian(x, mu_1, sigma_1)
component_2 = pi_2 * pdf_gaussian(x, mu_2, sigma_2)
result = np.log(component_1 + component_2)

print(result.sum())

-24.512532330086678


In [22]:
# Log of each weighted component density, log(pi_k * N(x_i; mu_k, sigma_k)), per data point.
weights_1, weights_2 = (
    np.log(pi_k * pdf_gaussian(x, mu_k, var_k))
    for pi_k, mu_k, var_k in ((pi_1, mu_1, sigma_1), (pi_2, mu_2, sigma_2))
)

# True where component 1 has the larger weighted density for that point.
print(np.greater(weights_1, weights_2))

[False False False  True  True]
