# PSL Coding Assignment 4

Members: Amy Hwang, Christian Tam, Monil Kaneria

Contributions:

In [53]:
import numpy as np
import math
from scipy.stats import norm

## Part 1: Gaussian Mixtures

### Functions

In [54]:
def eStep(sigma, G, p, x, mu):
    """E-step: posterior component probabilities under a shared covariance.

    Parameters
    ----------
    sigma : (d, d) array
        Covariance matrix shared by every component. It is treated as
        symmetric positive definite.
    G : int
        Number of components. Not used in the computation (the component
        count is implied by ``mu`` and ``p``); kept so existing callers work.
    p : length-G sequence
        Current mixing weights.
    x : (n, d) array
        Observations, one per row.
    mu : (G, d) array
        Current component means, one per row.

    Returns
    -------
    (n, G) ndarray
        Entry (i, k) is P(Z_i = k | x_i). Every row sums to 1.
    """
    # np.linalg.svd returns (U, S, Vh) with sigma = U @ diag(S) @ Vh.  For a
    # symmetric positive definite sigma, Vh == U.T, hence
    # sigma^{-1} = U @ diag(1 / S) @ U.T and the whitening map for ROW vectors
    # is `v @ U @ diag(S ** -0.5)`.  Multiplying by Vh instead is only correct
    # when U happens to be symmetric.
    U, D, _ = np.linalg.svd(sigma)
    whiten = U @ np.diag(1.0 / np.sqrt(D))

    xBar = x @ whiten
    muBar = mu @ whiten

    # Squared Mahalanobis distance of every observation to every mean: (n, G).
    diff = xBar[:, np.newaxis, :] - muBar
    distances = np.sum(diff ** 2, axis=2)

    # Subtracting the per-row minimum leaves the normalised posteriors
    # unchanged but keeps exp() from underflowing to 0 for every component,
    # which would otherwise produce 0 / 0 = NaN rows.
    distances -= distances.min(axis=1, keepdims=True)

    probs = np.exp(-0.5 * distances) * np.asarray(p, dtype=float)
    probs /= probs.sum(axis=1, keepdims=True)

    return probs

In [55]:
def mStep(data, probs, G):
    """M-step: re-estimate the mixture parameters from the responsibilities.

    Parameters
    ----------
    data : (n, d) array
        Observations, one per row.
    probs : (n, G) array
        Responsibilities from the E-step; ``probs[i, k]`` is the posterior
        probability that observation i belongs to component k.
    G : int
        Number of components (columns of ``probs``).

    Returns
    -------
    p_new : (G,) ndarray
        Updated mixing weights.
    mu_new : (G, d) ndarray
        Updated component means, one per row.
    sigma_new : (d, d) ndarray
        Updated covariance matrix, shared by all components.
    """
    n, d = data.shape

    # Effective number of observations assigned to each component.
    weighted_sums = probs.sum(axis=0)
    p_new = weighted_sums / n
    mu_new = (probs.T @ data) / weighted_sums[:, np.newaxis]

    # Pool the responsibility-weighted scatter of every component around its
    # own mean into a single d-by-d matrix: the covariance is shared, so there
    # is no per-component (G, d, d) array to fill.
    weighted_cov = np.zeros((d, d))
    for k in range(G):
        x_centered = data - mu_new[k]
        weighted_cov += (probs[:, k, np.newaxis] * x_centered).T @ x_centered
    sigma_new = weighted_cov / np.sum(weighted_sums)

    return p_new, mu_new, sigma_new

In [56]:
def logik(data, G, sigma, mu, p):
    """Log-likelihood of the data under a shared-covariance Gaussian mixture.

    Parameters
    ----------
    data : (n, d) array
        Observations, one per row.
    G : int
        Number of components.
    sigma : (d, d) array
        Covariance matrix shared by all components.
    mu : (G, d) array
        Component means, one per row.
    p : length-G sequence
        Mixing weights.

    Returns
    -------
    float
        sum_i log( sum_k p_k * N(x_i | mu_k, sigma) ).
    """
    data = np.asarray(data, dtype=float)
    mu = np.asarray(mu, dtype=float)
    n, d = data.shape

    # The covariance is shared, so its inverse and the Gaussian normalising
    # constant sqrt((2*pi)^d * det(sigma)) are the same for every component.
    inv_cov = np.linalg.inv(sigma)
    norm_const = np.sqrt((2 * math.pi) ** d * np.linalg.det(sigma))

    density = np.zeros((n, G))
    for k in range(G):
        data_mu = data - mu[k]
        # Row-wise quadratic form (x - mu)^T sigma^{-1} (x - mu); this avoids
        # materialising an n-by-n matrix only to read its diagonal.
        mahal_distance = np.sum((data_mu @ inv_cov) * data_mu, axis=1)

        # Weighted density of each observation under component k.  p[k] is a
        # scalar, so this is an ordinary product (matmul rejects scalars).
        density[:, k] = p[k] * np.exp(-0.5 * mahal_distance) / norm_const

    # Mixture density per observation, then the log-likelihood over all rows.
    log_likelihood = np.sum(np.log(np.sum(density, axis=1)))

    return log_likelihood

In [57]:
def myEM(data, G, sigma_init, mu_init, p_init, itmax):
    """Fit a shared-covariance Gaussian mixture by alternating E- and M-steps.

    Runs exactly ``itmax`` iterations of ``eStep`` followed by ``mStep``.
    There is no convergence check and the log-likelihood is not evaluated.

    Parameters
    ----------
    data : (n, d) array
        The dataset, one observation per row.
    G : int
        The number of mixture components.
    sigma_init : (d, d) array
        Initial shared covariance matrix.
    mu_init : (G, d) array
        Initial component means, one per row.
    p_init : length-G sequence
        Initial mixing weights.
    itmax : int
        The number of EM iterations to run.

    Returns
    -------
    prob : length-G vector
        Mixing weights (p_1, ..., p_G).
    mean : (d, G) array
        The k-th COLUMN is the mean of the k-th component (note the
        transpose relative to ``mu_init``).
    Sigma : (d, d) array
        Covariance matrix shared by all G components.
    """
    p, mu, sigma = p_init, mu_init, sigma_init

    for _ in range(itmax):
        # E-step: responsibilities under the current parameters.
        probs = eStep(sigma, G, p, data, mu)

        # M-step: parameters that maximise the expected log-likelihood.
        p, mu, sigma = mStep(data, probs, G)

    return p, mu.T, sigma

### Testing

In [58]:
with open('faithful.dat') as faithful:
    # Split every line on whitespace and drop the leading field.
    fields = [line.split()[1:] for line in faithful]

# The first line is skipped; every remaining field is parsed as a float.
data = np.array([[float(value) for value in row] for row in fields[1:]])

#### Case 1: G = 2

In [59]:
G = 2
itmax = 20
n = len(data)

# Initial mixing weights: the first 10 rows form component 1, the rest component 2.
p1 = 10 / n
p2 = 1 - p1
p_init = [p1, p2]

# Initial means: the per-column average of each slice.
cluster_1, cluster_2 = data[:10], data[10:]
mu1 = cluster_1.mean(axis=0)
mu2 = cluster_2.mean(axis=0)
mu_init = np.array([mu1, mu2])

# Scatter matrix of each slice around its own mean (sum of outer products).
cov_matrix_1 = np.sum([np.outer(row, row) for row in cluster_1 - mu1], axis=0)
cov_matrix_2 = np.sum([np.outer(row, row) for row in cluster_2 - mu2], axis=0)

# Pooled d-by-d covariance shared by both components.
cov_matrix_init = (cov_matrix_1 + cov_matrix_2) / n

In [63]:
prob, mean, Sigma = myEM(data, G, cov_matrix_init, mu_init, p_init, itmax)

# Print each estimate under its label.
for label, value in (("prob", prob), ("mean", mean), ("Sigma", Sigma)):
    print(label)
    print(value)

prob
[0.04363422 0.07718656 0.87917922]
mean
[[ 3.51006918  2.81616674  3.54564083]
 [77.10563811 63.35752634 71.25084801]]
Sigma
[[  1.26015772  13.51153756]
 [ 13.51153756 177.96419105]]


#### Case 2: G = 3

In [61]:
G = 3
itmax = 20
x = data
n = len(x)

# Initial mixing weights: rows 0-9, rows 10-29 and the remainder.
p1 = 10 / n
p2 = 20 / n
p3 = 1 - p1 - p2
p_init = [p1, p2, p3]

# Initial means: the per-column average of each slice.
cluster_1, cluster_2, cluster_3 = x[:10], x[10:30], x[30:]
mu1 = cluster_1.mean(axis=0)
mu2 = cluster_2.mean(axis=0)
mu3 = cluster_3.mean(axis=0)
mu_init = np.array([mu1, mu2, mu3])

# Scatter matrix of each slice around its own mean (sum of outer products).
cov_matrix_1 = np.sum([np.outer(row, row) for row in cluster_1 - mu1], axis=0)
cov_matrix_2 = np.sum([np.outer(row, row) for row in cluster_2 - mu2], axis=0)
cov_matrix_3 = np.sum([np.outer(row, row) for row in cluster_3 - mu3], axis=0)

# Pooled d-by-d covariance shared by all three components.
cov_matrix_init = (cov_matrix_1 + cov_matrix_2 + cov_matrix_3) / n


In [65]:
prob, mean, Sigma = myEM(data, G, cov_matrix_init, mu_init, p_init, itmax)

# Print each estimate under its label.
for label, value in (("prob", prob), ("mean", mean), ("Sigma", Sigma)):
    print(label)
    print(value)

prob
[0.04363422 0.07718656 0.87917922]
mean
[[ 3.51006918  2.81616674  3.54564083]
 [77.10563811 63.35752634 71.25084801]]
Sigma
[[  1.26015772  13.51153756]
 [ 13.51153756 177.96419105]]


## Part 2: HMM

### Functions

In [None]:
def BS_onestep(data, ):
    """Placeholder for the HMM helper of this name; not implemented yet.

    NOTE(review): the name and parameter list look unfinished (presumably a
    single Baum-Welch update step -- confirm the intended name and arguments).

    Raises
    ------
    NotImplementedError
        Always. An explicit body keeps the cell valid Python instead of
        failing with a SyntaxError on the empty ``def``.
    """
    raise NotImplementedError("BS_onestep is not implemented yet")

In [None]:
def myBW():
    """Placeholder for the Baum-Welch driver; not implemented yet.

    NOTE(review): the parameter list is still empty -- presumably it should
    receive the observations, the initial HMM parameters and an iteration
    count; confirm before implementing.

    Raises
    ------
    NotImplementedError
        Always. An explicit body keeps the cell valid Python instead of
        failing with a SyntaxError on the empty ``def``.
    """
    raise NotImplementedError("myBW is not implemented yet")

In [None]:
def myViterbi(data, mx, mz, w, A, B):
    """Placeholder for the Viterbi decoder; not implemented yet.

    NOTE(review): argument meanings are not established by this notebook.
    Presumably ``data`` is the observation sequence, ``mx`` / ``mz`` the
    number of distinct observations / latent states, ``w`` the initial state
    distribution, ``A`` the transition matrix and ``B`` the emission matrix
    -- confirm before implementing.

    Raises
    ------
    NotImplementedError
        Always. An explicit body keeps the cell valid Python instead of
        failing with a SyntaxError on the empty ``def``.
    """
    raise NotImplementedError("myViterbi is not implemented yet")

### Testing

Part 1

In [None]:
# Parameter values for Part 1.
# NOTE(review): presumably w = initial state distribution, A = transition
# matrix, B = emission matrix -- confirm.
mz = 2
w = [1 / mz] * mz
A = [[1 / mz] * mz for _ in range(mz)]
B = [
    [count / 9 for count in (1, 3, 5)],
    [count / 6 for count in (1, 2, 3)],
]


Part 2

In [None]:
# Parameter values for Part 2: every row of A and B is uniform.
# NOTE(review): presumably w = initial state distribution, A = transition
# matrix, B = emission matrix -- confirm.
mz = 2
w = [1 / mz] * mz
A = [[1 / mz] * mz for _ in range(mz)]
B = [[1 / 3] * 3 for _ in range(mz)]

# Run Baum-Welch algorithm for 20 iterations

# Run Baum-Welch algorithm for 100 iterations



Explain why the resulting A and B matrices had these outcomes. You should understand why we cannot initialize our parameters in a way that
makes the latent states indistinguishable.