In [1]:
from scipy.stats import multivariate_normal
from random import randint
import numpy as np

In [2]:
# Number of mixture components iterated over by the E-step.
k = 3

# Read the samples: the first two whitespace-separated fields of every line
# are parsed as the x and y coordinates of one observation.
x, y = [], []
with open("3gaussian.txt", 'r') as file:  # context manager closes the handle
    for line in file:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing empty line) carry no sample.
            continue
        # Parse both coordinates before appending so x and y can never get
        # out of sync when a line is malformed.
        x_value, y_value = float(fields[0]), float(fields[1])
        x.append(x_value)
        y.append(y_value)

In [3]:
# Starting guess for each component mean, one [x, y] pair per component.
u = [
    [2, 2],
    [6, 3],
    [4, 6],
]

# Starting 2x2 covariance matrix for each component, in the same order as `u`.
cov_mat = [
    [[1, 0], [0, 2]],
    [[1, 0.9], [0.9, 1]],
    [[1, 0.9], [0.9, 1]],
]

# Mixing weight of each component, keyed by component index.
weights = dict(enumerate((0.323, 0.328, 0.349)))

# One table per component, mapping an (x, y) data point to the responsibility
# that component takes for it; populated by expectation().
pi_0, pi_1, pi_2 = {}, {}, {}

In [4]:
def probablity(data_point, u, cov_mat):
    """Evaluate a multivariate normal density at one 2-D point.

    Args:
        data_point: Indexable whose first two entries are the coordinates;
            any further entries are ignored.
        u: Mean handed to ``scipy.stats.multivariate_normal``.
        cov_mat: Covariance handed to ``scipy.stats.multivariate_normal``.

    Returns:
        The value of the density at ``(data_point[0], data_point[1])``.
    """
    coordinates = [data_point[0], data_point[1]]
    return multivariate_normal(mean=u, cov=cov_mat).pdf(coordinates)

In [5]:
def expectation():
    """E-step: store each component's responsibility for every data point.

    For every observation ``(x[i], y[i])`` the density under each of the ``k``
    components is obtained from ``probablity`` and multiplied by the
    component's weight.  The weighted values are written into the module-level
    tables ``pi_0``, ``pi_1`` and ``pi_2`` (keyed by the point tuple) and then
    divided by their sum over all components.

    Reads the globals ``x``, ``y``, ``k``, ``u``, ``cov_mat`` and ``weights``;
    mutates ``pi_0``, ``pi_1`` and ``pi_2`` in place.  Returns nothing.
    """
    tables = (pi_0, pi_1, pi_2)
    for idx in range(len(x)):
        point = (x[idx], y[idx])

        densities = []
        for comp in range(k):
            density = probablity(point, u[comp], cov_mat[comp])
            # Components past the second all land in the last table.
            slot = comp if comp < 2 else 2
            tables[slot][point] = density * weights[slot]
            densities.append(density)

        # Normaliser: weighted densities summed over every component.
        normaliser = sum(
            density * weights[comp] for comp, density in enumerate(densities)
        )

        for table in tables:
            table[point] /= normaliser

In [6]:
def maximization():
    """M-step: re-estimate the mixing weights and means, then the covariances.

    For each of the three components (tables ``pi_0``, ``pi_1``, ``pi_2``):

    * ``weights[j]`` becomes the component's total responsibility divided by
      the number of observations ``len(x)``;
    * ``u[j]`` becomes the responsibility-weighted average of the observations
      (updated in place).

    The total responsibility is accumulated per observation rather than with
    ``sum(pi_j.values())``: the tables are keyed by coordinates, so a point
    that occurs several times in ``x``/``y`` has a single entry and would be
    under-counted, leaving the weights summing to less than one and biasing
    the means.

    Finally ``update_cov_mat()`` is called so the covariances are recomputed
    around the new means.  Reads the globals ``x``, ``y``, ``pi_0``, ``pi_1``
    and ``pi_2``; mutates ``weights`` and ``u``.  Returns nothing.
    """
    n_points = len(x)
    for label, responsibilities in enumerate((pi_0, pi_1, pi_2)):
        total = 0
        weighted_x = 0
        weighted_y = 0
        for i in range(n_points):
            resp = responsibilities[(x[i], y[i])]
            total += resp
            weighted_x += resp * x[i]
            weighted_y += resp * y[i]

        weights[label] = total / n_points
        u[label][0] = weighted_x / total
        u[label][1] = weighted_y / total

    update_cov_mat()

In [7]:
def update_cov_mat():
    """Re-estimate the 2x2 covariance matrix of each of the three components.

    For component ``j`` the new matrix is the responsibility-weighted sum of
    the outer products ``d d^T`` with ``d = (x[i], y[i]) - u[j]``, divided by
    the component's total responsibility.  The result replaces ``cov_mat[j]``
    as a nested list of floats.

    The total responsibility is accumulated per observation rather than with
    ``sum(pi_j.values())``: the tables are keyed by coordinates, so repeated
    points share one entry and would otherwise be under-counted in the
    normaliser while still being counted in the weighted sum.

    Reads the globals ``x``, ``y``, ``u``, ``pi_0``, ``pi_1`` and ``pi_2``;
    mutates ``cov_mat``.  Returns nothing.
    """
    for label, responsibilities in enumerate((pi_0, pi_1, pi_2)):
        mean_x = u[label][0]
        mean_y = u[label][1]

        scatter = np.zeros((2, 2))
        total = 0
        for i in range(len(x)):
            resp = responsibilities[(x[i], y[i])]
            deviation = np.array([x[i] - mean_x, y[i] - mean_y])
            scatter += resp * np.outer(deviation, deviation)
            total += resp

        cov_mat[label] = (scatter / total).tolist()

In [8]:
def repeat_untill_convergence(max_iterations=77, tol=None):
    """Alternate the E- and M-steps, printing the parameters after each round.

    Args:
        max_iterations: Upper bound on the number of EM rounds.  The default
            of 77 matches the previously hard-coded ``while i <= 76`` loop.
        tol: Optional early-stopping threshold.  When given, the loop ends as
            soon as no coordinate of any mean in ``u`` moved by ``tol`` or
            more during a round.  ``None`` (the default) always runs the full
            ``max_iterations`` rounds.

    Each round calls ``expectation()`` then ``maximization()`` and prints the
    three means, the three covariance matrices and the weights from the
    globals ``u``, ``cov_mat`` and ``weights``.  Returns nothing.
    """
    for iteration in range(max_iterations):
        print("Iteration: " + str(iteration))

        # Snapshot the means only when early stopping was requested.
        previous_means = [list(mean) for mean in u] if tol is not None else None

        expectation()
        print("Expectation done")
        maximization()
        print("Maximization done")

        # The model is fixed at three components, reported 1-based.
        for label in range(3):
            print("Mean " + str(label + 1) + ": ", end = " ")
            print(u[label])
        for label in range(3):
            print("Covariance " + str(label + 1) + ": ", end = " ")
            print(cov_mat[label])
        print("Weights: ", end = " ")
        print(weights)
        print("=============================================")

        if tol is not None:
            largest_shift = max(
                (
                    abs(new - old)
                    for mean, previous in zip(u, previous_means)
                    for new, old in zip(mean, previous)
                ),
                default=0.0,
            )
            if largest_shift < tol:
                break

In [9]:
# Run the EM loop; every iteration prints the current means, covariances and weights.
repeat_untill_convergence()

Iteration: 0
Expectation done
Maximization done
Mean 1:  [3.2173589954742741, 3.573353315400063]
Mean 2:  [6.916258079078812, 3.9308918720488526]
Mean 3:  [4.9076929630955277, 6.8763679118867609]
Covariance 1:  [[1.5157658896686761, 0.69294070283565223], [0.69294070283565223, 5.6225548964876833]]
Covariance 2:  [[1.246548567972138, 0.82193639005949826], [0.82193639005949826, 1.3083416716337857]]
Covariance 3:  [[1.0748014947950191, 0.57963505179417196], [0.57963505179417196, 1.095612464344738]]
Weights:  {0: 0.19737083254298637, 1: 0.31466285235763547, 2: 0.48796631509937743}
Iteration: 1
Expectation done
Maximization done
Mean 1:  [3.1968331859618258, 3.5136618322255178]
Mean 2:  [6.9712402744356563, 4.0018247222554146]
Mean 3:  [4.9758093570545814, 6.9465976287983091]
Covariance 1:  [[1.3072651630082799, 0.65469137898380714], [0.65469137898380714, 5.1640511695743472]]
Covariance 2:  [[1.1082639050692749, 0.60584403938582287], [0.60584403938582287, 1.1285158069734587]]
Covariance 3:  

Maximization done
Mean 1:  [3.0790521756733331, 3.1614731451579074]
Mean 2:  [7.0287199747664904, 4.0195005928565752]
Mean 3:  [5.0305549680934165, 7.0174842679315486]
Covariance 1:  [[1.0586891500451348, 0.10509818698319663], [0.10509818698319663, 3.6201082951476278]]
Covariance 2:  [[0.97848208297651762, 0.49657805185426429], [0.49657805185426429, 0.99590069981704421]]
Covariance 3:  [[0.95746723779401266, 0.1727094629411205], [0.1727094629411205, 0.95285737295217132]]
Weights:  {0: 0.21469654927224438, 1: 0.29753154245053348, 2: 0.48777190827722317}
Iteration: 14
Expectation done
Maximization done
Mean 1:  [3.0747426952366541, 3.1488910266917673]
Mean 2:  [7.0280720787462005, 4.0190963618421858]
Mean 3:  [5.028533711339005, 7.0158262320605234]
Covariance 1:  [[1.055423599791236, 0.096010198328354474], [0.096010198328354474, 3.5931464069791477]]
Covariance 2:  [[0.97955151261160689, 0.49695652932272238], [0.49695652932272238, 0.99576850576041698]]
Covariance 3:  [[0.95980928832334655

Maximization done
Mean 1:  [3.0476745905294957, 3.0711564942537697]
Mean 2:  [7.0232167468928512, 4.0163575222129522]
Mean 3:  [5.0156198100806204, 7.0048993027891129]
Covariance 1:  [[1.0346630166150315, 0.04189136489454974], [0.04189136489454974, 3.4307022342599511]]
Covariance 2:  [[0.9876204233197744, 0.49989656936571136], [0.49989656936571136, 0.99555364428025628]]
Covariance 3:  [[0.97500611040564467, 0.18226791523651895], [0.18226791523651895, 0.96995189488070255]]
Weights:  {0: 0.20744421215000672, 1: 0.29822231431873353, 2: 0.49433347353125917}
Iteration: 27
Expectation done
Maximization done
Mean 1:  [3.0467196929962985, 3.068441005334694]
Mean 2:  [7.0230238440024682, 4.016252672978351]
Mean 3:  [5.0151562494154565, 7.0044944086880374]
Covariance 1:  [[1.0339262584171014, 0.040068205096659558], [0.040068205096659558, 3.4251575224325279]]
Covariance 2:  [[0.98794464130124759, 0.50001877553216734], [0.50001877553216734, 0.99556158894186408]]
Covariance 3:  [[0.9755631955228826

Maximization done
Mean 1:  [3.0411887893320944, 3.0527314835119022]
Mean 2:  [7.0218796607759195, 4.0156326714377739]
Mean 3:  [5.0124578868406422, 7.0021194854952071]
Covariance 1:  [[1.0296574139226635, 0.029618562003807105], [0.029618562003807105, 3.3932571980298625]]
Covariance 2:  [[0.98987443939925979, 0.50075267466230811], [0.50075267466230811, 0.99562594417195271]]
Covariance 3:  [[0.97882557747305565, 0.18460378885272047], [0.18460378885272047, 0.97367699960327381]]
Weights:  {0: 0.20594289209580838, 1: 0.29839536385478793, 2: 0.49566174404940283}
Iteration: 40
Expectation done
Maximization done
Mean 1:  [3.0410065626931995, 3.0522143680201554]
Mean 2:  [7.0218411898879971, 4.0156118692050304]
Mean 3:  [5.0123685801290359, 7.0020403584451163]
Covariance 1:  [[1.0295167481423406, 0.029277466027185381], [0.029277466027185381, 3.3922122714084706]]
Covariance 2:  [[0.98993952773192551, 0.50077761886952288], [0.50077761886952288, 0.99562859115101743]]
Covariance 3:  [[0.97893413731

Maximization done
Mean 1:  [3.0399661665291178, 3.0492624381840017]
Mean 2:  [7.0216206081828192, 4.0154926357466634]
Mean 3:  [5.011858183120216, 7.0015874959474695]
Covariance 1:  [[1.0287136389551002, 0.027333907587307006], [0.027333907587307006, 3.3862536649682191]]
Covariance 2:  [[0.99031298416511526, 0.50092097838222738], [0.50092097838222738, 0.9956443410075384]]
Covariance 3:  [[0.97955530445591654, 0.18505885020632121], [0.18505885020632121, 0.97438980816369425]]
Weights:  {0: 0.20565947610008503, 1: 0.29842893748546456, 2: 0.49591158641445265}
Iteration: 53
Expectation done
Maximization done
Mean 1:  [3.0399323153685618, 3.0491664045352045]
Mean 2:  [7.0216134043923315, 4.0154887429019146]
Mean 3:  [5.0118415616423002, 7.0015727297265631]
Covariance 1:  [[1.0286875085386715, 0.027270781206224899], [0.027270781206224899, 3.3860599970638749]]
Covariance 2:  [[0.99032518797098645, 0.50092566988898912], [0.50092566988898912, 0.99564487164042315]]
Covariance 3:  [[0.9795755544160

Expectation done
Maximization done
Mean 1:  [3.0397395551142017, 3.0486195700795471]
Mean 2:  [7.0215723514164781, 4.0154665595824124]
Mean 3:  [5.0117468952593391, 7.0014886077061469]
Covariance 1:  [[1.0285387132979045, 0.026911450621682147], [0.026911450621682147, 3.384957430797241]]
Covariance 2:  [[0.99039474401052852, 0.50095241750450903], [0.50095241750450903, 0.99564791503588179]]
Covariance 3:  [[0.97969091211711246, 0.18514371708299487], [0.18514371708299487, 0.97452231732738026]]
Weights:  {0: 0.20560692904269209, 1: 0.29843519263417956, 2: 0.49595787832312815}
Iteration: 66
Expectation done
Maximization done
Mean 1:  [3.0397332977619875, 3.0486018192351771]
Mean 2:  [7.021571017846977, 4.0154658390116094]
Mean 3:  [5.0117438217027228, 7.0014858758654137]
Covariance 1:  [[1.0285338831440254, 0.026899789879073516], [0.026899789879073516, 3.3849216464716885]]
Covariance 2:  [[0.99039700373284123, 0.50095328670942374], [0.50095328670942374, 0.99564801445084217]]
Covariance 3:  