In [14]:
from scipy.stats import multivariate_normal
from random import randint
import numpy as np

In [15]:
# Load the 2-D sample points: the first two whitespace-separated fields of
# every non-empty line are parsed as the x and y coordinates.
x, y = [], []
k = 3  # number of mixture components iterated over in the E-step
# The context manager closes the file handle even if a line fails to parse.
with open("3gaussian.txt", 'r') as file:
    for line in file:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing newline) instead of crashing.
            continue
        x.append(float(fields[0]))
        y.append(float(fields[1]))

In [16]:
# Starting means, one [x, y] pair per mixture component.
u = [
    [2, 2],
    [6, 3],
    [4, 6],
]
# Starting 2x2 covariance matrices, in the same component order as `u`.
cov_mat = [
    [[1, 0], [0, 2]],
    [[1, 0.9], [0.9, 1]],
    [[1, 0.9], [0.9, 1]],
]
# Starting mixing weights, keyed by component index.
weights = {
    0: 0.323,
    1: 0.328,
    2: 0.349,
}
# Per-component tables written by the E-step, keyed by (x, y) tuples.
pi_0, pi_1, pi_2 = {}, {}, {}

In [17]:
def probablity(data_point, u, cov_mat):
    """Evaluate a multivariate normal density at one data point.

    Args:
        data_point: Indexable whose first two entries are the coordinates
            at which the density is evaluated.
        u: Mean of the distribution.
        cov_mat: Covariance matrix of the distribution.

    Returns:
        The density of the normal distribution with mean `u` and covariance
        `cov_mat` at (data_point[0], data_point[1]).
    """
    coords = [data_point[0], data_point[1]]
    return multivariate_normal.pdf(coords, mean=u, cov=cov_mat)

In [18]:
def expectation():
    """E-step: recompute every point's responsibility under each component.

    For each point (x[i], y[i]) the density under each of the `k` components
    is multiplied by that component's mixing weight and divided by the sum of
    those products over all components. The results are written to the
    module-level tables `pi_0`, `pi_1` and `pi_2`, keyed by the (x, y) tuple.
    Nothing is returned.
    """
    tables = (pi_0, pi_1, pi_2)
    for idx, px in enumerate(x):
        data_point = (px, y[idx])

        densities = []
        for comp in range(k):
            density = probablity(data_point, u[comp], cov_mat[comp])
            # Component indices of 2 and above all land in the last table.
            slot = min(comp, 2)
            tables[slot][data_point] = density * weights[slot]
            densities.append(density)

        # Normalising constant: weighted densities summed in component order.
        evidence = 0
        for comp, density in enumerate(densities):
            evidence += density * weights[comp]

        for table in tables:
            table[data_point] /= evidence

In [19]:
def maximization():
    """M-step: re-estimate mixing weights and means, then the covariances.

    For each of the three components, the responsibilities stored in
    `pi_0`/`pi_1`/`pi_2` are looked up for every point (x[i], y[i]) and used to
    update, in place:

    * weights[c] = (sum of responsibilities) / len(x)
    * u[c]       = responsibility-weighted mean of the points

    Finally `update_cov_mat()` is called so the covariances are recomputed
    around the new means. Nothing is returned.

    The responsibility total is accumulated over the data points rather than
    over the table's values. The tables are keyed by (x, y), so a repeated
    point has a single entry; summing the values would count it once in the
    normaliser but once per occurrence in the numerator and in len(x).

    Raises:
        ZeroDivisionError: if there are no data points.
    """
    n_points = len(x)
    for comp, table in enumerate((pi_0, pi_1, pi_2)):
        total = 0
        sum_x = 0
        sum_y = 0
        # One pass per component gathers the normaliser and both numerators.
        for i in range(n_points):
            resp = table[(x[i], y[i])]
            total += resp
            sum_x += resp * x[i]
            sum_y += resp * y[i]
        weights[comp] = total / n_points
        u[comp][0] = sum_x / total
        u[comp][1] = sum_y / total
    update_cov_mat()

In [20]:
def update_cov_mat():
    """Re-estimate the 2x2 covariance matrix of each mixture component.

    For each of the three components, every point (x[i], y[i]) contributes
    the outer product of its offset from the component mean u[c], scaled by
    the point's responsibility from `pi_0`/`pi_1`/`pi_2`. The accumulated
    matrix is divided by the component's total responsibility and stored in
    cov_mat[c] as a nested list. Nothing is returned.

    The total responsibility is accumulated over the data points, in the same
    pass as the scatter matrix. The tables are keyed by (x, y), so summing
    their values would count a repeated point once in the normaliser while it
    is counted once per occurrence in the scatter sum. This also avoids
    re-summing the whole table for every matrix entry.
    """
    n_points = len(x)
    for comp, table in enumerate((pi_0, pi_1, pi_2)):
        mean_x, mean_y = u[comp][0], u[comp][1]
        scatter = np.zeros((2, 2))
        total = 0
        for i in range(n_points):
            resp = table[(x[i], y[i])]
            offset = np.array([x[i] - mean_x, y[i] - mean_y])
            # Responsibility-weighted outer product (offset)(offset)^T.
            scatter += resp * np.outer(offset, offset)
            total += resp
        cov_mat[comp] = (scatter / total).tolist()

In [23]:
def repeat_untill_convergence(max_iterations=77, tol=None):
    """Alternate E- and M-steps, printing the parameters after every pass.

    Args:
        max_iterations: Upper bound on the number of EM passes. The default
            of 77 matches the previously hard-coded loop (i = 0 .. 76).
        tol: Optional convergence threshold. When given, the loop stops early
            once the largest absolute change in any mean coordinate between
            two consecutive passes falls below `tol`. When None (default) all
            `max_iterations` passes are run.

    Returns:
        None. The module-level `u`, `cov_mat` and `weights` are updated in
        place by the steps this function calls.
    """
    for i in range(max_iterations):
        print("Iteration: " + str(i))
        # The M-step overwrites the means in place, so snapshot them first.
        previous_means = [list(mean) for mean in u]
        expectation()
        print("Expectation done")
        maximization()
        print("Maximization done")
        report = (
            ("Mean 1: ", u[0]),
            ("Mean 2: ", u[1]),
            ("Mean 3: ", u[2]),
            ("Covariance 1: ", cov_mat[0]),
            ("Covariance 2: ", cov_mat[1]),
            ("Covariance 3: ", cov_mat[2]),
            ("Weights: ", weights),
        )
        for label, value in report:
            print(label, end = " ")
            print(value)
        print("=============================================")
        if tol is not None:
            shift = max(
                (abs(new - old)
                 for old_mean, new_mean in zip(previous_means, u)
                 for old, new in zip(old_mean, new_mean)),
                default=0.0,
            )
            if shift < tol:
                break

In [24]:
# Run the EM loop; each pass prints the current means, covariances and weights.
repeat_untill_convergence()

Iteration: 0
Expectation done
Maximization done
Mean 1:  [3.0467196929962985, 3.068441005334694]
Mean 2:  [7.023023844002468, 4.016252672978351]
Mean 3:  [5.0151562494154565, 7.004494408688037]
Covariance 1:  [[1.0339262584171014, 0.04006820509665956], [0.04006820509665956, 3.425157522432528]]
Covariance 2:  [[0.9879446413012476, 0.5000187755321673], [0.5000187755321673, 0.9955615889418641]]
Covariance 3:  [[0.9755631955228826, 0.18260368252398734], [0.18260368252398734, 0.9704946928026956]]
Weights:  {0: 0.20722337243634692, 1: 0.2982472464912047, 2: 0.4945293810724484}
Iteration: 1
Expectation done
Maximization done
Mean 1:  [3.0458770001690314, 3.0660454880789567]
Mean 2:  [7.022852467107506, 4.0161596144740646]
Mean 3:  [5.014746612080138, 7.004135855040982]
Covariance 1:  [[1.0332759894946775, 0.0384639509978191], [0.0384639509978191, 3.42027360292296]]
Covariance 2:  [[0.9882329520159965, 0.5001277145093495], [0.5001277145093495, 0.9955693808056224]]
Covariance 3:  [[0.9760562862

Expectation done
Maximization done
Mean 1:  [3.040846397096481, 3.0517598771258974]
Mean 2:  [7.021807335955111, 4.015593565316082]
Mean 3:  [5.012290063202684, 7.00197076359658]
Covariance 1:  [[1.029393111799702, 0.028977831735405645], [0.028977831735405645, 3.391294161179165]]
Covariance 2:  [[0.9899968157219011, 0.5007995838878605], [0.5007995838878605, 0.9956309451894311]]
Covariance 3:  [[0.9790296128807743, 0.1847307497627404], [0.1847307497627404, 0.9738762637091221]]
Weights:  {0: 0.2058635368740314, 1: 0.298404736277937, 2: 0.49573172684803146}
Iteration: 15
Expectation done
Maximization done
Mean 1:  [3.040705637318115, 3.051360468485291]
Mean 2:  [7.021777552551741, 4.015577463636053]
Mean 3:  [5.012221042405019, 7.001909564401664]
Covariance 1:  [[1.029284455348193, 0.028714630624613886], [0.028714630624613886, 3.39048753185353]]
Covariance 2:  [[0.990047224052095, 0.5008189190267284], [0.5008189190267284, 0.9956330352463523]]
Covariance 3:  [[0.9791135657466288, 0.1847830

Expectation done
Maximization done
Mean 1:  [3.0398764724781473, 3.049007983378894]
Mean 2:  [7.021601516909666, 4.015482319186634]
Mean 3:  [5.011814139809734, 7.001548366130156]
Covariance 1:  [[1.0286444022926438, 0.027166659413956396], [0.027166659413956396, 3.3857405391842517]]
Covariance 2:  [[0.990345327358526, 0.5009334130094815], [0.5009334130094815, 0.9956457495032108]]
Covariance 3:  [[0.9796089653865939, 0.1850924214125712], [0.1850924214125712, 0.974442241255445]]
Weights:  {0: 0.20563867830875715, 1: 0.29843141210531915, 2: 0.49592990958592137}
Iteration: 29
Expectation done
Maximization done
Mean 1:  [3.039853539650864, 3.0489429256510943]
Mean 2:  [7.021596633786582, 4.015479680513115]
Mean 3:  [5.011802877829533, 7.001538359234447]
Covariance 1:  [[1.0286267000008131, 0.027123905537190445], [0.027123905537190445, 3.3856093585786713]]
Covariance 2:  [[0.9903536005579665, 0.5009365941989302], [0.5009365941989302, 0.9956461109135605]]
Covariance 3:  [[0.979622688126719, 0

Expectation done
Maximization done
Mean 1:  [3.039718739660467, 3.0485605209390743]
Mean 2:  [7.02156791499572, 4.0154641624487555]
Mean 3:  [5.011736670765741, 7.001479519811183]
Covariance 1:  [[1.0285226455076788, 0.026872661390275776], [0.026872661390275776, 3.3848383938136535]]
Covariance 2:  [[0.9904022615502114, 0.500955309192144], [0.500955309192144, 0.995648245896556]]
Covariance 3:  [[0.9797033739932011, 0.1851515207650085], [0.1851515207650085, 0.9745344951942259]]
Weights:  {0: 0.20560210205547036, 1: 0.29843576770654606, 2: 0.4959621302379839}
Iteration: 43
Expectation done
Maximization done
Mean 1:  [3.0397150183905235, 3.0485499644947622]
Mean 2:  [7.02156712181031, 4.015463733868976]
Mean 3:  [5.011734842850629, 7.001477895048786]
Covariance 1:  [[1.0285197729994389, 0.026865727147000442], [0.026865727147000442, 3.3848171135626326]]
Covariance 2:  [[0.990403605626083, 0.5009558262198793], [0.5009558262198793, 0.9956483050914424]]
Covariance 3:  [[0.9797056019568522, 0.1

Expectation done
Maximization done
Mean 1:  [3.0396931522086175, 3.048487935007973]
Mean 2:  [7.021562460642263, 4.01546121532746]
Mean 3:  [5.011724101790607, 7.00146834745618]
Covariance 1:  [[1.0285028941479577, 0.026824983235033493], [0.026824983235033493, 3.3846920739472663]]
Covariance 2:  [[0.9904115042267654, 0.5009588646918764], [0.5009588646918764, 0.9956486531991933]]
Covariance 3:  [[0.9797186940805993, 0.1851611153563898], [0.1851611153563898, 0.9745494663022913]]
Weights:  {0: 0.20559616840758926, 1: 0.29843647473179663, 2: 0.49596735686061466}
Iteration: 57
Expectation done
Maximization done
Mean 1:  [3.0396925487573907, 3.048486223155601]
Mean 2:  [7.021562331995871, 4.015461145817075]
Mean 3:  [5.011723805358952, 7.001468083954982]
Covariance 1:  [[1.0285024283345858, 0.026823858847496825], [0.026823858847496825, 3.384688623246653]]
Covariance 2:  [[0.9904117222277723, 0.5009589485561113], [0.5009589485561113, 0.9956486628128225]]
Covariance 3:  [[0.979719055404835, 0.

Expectation done
Maximization done
Mean 1:  [3.0396890030770494, 3.0484761648805923]
Mean 2:  [7.021561576101367, 4.015460737391661]
Mean 3:  [5.01172206361838, 7.0014665356960055]
Covariance 1:  [[1.0284996913690418, 0.026817252362048825], [0.026817252362048825, 3.3846683481508477]]
Covariance 2:  [[0.9904130031511813, 0.5009594413256684], [0.5009594413256684, 0.995648719306738]]
Covariance 3:  [[0.9797211784428118, 0.18516267136362444], [0.18516267136362444, 0.9745518940903061]]
Weights:  {0: 0.20559520623095703, 1: 0.298436589391481, 2: 0.4959682043775624}
Iteration: 71
Expectation done
Maximization done
Mean 1:  [3.039688905230078, 3.048475887311453]
Mean 2:  [7.021561555241355, 4.0154607261205815]
Mean 3:  [5.011722015553027, 7.001466492969762]
Covariance 1:  [[1.0284996158394593, 0.02681707004985511], [0.02681707004985511, 3.3846677886391254]]
Covariance 2:  [[0.9904130385001919, 0.5009594549244508], [0.5009594549244508, 0.9956487208659293]]
Covariance 3:  [[0.9797212370307872, 0