In [1]:
import numpy as np
from sklearn import mixture

# Fix the global NumPy RNG state so the synthetic data below is repeatable.
np.random.seed(1)

# Two-component mixture model; the fitted parameters are read back from it
# after fit() and reused by the following cell.
g = mixture.GMM(n_components=2, covariance_type="full")

# Dimensionality of every observation (also used when drawing test samples).
dimensions = 2

# Synthetic data set: 100 random points as drawn, followed by 300 random
# points shifted by +10 in every dimension. The draw order matters for
# reproducibility, so the unshifted cluster is generated first.
clusterNearOrigin = np.random.randn(100, dimensions)
clusterNearTen = 10 + np.random.randn(300, dimensions)
obs = np.concatenate((clusterNearOrigin, clusterNearTen))

g.fit(obs)

# Report the fitted parameters: one weight, one mean vector and one
# covariance matrix per mixture component.
# (Single-argument parenthesised print behaves the same as the print statement.)
print("mixture component weights\n%s" % g.weights_)

print("mixture component mean vectors\n%s" % g.means_)

print("mixture component covariance matrices\n%s" % g.covars_)

mixture component weights
[ 0.75  0.25]
mixture component mean vectors
[[  9.95979962  10.04835949]
 [  0.22665544  -0.01327782]]
mixture component covariance matrices
[[[ 1.04862299 -0.10230272]
  [-0.10230272  1.05411687]]

 [[ 0.73906418 -0.11445233]
  [-0.11445233  0.89051922]]]


In [6]:
from scipy import linalg
from scipy import constants

# Manual, step-by-step evaluation of the mixture density at a single point,
# printed next to the values reported by the fitted model `g` so the two can
# be compared. `g` and `dimensions` are defined by the earlier fitting cell.
numberOfMixtureComponents = g.n_components

# Re-seed so the same test point is drawn every time this cell is run.
np.random.seed(1)

# One test point: a single random draw shifted by +3 in every dimension.
sample = 3 + np.random.randn(1, dimensions)[0]

print("sample\n%s" % sample)

# Reference values from the library model. NOTE: the printed labels say
# "scipy", but `g` was created from sklearn's `mixture` module.
print("scipy GMM pdf\n%s" % g.predict_proba([sample]))

print("scipy GMM log pr\n%s" % g.score([sample]))

# weighted density of the sample under each mixture component
gaussianPDF = []

# running total of those weighted densities
gaussianPDFSum = 0

for mixtureComponentIndex in range(numberOfMixtureComponents):
    print("\nmixture component %i" % mixtureComponentIndex)

    # 1. read the fitted parameters of this component
    mixtureWeight = g.weights_[mixtureComponentIndex]
    print("  mixture weight %f" % mixtureWeight)

    meanVector = g.means_[mixtureComponentIndex]
    print("  mean vector\n%s" % meanVector)

    covarianceMatrix = g.covars_[mixtureComponentIndex]

    precisionMatrix = linalg.inv(covarianceMatrix)
    print("  precision matrix\n%s" % precisionMatrix)

    determinant = linalg.det(covarianceMatrix)
    print("  determinant %f" % determinant)

    numberOfDimensions = len(meanVector)
    axes = range(numberOfDimensions)

    # 2. center the sample on the component mean, one coordinate at a time
    centeredSample = [sample[k] - meanVector[k] for k in axes]

    # 3. normalisation constant: 1 / sqrt((2*pi)^d * det(covariance))
    scale = 1.0 / np.sqrt(
        np.power((2 * constants.pi), numberOfDimensions) * determinant
    )

    # 4. quadratic form centered^T * precision * centered, done in two passes:
    #    first the row-vector times matrix product, accumulated from 0 in
    #    index order ...
    distanceTemp = [
        sum(centeredSample[row] * precisionMatrix[row][col] for row in axes)
        for col in axes
    ]

    #    ... then the dot product with the centered sample, scaled by -1/2
    #    to give the exponent of the Gaussian.
    distance = - 0.5 * sum(centeredSample[k] * distanceTemp[k] for k in axes)

    # 5. density of the sample under this component alone
    pdf = scale * np.exp(distance)

    # 6. weight the density by the mixture weight; this value is still
    #    unnormalised -- it is divided by the sum over components below
    posteriorPDF = mixtureWeight * pdf

    # 7. accumulate the total and 8. remember this component's contribution
    gaussianPDFSum = gaussianPDFSum + posteriorPDF
    gaussianPDF.append(posteriorPDF)

# blank separator line before the summary
print("")

# Normalise each weighted density by the total so the values are directly
# comparable with the predict_proba() output printed above. Dividing the
# plain list by the NumPy scalar yields an array (see the recorded output).
print("calculated Gaussian pdf\n%s" % (gaussianPDF / gaussianPDFSum))

# Un-normalised total: the mixture density at the sample ...
print("calculated pr\n%s" % gaussianPDFSum)

# ... and its natural log, comparable with the score() output printed above.
print("calculated log pr\n%s" % np.log(gaussianPDFSum))

sample
[ 4.62434536  2.38824359]
scipy GMM pdf
[[  3.94305515e-12   1.00000000e+00]]
scipy GMM log pr
[-21.53236115]

mixture component 0
  mixture weight 0.750000
  mean vector
[  9.95979962  10.04835949]
  precision matrix
[[ 0.96274704  0.09343522]
 [ 0.09343522  0.95772936]]
  determinant 1.094905

mixture component 1
  mixture weight 0.250000
  mean vector
[ 0.22665544 -0.01327782]
  precision matrix
[[ 1.38053969  0.1774313 ]
 [ 0.1774313   1.14574442]]
  determinant 0.645052

calculated Gaussian pdf
[  3.94305515e-12   1.00000000e+00]
calculated pr
4.45260707191e-10
calculated log pr
-21.5323611465
