In [1]:
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.mixture import GaussianMixture
from mpl_toolkits.mplot3d import Axes3D
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import pairwise_distances_argmin
%matplotlib auto

Using matplotlib backend: Qt5Agg


In [2]:
# Synthetic data set: two 3-D Gaussian clusters that share an identity covariance.
# The global NumPy generator is seeded first so that a fresh-kernel run draws
# exactly the same samples (and therefore reproduces every number below).
SEED=0
np.random.seed(SEED)
style='sklearn'                    # not read by any of the visible cells
n1,n2=400,100                      # samples drawn from cluster 1 / cluster 2
mu1_fact=(0,0,0)                   # true mean of cluster 1
mu2_fact=(2,2,1)                   # true mean of cluster 2
cov_fact=np.identity(3)            # true covariance, common to both clusters
data1=np.random.multivariate_normal(mu1_fact,cov_fact,n1)
data2=np.random.multivariate_normal(mu2_fact,cov_fact,n2)
# Rows 0..n1-1 come from cluster 1, the remaining n2 rows from cluster 2.
data=np.vstack((data1,data2))
# Ground-truth labels in the same row order: True = cluster 1, False = cluster 2.
y=np.array([True]*n1+[False]*n2)

In [3]:
######### gmm
# Reference fit with scikit-learn's GaussianMixture, to compare against the
# hand-written EM loop later in the notebook.
# random_state pins the estimator's random initialisation, so for a given
# `data` the fitted parameters and the order of the two components are stable.
g=GaussianMixture(n_components=2,covariance_type='full',tol=1e-6,max_iter=1000,random_state=0)
g.fit(data)
# weights_[0] is the mixing weight of the first fitted component.
print('category prob:\t',g.weights_[0])
print('mean:\n',g.means_,'\n')
print('cov:\n',g.covariances_,'\n')
# Unpack per-component parameters (one row / one matrix per component).
mu1,mu2=g.means_
sigma1,sigma2=g.covariances_

category prob:	 0.848187075121
mean:
 [[ 0.10345824  0.05852395  0.05666517]
 [ 2.01338169  2.25911224  1.16649277]] 

conv:
 [[[ 1.16166605  0.09468661  0.07794703]
  [ 0.09468661  0.93188223  0.01778669]
  [ 0.07794703  0.01778669  1.06947309]]

 [[ 1.15659348 -0.13917886 -0.2184768 ]
  [-0.13917886  0.49455066  0.0122214 ]
  [-0.2184768   0.0122214   1.03303814]]] 



In [9]:
# Starting point for the hand-written EM on the two-component mixture.
num_iter=100                       # number of EM sweeps to run
n,d=data.shape                     # n samples, d features
# Seed the two means at opposite corners of the data's bounding box.
mu1,mu2=data.min(axis=0),data.max(axis=0)
# Each covariance starts as its own d x d identity matrix.
sigma1,sigma2=np.identity(d),np.identity(d)
pi=0.5                             # initial mixing weight of component 1

In [10]:
# Display the current covariance estimate of component 1.
sigma1

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [12]:
# EM for a two-component Gaussian mixture.
# gamma[k] is the responsibility of component 1 for sample k; component 2
# receives 1-gamma[k].  pi is the mixing weight of component 1.
for i in range(num_iter):
    # E step: posterior probability that each sample belongs to component 1.
    # Worked out from log densities: dividing the raw pdf values gives 0/0 = NaN
    # for any sample where both densities underflow to zero, and that NaN would
    # then contaminate every parameter in the M step.
    norm1=multivariate_normal(mu1,sigma1)
    norm2=multivariate_normal(mu2,sigma2)
    log_tau1=np.log(pi)+norm1.logpdf(data)
    log_tau2=np.log(1-pi)+norm2.logpdf(data)
    gamma=np.exp(log_tau1-np.logaddexp(log_tau1,log_tau2))
    # Keep the linear-domain weighted densities available under their original names.
    tau1=np.exp(log_tau1)
    tau2=np.exp(log_tau2)

    # M step: closed-form updates of the means, covariances and mixing weight.
    w1=np.sum(gamma)               # effective number of samples in component 1
    w2=np.sum(1-gamma)             # effective number of samples in component 2
    mu1=np.dot(gamma,data)/w1
    mu2=np.dot(1-gamma,data)/w2

    # (data-mu).T is (d, n); multiplying by the length-n weights scales each
    # sample's column before the (d, n) x (n, d) product.
    sigma1=np.dot(gamma*(data-mu1).T,data-mu1)/w1
    sigma2=np.dot((1-gamma)*(data-mu2).T,data-mu2)/w2
    pi=w1/n
    print(i,'\t',mu1,mu2)
print('Category prob:\t',pi)
print('Mean:\t',mu1,mu2)
print('Covariance:\n',sigma1,'\n',sigma2)

0 	 [ 0.08565286  0.03733502  0.04663804] [ 1.95412966  2.19425993  1.13044304]
1 	 [ 0.08795735  0.0398257   0.04788954] [ 1.96137105  2.20348965  1.13508078]
2 	 [ 0.09003701  0.04213782  0.04903073] [ 1.96802531  2.21164067  1.1392834 ]
3 	 [ 0.09190026  0.04426134  0.05006257] [ 1.97407842  2.2187852   1.14305885]
4 	 [ 0.09355848  0.04619253  0.05098834] [ 1.97953492  2.2250077   1.14642413]
5 	 [ 0.09502526  0.04793319  0.05181312] [ 1.98441418  2.2303986   1.14940307]
6 	 [ 0.09631563  0.04948961  0.0525433 ] [ 1.9887465   2.23504906  1.15202411]
7 	 [ 0.09744532  0.05087149  0.05318613] [ 1.99256953  2.23904717  1.15431824]
8 	 [ 0.09843018  0.05209079  0.05374929] [ 1.99592514  2.24247535  1.15631727]
9 	 [ 0.09928563  0.05316087  0.05424054] [ 1.9988569   2.24540887  1.15805256]
10 	 [ 0.10002635  0.05409563  0.0546675 ] [ 2.00140819  2.24791526  1.15955407]
11 	 [ 0.100666    0.05490892  0.05503739] [ 2.00362087  2.25005425  1.1608498 ]
12 	 [ 0.10121711  0.05561413  0.05535

In [13]:
# Re-evaluate the weighted component densities with the current parameters;
# the plotting cell compares tau1 and tau2 per sample to assign a component.
norm1=multivariate_normal(mean=mu1,cov=sigma1)
norm2=multivariate_normal(mean=mu2,cov=sigma2)
tau1=norm1.pdf(data)*pi
tau2=norm2.pdf(data)*(1-pi)

In [19]:
# Left panel: the raw samples.  Right panel: the same samples coloured by the
# component the EM parameters assign them to.
fig=plt.figure(figsize=(12,8),facecolor='w')
ax=fig.add_subplot(121,projection='3d')
ax.scatter(data[:,0],data[:,1],data[:,2],c='b',s=30,marker='o',depthshade=True)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title('original data:',fontsize=18)

ax=fig.add_subplot(122,projection='3d')
# order[k] is the index of the estimated mean nearest to the k-th true mean.
# order[0]==0 therefore means component 1 matches the first true cluster;
# otherwise the two components are swapped and the comparison is flipped.
order=pairwise_distances_argmin([mu1_fact,mu2_fact],[mu1,mu2],metric='euclidean')
if order[0]==0:
    c1=tau1>tau2
else:
    c1=tau1<tau2
c2=~c1
# y is True for the first true cluster, so agreement with c1 is the accuracy.
acc=np.mean(y==c1)
print('accuracy:%.2f%%' % (100*acc))
ax.scatter(data[c1,0],data[c1,1],data[c1,2],c='r',s=30,marker='o',depthshade=True)
ax.scatter(data[c2,0],data[c2,1],data[c2,2],c='g',s=30,marker='^',depthshade=True)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title('EM data:',fontsize=18)
plt.tight_layout()
plt.show()

accurancy:91.40%


In [18]:
# Scratch check: print the negation of every element of a small list.
a=[-1,1,2]
for i in a:
    print(-1*i)

1
-1
-2


In [22]:
# For each true mean, the index of the nearest estimated mean (Euclidean distance).
pairwise_distances_argmin([mu1_fact,mu2_fact],[mu1,mu2],metric='euclidean')

array([0, 1], dtype=int64)

In [23]:
# Display the true mean that the first cluster was sampled from.
mu1_fact

(0, 0, 0)

In [24]:
# Display the true mean that the second cluster was sampled from.
mu2_fact

(2, 2, 1)

In [27]:
# Display the current estimates of the two component means.
[mu1,mu2]

[array([ 0.10448128,  0.05987378,  0.05726695]),
 array([ 2.01699845,  2.26231798,  1.16855337])]

In [25]:
# Display both true cluster means, in the order they are passed to pairwise_distances_argmin.
[mu1_fact,mu2_fact]

[(0, 0, 0), (2, 2, 1)]

In [26]:
# Display the current estimates of the two component means (same expression as an earlier cell).
[mu1,mu2]

[array([ 0.10448128,  0.05987378,  0.05726695]),
 array([ 2.01699845,  2.26231798,  1.16855337])]

In [29]:
# Display tau1: component 1's density at each sample, scaled by the mixing weight pi.
# NOTE(review): this prints one value per sample; consider slicing (e.g. tau1[:10]).
tau1

array([  4.54313457e-03,   6.75497807e-03,   2.19274063e-02,
         2.52709330e-02,   3.02997404e-02,   1.48413205e-02,
         4.14106642e-02,   3.20498398e-02,   1.34785990e-02,
         2.70282835e-02,   5.05350557e-03,   6.73322234e-03,
         5.74858703e-03,   3.47749279e-03,   1.07680951e-03,
         2.62646231e-02,   4.26718901e-02,   3.08880340e-03,
         9.87332298e-04,   4.28946161e-02,   1.84043145e-03,
         2.61708165e-03,   2.96768671e-02,   4.31927625e-02,
         1.36922724e-02,   2.59909107e-02,   7.83629151e-03,
         2.90814803e-03,   1.90801417e-04,   2.30611472e-02,
         1.59994491e-02,   2.59991191e-02,   3.25234664e-02,
         2.01058819e-02,   3.26374043e-02,   4.78174376e-05,
         7.60335031e-03,   1.11810431e-02,   4.19871228e-04,
         2.13000471e-02,   2.84074070e-02,   2.13566817e-02,
         7.34188919e-04,   2.90077333e-02,   1.75277947e-02,
         1.68551697e-02,   4.85187979e-03,   1.92256040e-02,
         2.88140127e-02,