In [1]:
import numpy as np
from sklearn.mixture import GaussianMixture
import matplotlib as mpl
import matplotlib.colors
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import pairwise_distances_argmin
%matplotlib auto

Using matplotlib backend: Qt5Agg


In [2]:
# Axis labels (in Chinese) for the four iris measurements, indexed by feature
# column: sepal length, sepal width, petal length, petal width.
iris_feature = (
    '花萼长度',
    '花萼宽度',
    '花瓣长度',
    '花瓣宽度',
)

In [3]:
def expand(a, b, rate=0.05):
    """Widen the interval ``[a, b]`` by a fraction of its length on both sides.

    Parameters
    ----------
    a, b : lower and upper bound of the interval.
    rate : fraction of ``b - a`` added as a margin at each end.

    Returns
    -------
    tuple
        ``(a - margin, b + margin)`` where ``margin = (b - a) * rate``.
    """
    margin = rate * (b - a)
    lower = a - margin
    upper = b + margin
    return lower, upper

In [6]:
def iris_type(s):
    """Convert an iris species name into its integer class label.

    Parameters
    ----------
    s : bytes or str
        Species name as read from the data file. ``np.loadtxt`` hands
        converters ``bytes`` or ``str`` depending on the NumPy version and
        the ``encoding`` argument, so both are accepted. Surrounding
        whitespace is ignored.

    Returns
    -------
    int
        0 for Iris-setosa, 1 for Iris-versicolor, 2 for Iris-virginica.

    Raises
    ------
    KeyError
        If the name is not one of the three known species.
    """
    it = {b'Iris-setosa': 0, b'Iris-versicolor': 1, b'Iris-virginica': 2}
    if isinstance(s, str):
        # Normalise text input to bytes so a single lookup table serves both.
        s = s.encode('utf-8')
    return it[s.strip()]

In [7]:
# Load the iris CSV: four numeric feature columns followed by the species
# name, which the converter on column 4 turns into a numeric class label.
# NOTE(review): this is a machine-specific absolute path; consider making it
# configurable before sharing the notebook.
path = r'F:\study\ml\DoctorZou\08RegressionII\8.iris.data'
data = np.loadtxt(
    path,
    dtype=float,  # the np.float alias was removed in NumPy 1.24
    delimiter=',',
    converters={4: iris_type},
    # Pin the pre-NumPy-2 default so the converter keeps receiving bytes.
    encoding='bytes',
)

In [8]:
# Separate the table into the feature matrix (first four columns) and a flat
# vector of class labels (remaining column).
x_prime = data[:, :4]
y = data[:, 4:].ravel()

In [9]:
# Number of mixture components to fit.
n_components = 3
# Every unordered pair of the four feature columns, in lexicographic order.
feature_pairs = [
    [first, second]
    for first in range(4)
    for second in range(first + 1, 4)
]

In [None]:
# Preview the first few feature rows (an empty subscript is a syntax error).
x_prime[:5]

In [47]:
# For every pair of iris features: fit a Gaussian mixture, align its
# components with the true classes, print the accuracy, and draw the
# decision regions together with the samples in a 3x2 grid of subplots.


def _relabel(labels, order):
    """Translate mixture-component indices into class indices.

    ``order[i]`` is the component matched to class ``i``. All masks are
    computed from the untouched input before anything is written, so an
    already-translated label can never be translated a second time.
    Components that do not appear in ``order`` keep their original index.
    Returns a new array; ``labels`` itself is not modified.
    """
    masks = [labels == component for component in order]
    relabeled = labels.copy()
    for class_id, mask in enumerate(masks):
        relabeled[mask] = class_id
    return relabeled


n_types = 3
# Loop-invariant colour maps: light for decision regions, dark for samples.
cm_light = mpl.colors.ListedColormap(['#FF8080', '#77E0A0', '#A0A0FF'])
cm_dark = mpl.colors.ListedColormap(['r', 'g', 'b'])

# Uncomment to draw on a fresh, explicitly sized figure:
# plt.figure(figsize=(8,6),facecolor='w')
for k, pair in enumerate(feature_pairs):
    x = x_prime[:, pair]

    # Per-class means of the true labels, used to match components to classes.
    m = np.array([np.mean(x[y == i], axis=0) for i in range(n_types)])
    print('m Mean : \n', m)

    gmm = GaussianMixture(n_components=n_components, covariance_type='full', random_state=0)
    gmm.fit(x)
    print('predict mean : \n', gmm.means_)
    print('predict cov : \n', gmm.covariances_)

    # order[i] is the fitted component whose mean is closest to class i's mean.
    order = pairwise_distances_argmin(m, gmm.means_, axis=1, metric='euclidean')
    print('order :', order)

    y_hat = _relabel(gmm.predict(x), order)
    acc = 'acc rate : %.2f%%' % (100 * np.mean(y_hat == y))
    print(acc)

    # Plot window: the data range of each axis, padded on both sides.
    # (x1 must use column 0 for both bounds, and both x2 bounds must be padded.)
    x1_min, x1_max = expand(x[:, 0].min(), x[:, 0].max())
    x2_min, x2_max = expand(x[:, 1].min(), x[:, 1].max())

    # Classify a dense 500x500 grid to paint the decision regions.
    x1, x2 = np.mgrid[x1_min:x1_max:500j, x2_min:x2_max:500j]
    grid_test = np.stack((x1.ravel(), x2.ravel()), axis=1)
    grid_hat = _relabel(gmm.predict(grid_test), order).reshape(x1.shape)

    plt.subplot(3, 2, k + 1)
    plt.pcolormesh(x1, x2, grid_hat, cmap=cm_light)
    plt.scatter(x[:, 0], x[:, 1], s=30, c=y, marker='o', cmap=cm_dark, edgecolors='k')

    # Accuracy annotation near the top-left corner of the subplot.
    xx = 0.95 * x1_min + 0.05 * x1_max
    yy = 0.1 * x2_min + 0.9 * x2_max
    plt.text(xx, yy, acc, fontsize=10)

    plt.xlim((x1_min, x1_max))
    plt.ylim((x2_min, x2_max))
    plt.xlabel(iris_feature[pair[0]], fontsize=10)
    plt.ylabel(iris_feature[pair[1]], fontsize=10)

# `pad` is passed by keyword; current Matplotlib rejects it positionally.
plt.tight_layout(pad=2)
plt.suptitle('EM algorithm and Iris', fontsize=10)
plt.subplots_adjust(top=0.92)  # leave room for the figure title
plt.show()

m Mean : 
 [[ 5.006  3.418]
 [ 5.936  2.77 ]
 [ 6.588  2.974]]
predict mean : 
 [[ 5.01494511  3.44040237]
 [ 6.69225795  3.03018616]
 [ 5.90652226  2.74740414]]
predict cov : 
 [[[ 0.11948421  0.08969613]
  [ 0.08969613  0.12149899]]

 [[ 0.3588512   0.05091598]
  [ 0.05091598  0.08956947]]

 [[ 0.27590209  0.08910477]
  [ 0.08910477  0.09414053]]]
order : [0 2 1]
acc rate : 78.67%
m Mean : 
 [[ 5.006  1.464]
 [ 5.936  4.26 ]
 [ 6.588  5.552]]
predict mean : 
 [[ 5.0060006   1.46399865]
 [ 6.04240777  4.41742864]
 [ 6.58888904  5.63329718]]
predict cov : 
 [[[ 0.12176525  0.01581631]
  [ 0.01581631  0.0295045 ]]

 [[ 0.28119672  0.23746926]
  [ 0.23746926  0.31503012]]

 [[ 0.48521779  0.36602418]
  [ 0.36602418  0.32601109]]]
order : [0 1 2]
acc rate : 91.33%
m Mean : 
 [[ 5.006  0.244]
 [ 5.936  1.326]
 [ 6.588  2.026]]
predict mean : 
 [[ 5.00605757  0.23727675]
 [ 6.57289666  2.05192938]
 [ 5.977111    1.33910201]]
predict cov : 
 [[[ 0.12407758  0.01055895]
  [ 0.01055895  0.0090