In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn import datasets
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA#主成分分析

from scipy.stats import multivariate_normal
from sklearn.mixture import GaussianMixture,BayesianGaussianMixture

from collections import Counter
import math

我们谈到了用 k-means 进行聚类的方法，这次我们来说一下另一个很流行的算法：Gaussian Mixture Model (GMM)。事实上，GMM 和 k-means 很像，不过 GMM 是学习出一些概率密度函数来（所以 GMM 除了用在 clustering 上之外，还经常被用于 density estimation ），简单地说，k-means 的结果是每个数据点被 assign 到其中某一个 cluster 了，而 GMM 则给出这些数据点被 assign 到每个 cluster 的概率，又称作 soft assignment 。

得出一个概率有很多好处，因为它的信息量比简单的一个结果要多，比如，我可以把这个概率转换为一个 score ，表示算法对自己得出的这个结果的把握。也许我可以对同一个任务，用多个方法得到结果，最后选取“把握”最大的那个结果；另一个很常见的方法是在诸如疾病诊断之类的场所，机器对于那些很容易分辨的情况（患病或者不患病的概率很高）可以自动区分，而对于那种很难分辨的情况，比如，49% 的概率患病，51% 的概率正常，如果仅仅简单地使用 50% 的阈值将患者诊断为“正常”的话，风险是非常大的，因此，在机器对自己的结果把握很小的情况下，会“拒绝发表评论”，而把这个任务留给有经验的医生去解决。

废话说了一堆，不过，在回到 GMM 之前，我们再稍微扯几句。我们知道，不管是机器还是人，学习的过程都可以看作是一种“归纳”的过程，在归纳的时候你需要有一些假设的前提条件，例如，当你被告知水里游的那个家伙是鱼之后，你使用“在同样的地方生活的是同一种东西”这类似的假设，归纳出“在水里游的都是鱼”这样一个结论。当然这个过程是完全“本能”的，如果不仔细去想，你也不会了解自己是怎样“认识鱼”的。另一个值得注意的地方是这样的假设并不总是完全正确的，甚至可以说总是会有这样那样的缺陷的，因此你有可能会把虾、龟、甚至是潜水员当做鱼。也许你觉得可以通过修改前提假设来解决这个问题，例如，基于“生活在同样的地方并且穿着同样衣服的是同一种东西”这个假设，你得出结论：在水里有并且身上长有鳞片的是鱼。可是这样还是有问题，因为有些没有长鳞片的鱼现在又被你排除在外了。

在这个问题上，机器学习面临着和人一样的问题，在机器学习中，一个学习算法也会有一个前提假设，这里被称作“归纳偏执 (bias)”（bias 这个英文词在机器学习和统计里还有其他许多的意思）。例如线性回归，目的是要找一个函数尽可能好地拟合给定的数据点，它的归纳偏执就是“满足要求的函数必须是线性函数”。一个没有归纳偏执的学习算法从某种意义上来说毫无用处，就像一个完全没有归纳能力的人一样，在第一次看到鱼的时候有人告诉他那是鱼，下次看到另一条鱼了，他并不知道那也是鱼，因为两条鱼总有一些地方不一样的，或者就算是同一条鱼，在河里不同的地方看到，或者只是看到的时间不一样，也会被他认为是不同的，因为他无法归纳，无法提取主要矛盾、忽略次要因素，只好要求所有的条件都完全一样──然而哲学家已经告诉过我们了：世界上不会有任何样东西是完全一样的，所以这个人即使是有无比强悍的记忆力，也绝学不到任何一点知识。

这个问题在机器学习中称作“过拟合 (Overfitting)”，例如前面的回归的问题，如果去掉“线性函数”这个归纳偏执，因为对于 N 个点，我们总是可以构造一个 N-1 次多项式函数，让它完美地穿过所有的这 N 个点，或者如果我用任何大于 N-1 次的多项式函数的话，我甚至可以构造出无穷多个满足条件的函数出来。如果假定特定领域里的问题所给定的数据个数总是有个上限的话，我可以取一个足够大的 N ，从而得到一个（或者无穷多个）“超级函数”，能够 fit 这个领域内所有的问题。然而这个（或者这无穷多个）“超级函数”有用吗？只要我们注意到学习的目的（通常）不是解释现有的事物，而是从中归纳出知识，并能应用到新的事物上，结果就显而易见了。

没有归纳偏执或者归纳偏执太宽泛会导致 Overfitting ，然而另一个极端──限制过大的归纳偏执也是有问题的：如果数据本身并不是线性的，强行用线性函数去做回归通常并不能得到好结果。难点正在于在这之间寻找一个平衡点。不过人在这里相对于（现在的）机器来说有一个很大的优势：人通常不会孤立地用某一个独立的系统和模型去处理问题，一个人每天都会从各个来源获取大量的信息，并且通过各种手段进行整合处理，归纳所得的所有知识最终得以统一地存储起来，并能有机地组合起来去解决特定的问题。这里的“有机”这个词很有意思，搞理论的人总能提出各种各样的模型，并且这些模型都有严格的理论基础保证能达到期望的目的，然而绝大多数模型都会有那么一些“参数”（例如 K-means 中的 k ），通常没有理论来说明参数取哪个值更好，而模型实际的效果却通常和参数是否取到最优值有很大的关系，我觉得，在这里“有机”不妨看作是所有模型的参数已经自动地取到了最优值。另外，虽然进展不大，但是人们也一直都期望在计算机领域也建立起一个统一的知识系统（例如语意网就是这样一个尝试）。

In [218]:
print(np.linalg.det(np.eye(3)))


1.0


In [49]:
class GMM:
    def __init__(self):
        self.model = None
        self.k = 2
        
    def GMM_component(self, X, theta,param, k):#计算正态分布概率
        #这里要判断sigma是否为奇异阵，如果是奇异阵那么需要添加一个正则项
#         if(np.linalg.det(theta['sigma'][k])==0.0):
#             theta['sigma'][k] = self.regularization
            
#         sign = X-theta['mu'][k]
#         exponent = math.exp((-0.5*np.dot(sign.T,np.dot(np.linalg.inv(theta['sigma'][k]),sign))))
#         pp = exponent/(((2*math.pi)**(param["dim"]/2))*(np.linalg.det(theta['sigma'][k])**0.5))
#         return theta['pi'][k]*pp

        for i in range(param['dim']):
            for j in range(param['dim']):
                if np.isnan(theta['sigma'][k,i,j]):
                    theta['sigma'][k,i,j] = 0
        return theta['pi'][k]*multivariate_normal(theta['mu'][k], theta['sigma'][k,:,:],allow_singular=1,seed=1).pdf(X)
        
    def E_step(self, theta, param,X):#E步：更新隐变量概率分布q(Z)。这里的X是一个样本
        q = np.zeros((param['k'],len(X)))
        for i in range(param['k']):
            for j in range(len(X)):
                q[i,j] = self.GMM_component(X[j], theta ,param, i)
            
        q /= q.sum(axis=0)
#         print(q)
        return q

    def M_step(self,X,q,theta,param):#M步：使用q(Z)更新GMM参数。
        pi_temp = q.sum(axis=1); 
        pi_temp /= len(X) # 计算pi
        mu_temp = q.dot(X); mu_temp /= q.sum(axis=1)[:, None] # 计算mu
        sigma_temp = np.zeros((param['k'], param['dim'], param['dim']))
        for i in range(param['k']):
            ys = X - mu_temp[i, :]
            sigma_temp[i] = np.sum(q[i, :, None, None]*np.matmul(ys[..., None], ys[:, None, :]), axis=0)
        sigma_temp /= np.sum(q, axis=1)[:, None, None] # 计算sigma
        theta['pi'] = pi_temp; theta['mu'] = mu_temp; theta['sigma'] = sigma_temp
        return theta

    def likelihood(self,X,theta,param):#计算GMM的对数似然。
        ll = 0
        for i in range(param['k']):
            ll += self.GMM_component(X[0],theta,param,i)
        ll = np.log(ll).sum()
        return ll

    def EM_GMM(self,X,theta,param,eps=1e-5,max_iter=1000):#eps: 计算精度; max_iter: 最大迭代次数
        #print(X)#是一个由array构成的list
        for i in range(max_iter):
            ll_old = 0
            q = self.E_step(theta, param,X)# E-step
            theta = self.M_step(X, q, theta, param)# M-step
            ll_new = self.likelihood(X, theta,param)
            if np.abs(ll_new - ll_old) < eps:
                break;
            else:
                ll_old = ll_new
        ll_new = 1
        return theta

    # 分类别求出数学期望和标准差
    def train(self, X, y):
        theta = {}; param = {}
        param['k'] = self.k; param['N'] = X.shape[0]; param['dim'] = X.shape[1]#这里的N是整个训练样本的样本数，在实际计算时取相同标签的样本数
        theta['pi'] = np.ones(param['k'])/param['k']                 # 均匀初始化
        theta['mu'] = np.random.random((param['k'],param['dim']))    # 随机初始化
        theta['sigma'] = np.array([np.eye(param['dim'])]*param['k']) # 初始化为单位正定矩阵
        self.regularization = np.dot(np.eye(param['dim']),0.001)
#         print( self.regularization)
        #print(theta)
        
        labels = list(set(y))#标签的列表
        data = {label:[] for label in labels}#{0.0: [], 1.0: []}
        for f, label in zip(X, y):
            data[label].append(f)#print(data)#形成一个字典，根据标签将训练样本进行分类
        #print(data)
        
        self.model = {label: self.EM_GMM(value,theta,param,eps=1e-5,max_iter=50) for label, value in data.items()}
        print(self.model)
        return self.model

    # 计算概率
    def calculate_probabilities(self, input_data):
        probabilities = {}
        pp = 0.0
        dim = np.size(input_data)
        for label, value in self.model.items():#value是一个字典，表示特定标签的模型参数
#             print(value['pi'][1])
            for i in range(self.k):
                mu,sigma = value['mu'][i],value['sigma'][i]
#                 sign = input_data-mu
#                 exponent = math.exp((-0.5*np.dot(sign,np.dot(np.linalg.inv(sigma),sign.T))))
#                 pp = exponent/(((2*math.pi)**(dim/2))*(np.linalg.det(sigma)**0.5))
#                 for i in range(4):
#                     for j in range(4):
#                         if np.isnan(sigma[i,j]):
#                             sigma[i,j] = 0
                            
                pp += multivariate_normal(mu,sigma,allow_singular=1,seed=1).pdf(input_data)*value['pi'][i]
#                 pp = self.GMM_component(self,input_data,self.model[i]['theta'],self.model[i]['param'],i)
#                 probabilities[label] += value['pi'][i]*pp
            probabilities[label] = pp
            pp = 0.0

        return probabilities
    
    # 类别
    def predict(self, X_test):
        label = list(range(X_test.shape[0]))
        for i in range(X_test.shape[0]):#每个样本迭代一次
            label[i] = sorted(self.calculate_probabilities(X_test[i,:]).items(), key=lambda x: x[-1])[-1][0]
        
        return label
    
    def score(self, X_test, y_test):
        right = 0
        for X, y in zip(X_test, y_test):
            label = self.predict(X)
            if label == y:
                right += 1

        return right / float(len(X_test))
    
iris = datasets.load_iris()
X=iris.data
y=iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


model = GMM()
model.train(X_train, y_train)
y_pred = model.predict(X_test)

print("IRIS:Number of mislabeled points out of a total %d points : %d, Acc: %f%%"
      % (X_test.shape[0], (y_test != y_pred).sum(),100*(y_test == y_pred).sum()/X_test.shape[0]))


(75, 4) (75, 4) (75,) (75,)
{0: {'pi': array([0.82536018, 0.17463982]), 'mu': array([[6.52287126, 3.07037811, 5.54832577, 2.08188495],
       [5.9437815 , 2.71718001, 5.19692111, 1.71258978]]), 'sigma': array([[[ 0.36095862,  0.05615942,  0.2432668 ,  0.03546897],
        [ 0.05615942,  0.06906184,  0.05887008,  0.05427358],
        [ 0.2432668 ,  0.05887008,  0.21734134,  0.04740138],
        [ 0.03546897,  0.05427358,  0.04740138,  0.08054766]],

       [[ 0.05357667, -0.00211986,  0.02835285, -0.05003927],
        [-0.00211986,  0.00838199, -0.01653364,  0.00841736],
        [ 0.02835285, -0.01653364,  0.05747275, -0.04601809],
        [-0.05003927,  0.00841736, -0.04601809,  0.05772274]]])}, 1: {'pi': array([0.82536018, 0.17463982]), 'mu': array([[6.52287126, 3.07037811, 5.54832577, 2.08188495],
       [5.9437815 , 2.71718001, 5.19692111, 1.71258978]]), 'sigma': array([[[ 0.36095862,  0.05615942,  0.2432668 ,  0.03546897],
        [ 0.05615942,  0.06906184,  0.05887008,  0.05427358

In [258]:
class GaussianMixture:
    def __init__(self, n_components: int = 1, covariance_type: str = 'full',
                 tol: float = 0.001, reg_covar: float = 1e-06, max_iter: int = 100):
        self.n_components = n_components#混合模型的个数
        self.means_ = None
        self.covariances_ = None
        self.weights_ = None
        self.reg_covar = reg_covar  # 该参数是为了防止出现奇异协方差矩阵
        self.max_iter = max_iter

    def EM_GMM(self, X_train):
        # 获取一些必要的数据信息
        n_samples, n_feature = X_train.shape
        self.reg_covar = self.reg_covar * np.identity(n_feature)

        # 初始化一些必要的参数：均值，协方差，权重
        self.means_ = np.random.randint(X_train.min()/2, X_train.max()/2, size=(self.n_components, n_feature))
        self.covariances_ = np.zeros((self.n_components, n_feature, n_feature))
        for k in range(self.n_components):
            np.fill_diagonal(self.covariances_[k], 1)
        self.weights_ = np.ones(self.n_components) / self.n_components

        P_mat = np.zeros((n_samples, self.n_components))  # 概率矩阵
        for i in range(self.max_iter):# 分别对K各类概率
            for k in range(self.n_components):
                self.covariances_ += self.reg_covar  # 防止出现奇异协方差矩阵
                g = multivariate_normal(mean=self.means_[k], cov=self.covariances_[k])
                #### E-step，计算概率 ####
                P_mat[:, k] = self.weights_[k] * g.pdf(X_train)  # 计算X在各分布下出现的频率
            totol_N = P_mat.sum(axis=1)  # 计算各样本出现的总频率
            totol_N[totol_N == 0] = self.n_components# 如果某一样本在各类中的出现频率和为0，则使用K来代替，相当于分配等概率
            P_mat /= totol_N.reshape(-1, 1)
           
            #### M-step，更新参数 ####
            for k in range(self.n_components):
                N_k = np.sum(P_mat[:, k], axis=0)  # 类出现的频率
                self.means_[k] = (1/N_k) * np.sum(X_train *P_mat[:, k].reshape(-1, 1), axis=0)  # 该类的新均值
                self.covariances_[k]=(1/N_k)*np.dot((P_mat[:, k].reshape(-1, 1)* (X_train - self.means_[k])).T,
                                                          (X_train - self.means_[k])) + self.reg_covar
                self.weights_[k] = N_k / n_samples
        
        theta = {}
        theta['pi'] = self.weights_
        theta['mu'] = self.means_
        theta['sigma'] = self.covariances_
        return theta
    def train(self, X, y):
        self.model = []
        labels = list(set(y))#标签的列表
        self.lable_num = len(labels)
        for n,label in enumerate(labels):
            input_x = X[y==label]
            print(input_x.shape)
            self.model.append(self.EM_GMM(input_x))
#         print(self.model)
        print(self.model[0]['mu'][1])
        
        return self.model

    # 计算概率
    def calculate_probabilities(self, input_data):
        p = []
        for i in range(self.lable_num):
            for j in range(self.n_components):
                g = multivariate_normal(mean=self.model[i]['mu'][j], cov=self.model[i]['sigma'][j])
            p.append( self.model[i]['pi'] * g.pdf(input_data))
    
        return p#输出一个数组，表示每一类的概率
    
    # 类别
    def predict(self, X_test):
        label = list(range(X_test.shape[0]))
        for i in range(X_test.shape[0]):#每个样本迭代一次
            label[i] = np.argmax(self.calculate_probabilities(X_test[i]))
        
        return label
    
    
iris = datasets.load_iris()
X=iris.data
y=iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


model = GaussianMixture(n_components=2)
model.train(X_train, y_train)
y_pred = model.predict(X_test)

print("IRIS:Number of mislabeled points out of a total %d points : %d, Acc: %f%%"
      % (X_test.shape[0], (y_test != y_pred).sum(),100*(y_test == y_pred).sum()/X_test.shape[0]))

(75, 4) (75, 4) (75,) (75,)
(26, 4)
(26, 4)
(23, 4)
[4 3 1 0]
IRIS:Number of mislabeled points out of a total 75 points : 75, Acc: 0.000000%


### 利用sk-learn mixture gaussion进行拟合

In [77]:
class GMM:
    def __init__(self):
        self.model = {}
        self.k = 3
        
    def EM_GMM(self,X,theta,param,eps=1e-5,max_iter=1000):#eps: 计算精度; max_iter: 最大迭代次数
#         print(len(X))#是一个由array构成的list
#         print(X)
        dpgmm = BayesianGaussianMixture(n_components=param['k']).fit(X)
        theta['pi'] = dpgmm.weights_
        theta['mu'] = dpgmm.means_
        theta['sigma'] = dpgmm.covariances_
#         print(theta)
        return theta 

    # 分类别求出数学期望和标准差
    def train(self, X, y):
        theta = {}; param = {}
        param['k'] = self.k; param['N'] = X.shape[0]; param['dim'] = X.shape[1]#这里的N是整个训练样本的样本数，在实际计算时取相同标签的样本数
        theta['pi'] = np.ones(param['k'])/param['k']                 # 均匀初始化
        theta['mu'] = np.random.random((param['k'],param['dim']))    # 随机初始化
        theta['sigma'] = np.array([np.eye(param['dim'])]*param['k']) # 初始化为单位正定矩阵
        self.regularization = np.dot(np.eye(param['dim']),0.001)
  
        labels = list(set(y))#标签的列表
        data = {label:[] for label in labels}#{0.0: [], 1.0: []}
        for f, label in zip(X, y):
            data[label].append(f)#print(data)#形成一个字典，根据标签将训练样本进行分类
#         print(data)
        print(len(data[0]),len(data[1]),len(data[2]))
        
        self.model = {label:{} for label in range(3)}
        
        dpgmm0 = GaussianMixture(3).fit(data[0])
        self.model[0]['pi'] = dpgmm0.weights_
        self.model[0]['mu'] = dpgmm0.means_
        self.model[0]['sigma'] = dpgmm0.covariances_
        print(self.model[0])
        
        dpgmm1 = GaussianMixture(3).fit(data[1])
        self.model[1]['pi'] = dpgmm1.weights_
        self.model[1]['mu'] = dpgmm1.means_
        self.model[1]['sigma'] = dpgmm1.covariances_
        print(self.model[1])
        
        dpgmm2 = GaussianMixture(3).fit(data[2])
        self.model[2]['pi'] = dpgmm2.weights_
        self.model[2]['mu'] = dpgmm2.means_
        self.model[2]['sigma'] = dpgmm2.covariances_
        print(self.model[2])
        
#         self.model = {label: self.EM_GMM(value,theta,param,eps=1e-5,max_iter=50) for label, value in data.items()}
        print(self.model)
        return self.model

    # 计算概率
    def calculate_probabilities(self, input_data):
        probabilities = {}
        dim = np.size(input_data)
        
        for label, value in self.model.items():#value是一个字典，表示特定标签的模型参数
            pp = 0.0
            for i in range(self.k):
                mu,sigma = value['mu'][i],value['sigma'][i]
                pp += multivariate_normal(mu,sigma).pdf(input_data)*value['pi'][i]
#                 pp = self.GMM_component(self,input_data,self.model[i]['theta'],self.model[i]['param'],i)
#                 probabilities[label] += value['pi'][i]*pp
            probabilities[label] = pp
            
        print(probabilities)
        return probabilities
    
    # 类别
    def predict(self, X_test):
        label = list(range(X_test.shape[0]))
        for i in range(X_test.shape[0]):#每个样本迭代一次
            label[i] = sorted(self.calculate_probabilities(X_test[i,:]).items(), key=lambda x: x[-1])[-1][0]
        
        return label
    
iris = datasets.load_iris()
X=iris.data
y=iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


model = GMM()
model.train(X_train, y_train)
y_pred = model.predict(X_test)
print(y_pred)
print("IRIS:Number of mislabeled points out of a total %d points : %d, Acc: %f%%"
      % (X_test.shape[0], (y_test != y_pred).sum(),100*(y_test == y_pred).sum()/X_test.shape[0]))

(75, 4) (75, 4) (75,) (75,)
26 26 23
{'pi': array([0.42600661, 0.45860897, 0.11538442]), 'mu': array([[4.66661999, 2.9854718 , 1.41880543, 0.19070452],
       [5.01615054, 3.4663702 , 1.51671653, 0.28411375],
       [5.43333339, 4.0666673 , 1.5       , 0.23333339]]), 'sigma': array([[[ 5.62311637e-02,  1.78961292e-02,  2.63918295e-02,
         -3.63702195e-04],
        [ 1.78961292e-02,  5.39449955e-02,  1.14305277e-02,
         -7.54347169e-03],
        [ 2.63918295e-02,  1.14305277e-02,  1.97317448e-02,
          1.08273582e-03],
        [-3.63702195e-04, -7.54347169e-03,  1.08273582e-03,
          2.64982039e-03]],

       [[ 3.63186169e-02, -5.17262393e-03,  1.80012039e-02,
          3.64864557e-03],
        [-5.17262393e-03,  1.39834255e-02, -1.19618698e-02,
         -1.31609642e-03],
        [ 1.80012039e-02, -1.19618698e-02,  4.48780567e-02,
          9.46690448e-03],
        [ 3.64864557e-03, -1.31609642e-03,  9.46690448e-03,
          2.14153860e-02]],

       [[ 4.22232930e-0

{0: 4.139606786424737e-36, 1: 0.0032120552204899146, 2: 2.5604824975578484e-15}
{0: 13.500892358738126, 1: 4.666684442111552e-47, 2: 1.113012727038799e-66}
{0: 0.04690025034417189, 1: 8.104494344602104e-53, 2: 6.679427801943986e-86}
{0: 2.3267458981056727e-196, 1: 6.514866822543338e-62, 2: 1.4753009851381648e-13}
{0: 7.554623474113582e-92, 1: 2.3256189990200273e-14, 2: 0.0029599930702042085}
{0: 2.161771274510929e-123, 1: 2.8217134593065133e-37, 2: 0.17173998217352712}
{0: 1.803199000016402e-95, 1: 8.34231469673553e-17, 2: 0.15460035050724158}
{0: 9.895600206903195e-60, 1: 0.00019981372778766132, 2: 0.0055819407080437205}
{0: 1.6765791584287134e-57, 1: 0.029077109250636706, 2: 1.2587710136801488e-05}
{0: 5.769834565106905e-146, 1: 2.17719094539772e-38, 2: 0.13490783171333606}
{0: 1.0163190806492803e-37, 1: 0.01793154282285602, 2: 1.40787670529274e-08}
{0: 8.200829037075408, 1: 4.613418089165687e-53, 2: 1.8445816473282936e-57}
{0: 8.369682863251996e-73, 1: 3.355478867063952e-09, 2: 2.21

In [74]:
iris = datasets.load_iris()
X=iris.data
y=iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1)

labels = list(set(y_train))#标签的列表
data = {label:[] for label in labels}#{0.0: [], 1.0: []}
for f, label in zip(X_train, y_train):
    data[label].append(f)#print(data)#形成一个字典，根据标签将训练样本进行分类

print(len(data[0]),len(data[1]),len(data[2]))

# dpgmm0 = GaussianMixture(5).fit(data[0])
# print(dpgmm0.weights_)

# dpgmm1 = GaussianMixture(5).fit(data[1])
# print(dpgmm1.weights_)
# print(dpgmm0.weights_)
model = {label:{} for label in range(3)}
print(model)

dpgmm0 = GaussianMixture(3).fit(data[0])
model[0]['pi'] = dpgmm0.weights_
model[0]['mu'] = dpgmm0.means_
model[0]['sigma'] = dpgmm0.covariances_
print(model[0])

dpgmm1 = GaussianMixture(3).fit(data[1])
model[1]['pi'] = dpgmm1.weights_
model[1]['mu'] = dpgmm1.means_
model[1]['sigma'] = dpgmm1.covariances_
print(model[1])

dpgmm2 = GaussianMixture(3).fit(data[2])
model[2]['pi'] = dpgmm2.weights_
model[2]['mu'] = dpgmm2.means_
model[2]['sigma'] = dpgmm2.covariances_
print(model[2])

print(model)

26 26 23
{0: {}, 1: {}, 2: {}}
{'pi': array([0.42499098, 0.07692308, 0.49808594]), 'mu': array([[4.66602119, 2.98496632, 1.41853502, 0.19074485],
       [5.45      , 4.25      , 1.5       , 0.25      ],
       [5.04558895, 3.4838614 , 1.51545678, 0.27739375]]), 'sigma': array([[[ 5.59614221e-02,  1.77394753e-02,  2.62944902e-02,
         -3.55518751e-04],
        [ 1.77394753e-02,  5.39295297e-02,  1.13798584e-02,
         -7.55719928e-03],
        [ 2.62944902e-02,  1.13798584e-02,  1.97193654e-02,
          1.08753440e-03],
        [-3.55518751e-04, -7.55719928e-03,  1.08753440e-03,
          2.65085034e-03]],

       [[ 6.25010000e-02,  3.75000000e-02,  1.10440527e-29,
          3.75000000e-02],
        [ 3.75000000e-02,  2.25010000e-02,  7.24765957e-30,
          2.25000000e-02],
        [ 1.10440527e-29,  7.24765957e-30,  1.00000000e-06,
          4.68386162e-31],
        [ 3.75000000e-02,  2.25000000e-02,  4.68386162e-31,
          2.25010000e-02]],

       [[ 4.41873858e-02,  1.