In [1]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from scipy.spatial.distance import cdist
from sklearn import datasets, metrics
import pandas as pd
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import accuracy_score

In [2]:
# 定义模糊C均值（FCM）类
class FCM(BaseEstimator, ClassifierMixin):
    def __init__(self, k, alpha=2):
        """
        FCM 类的初始化函数

        Parameters:
        - k: 簇的数量
        - alpha: 模糊因子，控制隶属度的模糊程度，默认为2
        """
        # method=2 => use L2 distance
        self.k = k
        self.alpha = alpha
        self.x = None
        self.y = None
        self.labels = None
        self.centers = None
        self.u = None  # 隶属度矩阵
        self.iterations = 500

    def quick_L2(self, x, a):
        """
        计算两组数据点之间的欧氏距离的平方

        Parameters:
        - x: 第一组数据点
        - a: 第二组数据点

        Returns:
        - dis: 欧氏距离的平方
        """
        dis = -2 * np.dot(x, a.T)
        dis += np.einsum('ij,ij->i', x, x)[:, np.newaxis]
        dis += np.einsum('ij,ij->i', a, a)[np.newaxis, :]
        return dis

    def fit(self, x, y=None, init_method='u', seed=None, eps=1e-5):
        """
        拟合（训练）模糊C均值（FCM）模型

        Parameters:
        - x: 输入数据
        - y: 标签数据（未使用）
        - init_method: 初始化方法，'u'表示使用随机生成的隶属度矩阵，其他值（暂未实现）
        - seed: 随机数种子
        - eps: 收敛阈值

        Returns:
        - None
        """
        self.x = x
        self.y = y

        if seed is not None:
            np.random.seed(seed)

        if init_method == 'u':
            # 随机生成隶属度矩阵，每个数据点对每个簇的隶属度，随机生成并归一化
            self.u = np.random.rand(self.x.shape[0], self.k)
            self.u /= np.sum(self.u, axis=1)[:, np.newaxis]
        else:
            pass  # 其他初始化方法待实现

        pre_J = 0  # 上一次迭代的目标函数值
        for i in range(self.iterations):
            # 计算隶属度矩阵的模糊化
            u_a = self.u ** self.alpha  # u_{ij}^{\alpha}

            # 更新簇中心
            self.centers = np.dot(self.u.T, self.x) / np.sum(self.u, axis=0)[:, np.newaxis]

            # 计算欧氏距离
            dis = self.quick_L2(self.x, self.centers)

            # 计算目标函数值
            J = np.sum(u_a * dis)

            # 判断是否收敛
            if abs(J - pre_J) < eps:
                return

            # 更新隶属度矩阵
            e = 1 / (self.alpha - 1 + eps * 100)
            self.u = 1 / ((dis ** e) * np.sum(dis ** (-e), axis=1)[:, np.newaxis])

            # 更新上一次的目标函数值
            pre_J = J

    def predict(self):
        """
        预测数据点所属的簇

        Returns:
        - predictions: 数据点所属的簇的标签
        """
        return np.argmax(self.u, axis=1)

In [3]:
# 主程序
if __name__ == '__main__':
    # 加载鸢尾花数据集
    iris = datasets.load_iris()
    data = iris['data']
    labels = iris['target']

In [4]:
    print('FCM:')
    fcm = FCM(k=3)
    fcm.fit(data)
    res = fcm.predict()
    print('iris-predict:')
    print(res)

FCM:
iris-predict:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 2 2 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 0 1 2 2 2 2 2 2 1 2 2 2 2
 2 2 1 2 2 2 2 2 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2
 2 2]


In [5]:
    ari = adjusted_rand_score(labels, res)
    print('Adjusted Rand Index (ARI) in iris: ', ari)

Adjusted Rand Index (ARI) in iris:  0.7683058726537342


In [6]:
    print('iris-SC指标: ' + str(metrics.silhouette_score(data, res, metric='euclidean')))

iris-SC指标: 0.5209590632921908


In [7]:
    sonar = pd.read_csv('sonar.all-data.csv', header=None, sep=',')
    sonar1 = sonar.iloc[0:208, 0:60]
    data2 = np.array(sonar1)
    labels2 = np.zeros(208)
    labels2[sonar.iloc[:, 60] == 'R'] = 1

In [8]:
    fcm2 = FCM(k=2)
    fcm2.fit(data2)
    res2 = fcm2.predict()
    print('sonar-predict:')
    print(res2)

sonar-predict:
[1 0 0 1 0 0 0 0 0 0 1 0 0 1 1 1 1 0 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1
 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0
 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 1 0
 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0]


In [9]:
    ari2 = adjusted_rand_score(labels2, res2)
    print('Adjusted Rand Index (ARI) in sonar: ', ari2)

Adjusted Rand Index (ARI) in sonar:  0.008545699314580773


In [10]:
    print('sonar-SC指标: ' + str(metrics.silhouette_score(data2, res2, metric='euclidean')))

sonar-SC指标: 0.19554404245764484
