# 实验3 模糊C均值聚类算法

In [1]:
import numpy as np
global FLAG
FLAG = False

## 样本集

In [2]:
x = [
    [-5.01, -8.12, -3.68],
    [-5.43, -3.48, -3.54],
    [1.08, -5.52, 1.66],
    [0.68, -3.78, -4.11],
    [-2.67, 0.63, 7.39],
    [4.94, 3.29, 2.08],
    [-2.51, 2.09, -2.59],
    [-2.25, -2.13, -6.94],
    [5.56, 2.86, -2.26],
    [1.03, -3.33, 4.33],
    
    [-0.91, -0.18, -0.05],
    [1.30, -2.06, -3.53],
    [-7.75, 4.54, -0.95],
    [-5.74, 0.50, 3.92],
    [6.14, 5.72, -4.85],
    [3.60, 1.26, 4.36],
    [5.37, -4.63, -3.65],
    [7.18, 1.46, -6.66],
    [-7.39, 1.17, 6.30],
    [-7.50, -6.32, -0.31],
    
    [5.35, 2.26, 8.13],
    [5.12, 3.22, -2.66],
    [-1.34, -5.31, -9.87],
    [4.48, 3.42, 5.19],
    [7.11, 2.39, 9.21],
    [7.17, 4.33, -0.98],
    [5.75, 3.97, 6.65],
    [0.77, 0.27, 2.41],
    [0.90, -0.43, -8.71],
    [3.52, -0.36, 6.43]
]

# 样本个数
n = 15   

# 样本维度
N = 3

## 1 设定聚类数目和参数

In [3]:
# 聚类数目
c = 2

# 控制聚类结果模糊程度的常数
b = 2

## 2 初始化各个聚类中心

In [4]:
# 聚类中心
# m = [x[0], x[1]]
m = [[-1, -1, -1], [1, 1, 1]]

## 3 重复下面的运算，知道各个样本的隶属度值稳定：

### i)用当前的聚类中心计算隶属度函数：

In [5]:
def get_uj_xi(x_i, c, b, m,j):
    """
    计算隶属度函数
    x_i 样本集中的第i个样本
    c 聚类数目
    b 控制聚类结果模糊程度的常数
    m 聚类中心集
    j 第j类 
    return 第i个样本在第j类下的隶属度
    """
    MAX = 100
    
    # 转化为向量形式
    X = np.mat(x_i).T   # 第i个样本向量
    M_j = np.mat(m[j]).T    # 第j类聚类中心向量
    
    if(np.linalg.norm(X - M_j) == 0): # 防止除0
        tp = MAX
    else:
        tp = (1 / (np.linalg.norm(X - M_j) ** 2)) ** (1 / (b - 1)) 
    
    value = 0    
    for i in range(c):
        Temp = np.mat(m[i]).T
        
        if(np.linalg.norm(X - Temp) == 0): # 防止除0
            value += MAX
        else:
            value += (1 / (np.linalg.norm(X - Temp) ** 2)) ** (1 / (b - 1))
           
    return tp / value

### ii)用当前的隶属度函数更新计算各类聚类中心

In [6]:
def update(x, N, cfc_i, cfc_u_i, b):
    """
    更新计算各类聚类中心
    x 总样本集
    N 样本维数
    cfc_i 第i类包含的样本集标记
    cfc_u_i 第i类包含的隶属度集
    b 控制聚类结果模糊程度的常数
    return 当前类的聚类中心
    """
    temp = []
    for i in range(N):
        temp.append(0)
        
    val = 0    
    for i in range(len(cfc_u_i)):
        val += cfc_u_i[i]
        
    k = 0
    for i in cfc_i:
        for j in range(N):
            temp[j] += x[i][j] * (cfc_u_i[k] ** b)
        k += 1
        
    for i in range(N):
        temp[i] /= val
        
        
    return temp

In [7]:
cfc = []    # 分类容器
for i in range(c):
    cfc.append([])
    
cfc_u = []    # 分类后的样本隶属度容器
for i in range(c):
    cfc_u.append([])
    
for i in range(n):
    
    if FLAG:
        print("第", i, "个样本")
    
    cls = 0    # 记录类别
    u_max = 0    # 记录最大隶属度函数
    for j in range(c):
        u = get_uj_xi(x[i], c, b, m, j)
        if(u > u_max):
            u_max = u;
            cls = j;
            
        if FLAG:
            print("    ", "对类:", j, " 的隶属度u:", u)
     
    if FLAG:
        print("        ", " 最大隶属度u_max:", u_max, "最大隶属类别", cls)
    
    cfc[cls].append(i)  
    cfc_u[cls].append(u_max)
    
print("第", 1, "次分类结果:", cfc)

cfc_avg_old = [] # 记录上一次的聚类中心
cfc_avg_new = [] # 记录这一次的聚类中心
for i in range(c):
    cfc_avg_old.append([])
    cfc_avg_new.append([])
    cfc_avg_new[i] = update(x, N, cfc[i], cfc_u[i], b)

print("第", 1, "次的分类后各类的聚类中心:")
print(cfc_avg_new)


print("-----------------------------------------------------------------------------------------------")

step = 2
while(cfc_avg_old != cfc_avg_new):
    for i in range(c):
        cfc_avg_old[i] = cfc_avg_new[i]
    cfc = []    # 分类容器
    for i in range(c):
        cfc.append([])
        
    cfc_u = []    # 分类后的样本隶属度容器
    for i in range(c):
        cfc_u.append([])
    
    for i in range(n):
        
        if FLAG:
            print("第", i, "个样本")
            
        cls = 0    # 记录类别
        u_max = 0 # 记录最大隶属度函数
        for j in range(c):
            u = get_uj_xi(x[i], c, b, cfc_avg_old, j)
            if(u > u_max):
                u_max = u;
                cls = j;
                
            if FLAG:
                print("    ", "对类:", j, " 的隶属度u:", u)
            
        if FLAG:
            print("        ", " 最大隶属度u_max:", u_max, "最大隶属类别", cls)
            
        cfc[cls].append(i)
        cfc_u[cls].append(u_max)
    print("第", step, "次分类结果:", cfc)
    for i in range(c):
        cfc_avg_new[i] = update(x, N, cfc[i], cfc_u[i], b)
    print("第", step, "次的分类后各类的聚类中心:")
    print(cfc_avg_new)
    print(cfc_avg_old)
    step += 1
    

    print("-----------------------------------------------------------------------------------------------")

第 1 次分类结果: [[0, 1, 2, 3, 6, 7, 10, 11, 12, 13], [4, 5, 8, 9, 14]]
第 1 次的分类后各类的聚类中心:
[[-1.5595800999900544, -1.333743788594477, -1.5615852847674245], [2.060603149205724, 1.293393430427564, 0.8165128168588237]]
-----------------------------------------------------------------------------------------------
第 2 次分类结果: [[0, 1, 2, 3, 6, 7, 10, 11, 12, 13], [4, 5, 8, 9, 14]]
第 2 次的分类后各类的聚类中心:
[[-1.7973293262880774, -1.425162104088001, -1.7219869505438903], [2.5531537136369717, 1.6040233955736165, 0.6030876406399948]]
[[-1.5595800999900544, -1.333743788594477, -1.5615852847674245], [2.060603149205724, 1.293393430427564, 0.8165128168588237]]
-----------------------------------------------------------------------------------------------
第 3 次分类结果: [[0, 1, 2, 3, 6, 7, 10, 11, 12, 13], [4, 5, 8, 9, 14]]
第 3 次的分类后各类的聚类中心:
[[-1.8890865681634843, -1.4623827440801327, -1.748654686226532], [2.8180331573778843, 1.7995086863781808, 0.4185511261215135]]
[[-1.7973293262880774, -1.425162104088001, -1.721986