# 1.数据生成

In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing 

In [2]:
def getData():
    """Return the raw sample matrix taken from the original paper.

    Returns
    -------
    numpy.ndarray
        Float array of shape (8, 6): one row per sample, one column per
        measured quantity. The values are hard-coded.
    """
    paper_samples = (
        (1.40,  4, 0.58, 2,  1.67,  6.90),
        (1.38,  5, 0.61, 3,  1.42, 11.90),
        (1.35,  8, 0.59, 3,  1.47, 16.32),
        (3.95, 14, 0.22, 5, 49.79, 63.63),
        (3.95,  6, 0.54, 5, 14.58, 11.11),
        (2.00,  7, 0.48, 5,  5.28, 14.58),
        (1.45, 14, 0.58, 3,  2.32, 24.13),
        (1.40,  3, 0.51, 3,  1.90,  5.88),
    )
    return np.array(paper_samples)

In [3]:
# Load the paper's hard-coded sample matrix (8 rows x 6 columns).
data = getData()

# 2.数据标准化

In [4]:
def stdData(data):
    """Apply the shift / range (min-max) transform to ``data``.

    A ``MinMaxScaler`` with ``feature_range=(0.0, 1.0)`` is fitted on
    ``data`` and then applied to that same array.

    Parameters
    ----------
    data : array-like
        Sample matrix handed to the scaler.

    Returns
    -------
    numpy.ndarray
        The rescaled copy of ``data``.
    """
    scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0))
    scaler.fit(data)
    return scaler.transform(data)

In [5]:
# Min-max scale the raw samples into [0, 1] via stdData.
stddata = stdData(data)

In [6]:
# Display the scaled matrix so it can be compared with the paper's table below.
stddata

array([[0.01923077, 0.09090909, 0.92307692, 0.        , 0.00516849,
        0.01766234],
       [0.01153846, 0.18181818, 1.        , 0.33333333, 0.        ,
        0.10424242],
       [0.        , 0.45454545, 0.94871795, 0.33333333, 0.0010337 ,
        0.18077922],
       [1.        , 1.        , 0.        , 1.        , 1.        ,
        1.        ],
       [1.        , 0.27272727, 0.82051282, 1.        , 0.27206946,
        0.09056277],
       [0.25      , 0.36363636, 0.66666667, 1.        , 0.07980153,
        0.15064935],
       [0.03846154, 1.        , 0.92307692, 0.33333333, 0.01860657,
        0.31601732],
       [0.01923077, 0.        , 0.74358974, 0.33333333, 0.00992351,
        0.        ]])

## 原论文标准化后的数据
![image.png](attachment:image.png)

# 3.聚类分析

## 3.1建立模糊相似矩阵

用汪培庄贴近度（格贴近度）作为相似性度量建立相似矩阵

In [7]:
def distance(x,y):
    """Return the lattice closeness (Wang Peizhuang) of two fuzzy vectors.

    The value is ``0.5 * (inner + (1 - outer))`` where ``inner`` is the
    largest element-wise minimum of ``x`` and ``y`` and ``outer`` is the
    smallest element-wise maximum of ``x`` and ``y``.

    Parameters
    ----------
    x, y : array_like
        Membership vectors of identical shape. Values are expected to lie
        in [0, 1].

    Returns
    -------
    float
        The closeness value; larger means more similar.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape. The previous
        element-wise loop silently ignored trailing elements of a longer
        ``y``.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if x.shape != y.shape:
        raise ValueError(
            "distance() needs vectors of equal shape, got %s and %s"
            % (x.shape, y.shape)
        )
    # Seeding the reductions with 0 and 1 reproduces the start values of the
    # original loop: it keeps empty input well-defined and leaves values
    # inside [0, 1] untouched.
    inner = np.max(np.minimum(x, y), initial=0)
    outer = np.min(np.maximum(x, y), initial=1)
    return 0.5 * (inner + 1 - outer)
def Odistance(x,y):
    """Return a Euclidean-distance-based similarity of two sample vectors.

    Computed as ``1 - ||x - y||_2 / x.size``: identical vectors score 1 and
    the score decreases as the vectors move apart.
    """
    diff = x - y
    euclidean = np.sqrt(np.sum(np.square(diff)))
    return 1 - euclidean / x.size

In [8]:
def getSimilarMatrix(data):
    """Build the fuzzy similarity matrix using lattice closeness.

    Parameters
    ----------
    data : numpy.ndarray
        Two-dimensional sample matrix, one sample per row.

    Returns
    -------
    numpy.ndarray
        Square matrix with one row/column per sample. The diagonal is 1 and
        entry ``[a, b]`` is ``distance(data[a], data[b])``.
    """
    n_samples, _ = data.shape
    similar = np.eye(n_samples)
    for a in range(n_samples):
        for b in range(n_samples):
            if a == b:
                # Diagonal stays at the 1 supplied by np.eye.
                continue
            similar[a][b] = distance(data[a], data[b])
    return similar

In [9]:
def OgetSimilarMatrix(data):
    """Build the similarity matrix using the Euclidean-based ``Odistance``.

    Parameters
    ----------
    data : numpy.ndarray
        Two-dimensional sample matrix, one sample per row.

    Returns
    -------
    numpy.ndarray
        Square matrix whose diagonal is 1 and whose off-diagonal entry
        ``[i, j]`` is ``Odistance(data[i], data[j])``.
    """
    n_rows, _ = data.shape
    similarity = np.eye(n_rows)
    for i, sample_i in enumerate(data):
        for j, sample_j in enumerate(data):
            if i != j:
                similarity[i][j] = Odistance(sample_i, sample_j)
    return similarity

In [10]:
# Build both similarity matrices from the min-max-scaled data:
# lattice closeness (similarMatrix) and the Euclidean-based variant (OsimilarMatrix).
similarMatrix = getSimilarMatrix(stddata)
OsimilarMatrix = OgetSimilarMatrix(stddata)

In [11]:
# Print the lattice-closeness matrix first, then the Euclidean-based one.
print(similarMatrix)
print(OsimilarMatrix)

[[1.         0.95895422 0.95895422 0.08391608 0.86497502 0.79343257
  0.95223517 0.86683312]
 [0.95895422 1.         0.97384213 0.16666667 0.8581352  0.79343257
  0.95223517 0.86683312]
 [0.95895422 0.97384213 1.         0.25291375 0.8198668  0.79343257
  0.95223517 0.86683312]
 [0.08391608 0.16666667 0.25291375 1.         0.58974359 0.66666667
  0.53846154 0.29487179]
 [0.86497502 0.8581352  0.8198668  0.58974359 1.         0.92467532
  0.77422168 0.82651349]
 [0.79343257 0.79343257 0.79343257 0.66666667 0.92467532 1.
  0.79343257 0.79343257]
 [0.95223517 0.95223517 0.95223517 0.53846154 0.77422168 0.79343257
  1.         0.86249158]
 [0.86683312 0.86683312 0.86683312 0.29487179 0.82651349 0.79343257
  0.86249158 1.        ]]
[[1.         0.93924669 0.91323819 0.60576718 0.75951229 0.81616459
  0.83108816 0.93503724]
 [0.93924669 1.         0.95198319 0.63131978 0.79342813 0.86521631
  0.85845512 0.9447661 ]
 [0.91323819 0.95198319 1.         0.65394737 0.79079926 0.8706991
  0.905976

## 3.2改造成模糊等价矩阵

In [12]:
def reMultiply(x,y):
    """Compose two fuzzy matrices with the max-min product.

    ``result[i, j] = max_k min(x[i, k], y[k, j])``.

    Parameters
    ----------
    x : array_like, shape (m, p)
    y : array_like, shape (p, n)
        Fuzzy relation matrices; entries are expected to be non-negative
        (the running maximum starts at 0, as in the original loop).

    Returns
    -------
    numpy.ndarray
        Float array of shape (m, n).

    Raises
    ------
    ValueError
        If the column count of ``x`` differs from the row count of ``y``.
        The previous loop silently ignored surplus rows of ``y``.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    xrow, xcol = x.shape
    yrow, ycol = y.shape
    if xcol != yrow:
        raise ValueError(
            "reMultiply() shape mismatch: %s cannot be composed with %s"
            % (x.shape, y.shape)
        )
    result = np.zeros([xrow, ycol])
    # One vectorised pass per inner index keeps memory at O(m*n) while
    # replacing the triple Python loop.
    for k in range(xcol):
        pairwise_min = np.minimum(x[:, k, None], y[k, None, :])
        np.maximum(result, pairwise_min, out=result)
    return result

In [13]:
def getEqualMatrix(similarMatrix):
    """Turn a fuzzy similarity matrix into a fuzzy equivalence matrix.

    The matrix is repeatedly composed with itself via ``reMultiply``. The
    loop stops as soon as a squaring leaves the matrix unchanged, or once
    the doubling counter is no longer smaller than the number of rows.

    Parameters
    ----------
    similarMatrix : numpy.ndarray
        Square fuzzy similarity matrix; it is copied, not modified.

    Returns
    -------
    numpy.ndarray
        The last matrix produced by the squaring loop.
    """
    size, _ = similarMatrix.shape
    current = similarMatrix.copy()
    power = 1
    while power < size:
        squared = reMultiply(current, current)
        # Exact comparison is used on purpose: reaching a fixed point ends the loop.
        if (squared == current).all():
            break
        current = squared
        power *= 2
    return current

In [14]:
def getCutSet(matrix,a):
    """Return the ``a``-cut of a fuzzy matrix as a 0/1 matrix.

    Parameters
    ----------
    matrix : array_like
        Fuzzy (equivalence) matrix; it is not modified.
    a : float
        Cut level. Entries ``>= a`` map to 1, all others to 0.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``matrix`` holding only 0 and 1.

    Notes
    -----
    The result comes from a single comparison against the original values.
    The earlier two-step in-place version re-tested entries it had already
    set to 1, so any ``a > 1`` wrongly zeroed entries that were ``>= a``.
    """
    matrix = np.asarray(matrix)
    return (matrix >= a).astype(matrix.dtype)

In [15]:
# Repeatedly square each similarity matrix (max-min composition) to obtain
# the corresponding fuzzy equivalence matrices.
equalMatrix = getEqualMatrix(similarMatrix)
OequalMatrix = getEqualMatrix(OsimilarMatrix)

In [16]:
# Print the lattice-closeness equivalence matrix first, then the Euclidean-based one.
print(equalMatrix)
print(OequalMatrix)

[[1.         0.95895422 0.95895422 0.66666667 0.86497502 0.86497502
  0.95223517 0.86683312]
 [0.95895422 1.         0.97384213 0.66666667 0.86497502 0.86497502
  0.95223517 0.86683312]
 [0.95895422 0.97384213 1.         0.66666667 0.86497502 0.86497502
  0.95223517 0.86683312]
 [0.66666667 0.66666667 0.66666667 1.         0.66666667 0.66666667
  0.66666667 0.66666667]
 [0.86497502 0.86497502 0.86497502 0.66666667 1.         0.92467532
  0.86497502 0.86497502]
 [0.86497502 0.86497502 0.86497502 0.66666667 0.92467532 1.
  0.86497502 0.86497502]
 [0.95223517 0.95223517 0.95223517 0.66666667 0.86497502 0.86497502
  1.         0.86683312]
 [0.86683312 0.86683312 0.86683312 0.66666667 0.86497502 0.86497502
  0.86683312 1.        ]]
[[1.         0.93924669 0.93924669 0.73337887 0.86718743 0.8706991
  0.90597643 0.93924669]
 [0.93924669 1.         0.95198319 0.73337887 0.86718743 0.8706991
  0.90597643 0.9447661 ]
 [0.93924669 0.95198319 1.         0.73337887 0.86718743 0.8706991
  0.90597643

In [17]:
# Take the 0.93 cut of both equivalence matrices: entries >= 0.93 become 1, the rest 0.
result = getCutSet(equalMatrix,0.93)
Oresult = getCutSet(OequalMatrix,0.93)

In [18]:
# Print the two 0/1 cut matrices (lattice closeness first, then Euclidean-based).
print(result)
print(Oresult)

[[1. 1. 1. 0. 0. 0. 1. 0.]
 [1. 1. 1. 0. 0. 0. 1. 0.]
 [1. 1. 1. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [1. 1. 1. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]]
[[1. 1. 1. 0. 0. 0. 0. 1.]
 [1. 1. 1. 0. 0. 0. 0. 1.]
 [1. 1. 1. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 1. 1. 0. 0. 0. 0. 1.]]


小记：
1.原论文中说的是计算8个样本之间的贴近度，进行已知瓦斯突出样本聚类分析得出聚类结果，如果作者所说的贴近度是之前计算得出的格贴近度的话，得不出作者所得出的聚类结果
2.我后来将格贴近度作为距离度量改成欧氏距离进行度量，也得不出作者所得的聚类结果
3.可能作者用了其他计算相似度的方法，我没有一一尝试，先假设作者得出的聚类结果为模糊模式库来进行下面的模糊模式识别

# 4模糊模式识别

## 4.1均值化模糊模式库

In [19]:
def getMean(matrix,x):
    """Average the rows of ``matrix`` selected by the indices in ``x``.

    Parameters
    ----------
    matrix : numpy.ndarray
        Two-dimensional sample matrix.
    x : sequence of int
        Row indices to average; must support ``len``.

    Returns
    -------
    numpy.ndarray
        Array of shape (1, n_columns) holding the column-wise mean of the
        selected rows.
    """
    _, n_columns = matrix.shape
    # Start from a (1, n_columns) zero row so the result keeps that 2-D shape.
    total = sum((matrix[idx] for idx in x), np.zeros([1, n_columns]))
    return np.array(total / len(x))

In [20]:
# Class prototypes: the mean scaled sample of each group.
# NOTE(review): the row groupings follow the clustering assumed from the paper
# (see the note above this section), not the cut-set computed in this
# notebook — confirm against the paper.
A1 = getMean(stddata,[0])
A2 = getMean(stddata,[1,2,6,7])
A3 = getMean(stddata,[3,4,5])
print("A1:",A1)
print("A2:",A2)
print("A3:",A3)

A1: [[0.01923077 0.09090909 0.92307692 0.         0.00516849 0.01766234]]
A2: [[0.01730769 0.40909091 0.90384615 0.33333333 0.00739094 0.15025974]]
A3: [[0.75       0.54545455 0.4957265  1.         0.45062366 0.41373737]]


![image.png](attachment:image.png)

小记：我计算得出的平均值跟作者得出的平均值有很大的出入，这个应该是作者错了，错的相当明显了，第一类只有一个元素，均值应该与原标准化后的值相等，作者的数据明显不相等

## 4.2计算x9,x10与A1，A2，A3的格贴近度

In [21]:
# The two samples to classify, given directly as six values in [0, 1].
# NOTE(review): presumably the paper's standardized x9 / x10 — confirm the source.
x9 = np.array([0,0.39735,0.96296,0,0,0.19378])
x10 = np.array([0.76596,1,0.00004,1,1,1])

In [22]:
# Lattice closeness of each new sample to each class prototype.
# Naming: d<sample><class>, e.g. d91 is x9 against A1 and d103 is x10 against A3.
# A*[0] unwraps the single row of the (1, n) array returned by getMean.
d91 = distance(A1[0],x9)
d92 = distance(A2[0],x9)
d93 = distance(A3[0],x9)
d101 = distance(A1[0],x10)
d102 = distance(A2[0],x10)
d103 = distance(A3[0],x10)
# First printed row: x9 against A1, A2, A3; second row: x10 against A1, A2, A3.
print(d91,"  ",d92,"  ",d93)
print(d101,"  ",d102,"  ",d103)

0.9615384615384615    0.9482276045228288    0.540994560994561
0.16247454545454543    0.3215654545454546    0.7521367521367521


![image.png](attachment:image.png)

小结：
1.计算结果与作者不同，同属类别是一样的，应该是巧合
![image.png](attachment:image.png)
作者给的表2写的是待评估瓦斯突出样本数据，但表中数据却是x7,x8，说明应该是瞎给的

# 5.总结

## 5.1分析结果

除了标准化数据与原作者相同之外，其余计算结果均与作者不同，如果不出意外的话，应该是篇水文，而且水的离谱

不管论文水平如何，我的聚类分析结果与原本数据的类别差别较大，有2种可能

1.本身数据量非常少，导致某个特殊样本造成的影响太大，导致聚类效果很差（第四个样本）

2.标准化数据的方法有可能会影响两个样本的相似度（从观察第一个样本和第8个样本得出）

## 5.2修改标准化方法看一下聚类结果

In [23]:
def zScoreData(data):
    """Standardize ``data`` with ``sklearn.preprocessing.scale``.

    This is the z-score alternative to the min-max transform used by
    ``stdData``; it is not a shift / range transform.

    Parameters
    ----------
    data : array-like
        Sample matrix handed to ``preprocessing.scale``.

    Returns
    -------
    numpy.ndarray
        The standardized copy of ``data``.
    """
    standardized = preprocessing.scale(data)
    return standardized

In [24]:
# Standardize the raw samples with zScoreData as an alternative to the min-max scaling above.
zscdata = zScoreData(data)

In [25]:
# Display the z-scored matrix.
zscdata

array([[-0.65718207, -0.91386756,  0.55994594, -1.46261427, -0.51878019,
        -0.70529219],
       [-0.67569425, -0.66176617,  0.81350637, -0.56254395, -0.53472549,
        -0.42104345],
       [-0.7034625 ,  0.09453802,  0.64446608, -0.56254395, -0.53153643,
        -0.16976756],
       [ 1.70311974,  1.6071464 , -2.48277918,  1.23759669,  2.5503703 ,
         2.51979404],
       [ 1.70311974, -0.40966477,  0.22186537,  1.23759669,  0.30463487,
        -0.46595475],
       [-0.10181694, -0.15756337, -0.28525548,  1.23759669, -0.28853012,
        -0.26868612],
       [-0.61090165,  1.6071464 ,  0.55994594, -0.56254395, -0.47732242,
         0.27422897],
       [-0.65718207, -1.16596896, -0.03169505, -0.56254395, -0.50411052,
        -0.76327893]])

In [26]:
def OOdistance(x,y):
    """Return a Euclidean-distance-based similarity for z-scored samples.

    Computed as ``1 - ||x - y||_2 / (2 * x.size)``; the denominator is twice
    the one used by ``Odistance``.
    """
    diff = x - y
    euclidean = np.sqrt(np.sum(np.square(diff)))
    return 1 - euclidean / (2 * x.size)

In [27]:
def OgetSimilarMatrix(data):
    """Build the similarity matrix using the Euclidean-based ``OOdistance``.

    NOTE: this redefinition replaces the earlier ``OgetSimilarMatrix`` (which
    used ``Odistance``) for every cell executed after it.

    Parameters
    ----------
    data : numpy.ndarray
        Two-dimensional sample matrix, one sample per row.

    Returns
    -------
    numpy.ndarray
        Square matrix whose diagonal is 1 and whose off-diagonal entry
        ``[i, j]`` is ``OOdistance(data[i], data[j])``.
    """
    count, _ = data.shape
    matrix = np.eye(count)
    # np.ndindex walks every (i, j) pair in row-major order.
    for i, j in np.ndindex(count, count):
        if i != j:
            matrix[i][j] = OOdistance(data[i], data[j])
    return matrix

In [28]:
# Rebuild the similarity matrix from the z-scored data.
# NOTE: this rebinds similarMatrix, replacing the lattice-closeness matrix computed earlier.
similarMatrix = OgetSimilarMatrix(zscdata)

In [29]:
# Display the z-score based similarity matrix.
similarMatrix

array([[1.        , 0.9158635 , 0.87857206, 0.420868  , 0.68857432,
        0.74820683, 0.76240557, 0.90768042],
       [0.9158635 , 1.        , 0.93206971, 0.45304463, 0.73622138,
        0.81152726, 0.80098936, 0.91311762],
       [0.87857206, 0.93206971, 1.        , 0.48743779, 0.73317888,
        0.82128433, 0.86813919, 0.8708677 ],
       [0.420868  , 0.45304463, 0.48743779, 1.        , 0.58050416,
        0.56669804, 0.52813268, 0.45597989],
       [0.68857432, 0.73622138, 0.73317888, 0.58050416, 1.        ,
        0.83397451, 0.68890475, 0.73398138],
       [0.74820683, 0.81152726, 0.82128433, 0.56669804, 0.83397451,
        1.        , 0.76938128, 0.81513503],
       [0.76240557, 0.80098936, 0.86813919, 0.52813268, 0.68890475,
        0.76938128, 1.        , 0.74834589],
       [0.90768042, 0.91311762, 0.8708677 , 0.45597989, 0.73398138,
        0.81513503, 0.74834589, 1.        ]])

In [30]:
# Fuzzy equivalence matrix for the z-score based similarity; rebinds equalMatrix from the earlier section.
equalMatrix = getEqualMatrix(similarMatrix)

In [31]:
# Display the z-score based equivalence matrix.
equalMatrix

array([[1.        , 0.9158635 , 0.9158635 , 0.58050416, 0.82128433,
        0.82128433, 0.86813919, 0.91311762],
       [0.9158635 , 1.        , 0.93206971, 0.58050416, 0.82128433,
        0.82128433, 0.86813919, 0.91311762],
       [0.9158635 , 0.93206971, 1.        , 0.58050416, 0.82128433,
        0.82128433, 0.86813919, 0.91311762],
       [0.58050416, 0.58050416, 0.58050416, 1.        , 0.58050416,
        0.58050416, 0.58050416, 0.58050416],
       [0.82128433, 0.82128433, 0.82128433, 0.58050416, 1.        ,
        0.83397451, 0.82128433, 0.82128433],
       [0.82128433, 0.82128433, 0.82128433, 0.58050416, 0.83397451,
        1.        , 0.82128433, 0.82128433],
       [0.86813919, 0.86813919, 0.86813919, 0.58050416, 0.82128433,
        0.82128433, 1.        , 0.86813919],
       [0.91311762, 0.91311762, 0.91311762, 0.58050416, 0.82128433,
        0.82128433, 0.86813919, 1.        ]])

In [32]:
# 0.9 cut of the z-score based equivalence matrix (the earlier section cut at 0.93); rebinds result.
result = getCutSet(equalMatrix,0.9)

In [33]:
# Display the 0/1 cut matrix for the z-score variant.
result

array([[1., 1., 1., 0., 0., 0., 0., 1.],
       [1., 1., 1., 0., 0., 0., 0., 1.],
       [1., 1., 1., 0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [1., 1., 1., 0., 0., 0., 0., 1.]])

没什么变化，看来主要还是样本太少