In [28]:
import pandas as pd
import numpy as np 

In [29]:
A=pd.read_excel('../table/2020-7-19.xlsx',index_col='矿厂')
print(A.info())
print(A)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 白沙湘永煤矿厂 to 扎诺尔南山煤矿厂
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   粉尘浓度(mg/m^3)  5 non-null      float64
 1   游离二氧化硅含量(%)   5 non-null      float64
 2   煤肺患病率(%)      5 non-null      float64
dtypes: float64(3)
memory usage: 160.0+ bytes
None
          粉尘浓度(mg/m^3)  游离二氧化硅含量(%)  煤肺患病率(%)
矿厂                                           
白沙湘永煤矿厂           50.8          4.3       8.7
沈阳田师傅煤矿厂         200.0          4.9       7.2
抚顺龙凤煤矿厂           71.4          2.5       5.0
大同同家山煤矿厂          98.5          3.7       2.7
扎诺尔南山煤矿厂          10.2          2.4       0.3


In [30]:
# coding: gbk
# 使用CRITIC客观赋权法获得真实误差序列与组合预测值
# 所有的评价指标均为损益性指标，越小越好
# 参考链接：https://blog.csdn.net/stephen_curry300/article/details/106989729
# 参考论文：两种客观赋权法及其在确定组合预测权重中的应用

# 确定各单一预测模型权重步骤如下：
# 1）对原始决策矩阵X进行标准化处理，得到标准化矩阵R
# 2）由标准化矩阵求出各评价指标的概率
# 3）按客观赋权方法求各评定指标的权重（这里有CRITIC法和变异系数法两种）
# 4）最后，根据  各评价指标概率  和  各评价指标的权重  计算各预测方法的权重，权重之和为1.0

# X：array
def critic(X):
    print('原始矩阵：',X)
    n,m = X.shape  # n = 4, m = 5
    # 1、对原始决策矩阵X进行标准化处理，得到矩阵Z
    Z = standard(X)
    print('标准化的矩阵：',Z)

    # 2、求各指标的概率
    # print(prob(Z))
    probValue = prob(Z)
    print('各指标概率：',probValue)

    # 3、按客观评价法求出各评定指标的权重Wj，这里先介绍CRITIC
    R = np.array(pd.DataFrame(Z).corr())
    delta = np.zeros(m)
    c = np.zeros(m)
    for j in range(m):
        delta[j] = Z[:,j].std()
        c[j] = R.shape[0] - R[:,j].sum()
    C = delta * c
    w = C/sum(C)
    # 这是指标权重
    zhiBiaoQuanZhong = np.round(w,5)
    print('各指标权重:',zhiBiaoQuanZhong)

    # 存放结果
    CRITICValue = []
    sum1 = 0
    for i in range(n):
        result = 0
        for j in range(m):
            result += probValue[i,j]*zhiBiaoQuanZhong[j]
        sum1 += result
        CRITICValue.append(result)

    print('各个预测模型的权重:',CRITICValue)   # 使用CRITIC求出来的每一个预测方法对应的权重，也就是权重分配
    print('各个预测模型权重之和:',sum1)   # 几个预测方法所有权重加起来为1


# 2、求各指标的概率
def prob(Z):
    n, m = Z.shape
    # 2、由上面得到的标准矩阵求各个指标的概率。由标准化矩阵R求出各评价指标的概率
    prob = []  # 用于存放各指标的列表和  总共求5次，因为有5个指标
    for j in range(m):
        sum1 = 0
        for i in range(n):
            sum1 += Z[i, j]
        prob.append(sum1)  # 将结果存进数组中
    # 声明一个二维数组存放概率
    probTwo = np.empty([n, m])
    # 遍历数组，求每一个指标的概率
    for ii in range(n):
        for jj in range(m):
            probTwo[ii, jj] = Z[ii, jj] / prob[jj]
    return probTwo

# 1、标准化矩阵
def standard(X):
    xmin = X.min(axis=0)   # 都是损益性指标
    n, m = X.shape
    for i in range(n):
        for j in range(m):
            # X[i,j] = (X[i,j]-xmin[j])/xmaxmin[j]
            X[i, j] = xmin[j]/X[i,j]
    return X

In [31]:
# TOPSIS指标评价
# 本题目指标为极小型指标

# 极小型指标
def dataDirection_1(datas, offset=0):
	def normalization(data):
		return 1 / (data + offset)

	return list(map(normalization, datas))


# 中间型指标
def dataDirection_2(datas, x_min, x_max):
	def normalization(data):
		if data <= x_min or data >= x_max:
			return 0
		elif data > x_min and data < (x_min + x_max) / 2:
			return 2 * (data - x_min) / (x_max - x_min)
		elif data < x_max and data >= (x_min + x_max) / 2:
			return 2 * (x_max - data) / (x_max - x_min)

	return list(map(normalization, datas))


# 区间型指标
def dataDirection_3(datas, x_min, x_max, x_minimum, x_maximum):
	def normalization(data):
		if data >= x_min and data <= x_max:
			return 1
		elif data <= x_minimum or data >= x_maximum:
			return 0
		elif data > x_max and data < x_maximum:
			return 1 - (data - x_max) / (x_maximum - x_max)
		elif data < x_min and data > x_minimum:
			return 1 - (x_min - data) / (x_min - x_minimum)

	return list(map(normalization, datas))



# data:array

# 正向化矩阵标准化
def temp2(datas):
    K = np.power(np.sum(pow(datas,2),axis =1),0.5)
    for i in range(0,K.size):
        for j in range(0,datas[i].size):
            datas[i,j] = datas[i,j] / K[i]      #套用矩阵标准化的公式
    return datas 

In [32]:
# data：DataFrame

def topsis(data, weight=None):
	# 归一化
	data = data / np.sqrt((data ** 2).sum())

	# 最优最劣方案
	Z = pd.DataFrame([data.min(), data.max()], index=['负理想解', '正理想解'])

	# 距离
	weight = entropyWeight(data) if weight is None else np.array(weight)
	Result = data.copy()
	Result['正理想解'] = np.sqrt(((data - Z.loc['正理想解']) ** 2 * weight).sum(axis=1))
	Result['负理想解'] = np.sqrt(((data - Z.loc['负理想解']) ** 2 * weight).sum(axis=1))

	# 综合得分指数
	Result['综合得分指数'] = Result['正理想解'] / (Result['负理想解'] + Result['正理想解'])
	Result['排序'] = Result.rank(ascending=False)['综合得分指数']

	return Result, Z, weight

In [33]:
# CRITIC计算权重
A1=A.values
critic(A1)

原始矩阵： [[ 50.8   4.3   8.7]
 [200.    4.9   7.2]
 [ 71.4   2.5   5. ]
 [ 98.5   3.7   2.7]
 [ 10.2   2.4   0.3]]
标准化的矩阵： [[0.2007874  0.55813953 0.03448276]
 [0.051      0.48979592 0.04166667]
 [0.14285714 0.96       0.06      ]
 [0.1035533  0.64864865 0.11111111]
 [1.         1.         1.        ]]
各指标概率： [[0.13401928 0.1526396  0.0276468 ]
 [0.0340409  0.13394904 0.03340655]
 [0.09535266 0.26254011 0.04810543]
 [0.06911857 0.17739197 0.08908412]
 [0.66746859 0.27347928 0.80175711]]
各指标权重: [0.30761 0.35505 0.33734]
各个预测模型的权重: [0.10474673223165747, 0.06929929042332002, 0.13877418171196843, 0.11429622092381603, 0.572883574709238]
各个预测模型权重之和: 1.0


In [34]:
A2=temp2(A1)
A3=dataDirection_1(A2)
weight=[0.30761,0.35505,0.33734]
out=topsis(pd.DataFrame(A3),weight)
out

(          0         1         2      正理想解      负理想解    综合得分指数   排序
 0  0.212794  0.395845  0.633755  0.306506  0.334914  0.477856  3.0
 1  0.696834  0.375194  0.436253  0.197040  0.384140  0.339034  5.0
 2  0.489494  0.376660  0.596109  0.197710  0.369579  0.348517  4.0
 3  0.462626  0.381906  0.220528  0.314447  0.208495  0.601304  2.0
 4  0.124553  0.644059  0.063706  0.458655  0.160206  0.741128  1.0,
              0         1         2
 负理想解  0.124553  0.375194  0.063706
 正理想解  0.696834  0.644059  0.633755,
 array([0.30761, 0.35505, 0.33734]))