In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline

In [None]:
# 对每一列进行标准化处理
def cost_normalize(column):
    '''
    Min-max normalize a cost-type indicator (smaller raw values are better).

    Each value is mapped to ``(max - x) / (max - min)``, so the smallest raw
    value scores 1 and the largest scores 0.

    Parameters
    ----------
    column : pandas.Series or other array-like exposing ``.max()`` / ``.min()``
        Values of a single indicator.

    Returns
    -------
    Object of the same kind as ``column`` holding the normalized scores.
    A constant column (max == min) is returned as all zeros instead of the
    NaN values that 0 / 0 would produce.
    '''
    max_value = column.max()
    min_value = column.min()
    value_range = max_value - min_value
    if value_range == 0:
        # Every entry is identical: avoid 0 / 0 and give them all the same score.
        return column * 0.0
    return (max_value - column) / value_range

def benefit_normalize(column):
    '''
    Min-max normalize a benefit-type indicator (larger raw values are better).

    Each value is mapped to ``(x - min) / (max - min)``, so the largest raw
    value scores 1 and the smallest scores 0.

    Parameters
    ----------
    column : pandas.Series or other array-like exposing ``.max()`` / ``.min()``
        Values of a single indicator.

    Returns
    -------
    Object of the same kind as ``column`` holding the normalized scores.
    A constant column (max == min) is returned as all zeros instead of the
    NaN values that 0 / 0 would produce.
    '''
    max_value = column.max()
    min_value = column.min()
    value_range = max_value - min_value
    if value_range == 0:
        # Every entry is identical: avoid 0 / 0 and give them all the same score.
        return column * 0.0
    return (column - min_value) / value_range

def fix_normalize(column, a=0):
    '''
    Normalize a fixed-target indicator (the closer to ``a``, the better).

    The absolute deviation ``|x - a|`` is min-max normalized and inverted, so
    the value closest to ``a`` scores 1 and the farthest scores 0.

    Parameters
    ----------
    column : pandas.Series or numpy.ndarray
        Values of a single indicator.
    a : number, default 0
        Target value.

    Returns
    -------
    Object of the same kind as ``column`` holding the normalized scores.
    When every value is equally far from ``a`` the result is all zeros
    instead of the NaN values that 0 / 0 would produce.
    '''
    # Compute the deviation once; the original evaluated it three times.
    deviation = np.abs(column - a)
    max_value = deviation.max()
    min_value = deviation.min()
    deviation_range = max_value - min_value
    if deviation_range == 0:
        # All deviations identical: avoid 0 / 0 and give every entry the same score.
        return deviation * 0.0
    return (max_value - deviation) / deviation_range

In [None]:
# 计算每列的权重
def entropy_weight(column):
    '''
    Compute indicator weights with the entropy weight method.

    Every column is converted to proportions of its column sum, the entropy
    of those proportions is scaled by ``log(number of rows)``, and the weights
    are ``1 - entropy`` rescaled to sum to 1. NaN terms (e.g. from a zero
    proportion, where ``0 * log(0)`` is NaN) are skipped by ``np.nansum``.
    '''
    proportions = column / column.sum()
    log_rows = np.log(len(column))
    entropy = np.nansum(-proportions * np.log(proportions) / log_rows, axis=0)
    divergence = 1 - entropy
    return divergence / np.sum(divergence)

def coefficient_variation(column):
    '''
    Compute indicator weights with the coefficient-of-variation method.

    The coefficient of variation (standard deviation divided by mean) is
    taken per column and the coefficients are rescaled to sum to 1.
    The weights are returned as a numpy array.
    '''
    dispersion = column.std()
    centre = column.mean()
    variation = dispersion / centre
    return np.array(variation / np.sum(variation))

In [None]:
# Workbook name stems, one per first-level factor. The leading digit is used
# by the main cell to pick the matching row of benidx / costidx / fixidx.
datafilelist = [
    '1.经济发展度',
    '2.社会和谐度',
    '3.生活方便度',
    '4.环境舒适度',
]
# Sheet names (one sheet per year) read from every workbook.
yearlist = [str(year) for year in range(2018, 2023)]
# Debug overrides for a quick single-file / single-year run:
# datafilelist = ['1.经济发展度']
# yearlist = ['2018']

# Display names of the four first-level factors.
factorl1list = ['经济发展度', '社会和谐度', '生活方便度', '环境舒适度']

# Reference list of indicator column names per factor, kept disabled.
# NOTE(review): the group sizes do not match bcfi below, so it may be stale — confirm.
# factors = [['社会消费品零售总额（亿元）', '人均GDP(元)', '一般公共预算收入（亿元）', '一般公共预算支出（亿元）', '第三产业占比'],
        #    ['住户存款余额（亿元）', '城镇非私营单位在岗职工平均工资(元)', '年末户籍人口（万人）', '住宅商品房价格与人均工资比', '城镇登记失业率', '社会抚养比'],
        #    ['年末邮政局（个）', '普通本科专科在校学生数（万人）', '人均移动电话资源占有数(个)', '每千人口拥有执业 (助理) 医师数', '医院数（个 ）', '执业(助理)医师数（人）', '年末户籍人口（千人）', '年末移动电话用户数量（万个）'],
        #    ['道路交通等效声级dB(A)', '环境噪声等效声级dB(A)', '第二产业占比', '城市绿化覆盖率', '第一产业增加值(亿元)', '第三产业增加值（亿元）'],
        #   ]

# Indicator type per factor, one entry per numeric column:
# 0 = benefit type, 1 = cost type, 2 = intermediate (fixed-target) type.
bcfi = [
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [1, 1, 1, 0],
]
# Column positions of each indicator type, derived from bcfi so the three
# lists cannot drift apart from it.
benidx = [[pos for pos, kind in enumerate(kinds) if kind == 0] for kinds in bcfi]
costidx = [[pos for pos, kind in enumerate(kinds) if kind == 1] for kinds in bcfi]
fixidx = [[pos for pos, kind in enumerate(kinds) if kind == 2] for kinds in bcfi]

# NOTE(review): presumably precomputed weights of the four factors; not used
# by any code visible in this notebook — confirm before relying on it.
global_weight = [0.33617548, 0.2995685, 0.25207122, 0.1121848]


In [None]:
# df1 is template, df2 is the data to be sorted
def change_index(df1, df2):
    '''
    Reorder the rows of ``df2`` to follow the '城市' order of ``df1``.

    Parameters
    ----------
    df1 : pandas.DataFrame or pandas.Series
        Template. A DataFrame must have a '城市' column; a Series is first
        turned into a frame with ``reset_index()`` (in which case ``df2`` is
        converted the same way), so its index must be named '城市'.
    df2 : pandas.DataFrame or pandas.Series
        Data to reorder, keyed by '城市' in the same way as ``df1``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the rows of ``df2`` in template order, '城市' as the
        first column and a fresh default index.

    Raises
    ------
    KeyError
        If '城市' is missing, or a template city is absent from ``df2``.

    Neither argument is modified. The previous version re-indexed both
    frames in place, which left the caller's ``df2`` indexed by '城市' and
    left the template re-indexed whenever the lookup raised.
    '''
    if isinstance(df1, pd.Series):
        df1 = pd.DataFrame(df1).reset_index()
        df2 = pd.DataFrame(df2).reset_index()
    # set_index() without inplace returns new frames, so the inputs stay intact.
    template_order = df1.set_index('城市').index
    return df2.set_index('城市').loc[template_order].reset_index()
# economy_2018, economy_2019 = change_index(economy_2018, economy_2019)
# economy_2019

In [None]:
import os

def use_svg_display():
    '''
    Ask the inline matplotlib backend to emit figures in 'svg' format.
    '''
    inline_backend = matplotlib_inline.backend_inline
    inline_backend.set_matplotlib_formats('svg')

def set_figsize(figsize=(3.5, 2.5)):
    '''
    Set matplotlib's global default figure size.

    ``figsize`` is stored in ``plt.rcParams['figure.figsize']``, so it only
    affects figures created after the call.
    '''
    # use_svg_display()
    plt.rcParams.update({'figure.figsize': figsize})

def get_output_dir():
    '''
    Return the path of the "output" directory under the current working
    directory, creating the directory first if it does not exist yet.
    '''
    output_dir = os.path.join(os.getcwd(), "output")
    os.makedirs(output_dir, exist_ok=True)
    return output_dir

# def plot_factor(dtframe, filename):
#     plt.rc('font',family='SimHei')  #用来正常显示中文标签
#     plt.rcParams['axes.unicode_minus']=False #用来正常显示负号
#     # plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']

#     # use_svg_display()
#     set_figsize((15, 2.5))
#     plt.plot(dtframe.iloc[:, 1], 'o-', linewidth=2, color='r', alpha=0.5, label=f'{filename}')
#     # plt.xlabel('xlabel')
#     plt.ylabel(filename)
#     plt.xticks(np.linspace(0, 25, 26), dtframe.iloc[:, 0])
#     plt.legend()
#     # plt.savefig(f'{filename}.pdf', bbox_inches='tight')
#     plt.savefig(f'{filename}.png', bbox_inches='tight')
#     plt.show()
#     plt.clf()

# def plot_dataframe(dtframe):
#     plt.rc('font',family='SimHei')  #用来正常显示中文标签
#     plt.rcParams['axes.unicode_minus']=False #用来正常显示负号
#     # plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']

#     # use_svg_display()
#     set_figsize((15, 2.5))
#     dtframe.plot(subplots=True)
#     plt.show()
#     plt.clf()    

def plot_df_eachcol(dtframe, fn, yr):
    '''
    Save one line chart per data column of ``dtframe`` as a PDF.

    Parameters
    ----------
    dtframe : pandas.DataFrame
        The first column supplies the x-axis tick labels; every other column
        is plotted in its own figure.
        NOTE(review): tick positions are 0..len-1, which assumes a default
        RangeIndex (as produced by ``change_index``) — confirm for other callers.
    fn : str
        Name used as the first part of the legend label and file name.
    yr : str
        Year used as the last part of the legend label and file name.

    Each chart is written to ``<output dir>/<fn>_<col>_<yr>.pdf``.
    '''
    plt.rc('font', family='SimHei')  # font that can render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
    output_dir = get_output_dir()
    set_figsize((15, 2.5))

    # One tick per row, whatever the row count (previously hard-coded to 26).
    tick_positions = range(len(dtframe))
    tick_labels = dtframe.iloc[:, 0]

    for idx, col in enumerate(dtframe.columns):
        if idx == 0:
            continue
        fig, ax = plt.subplots()
        ax.plot(dtframe.iloc[:, idx], 'o-', linewidth=2, color='r', alpha=0.5, label=f'{fn}_{col}_{yr}')
        ax.set_ylabel(col)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels)
        ax.legend()
        fig.savefig(os.path.join(output_dir, f'{fn}_{col}_{yr}.pdf'), bbox_inches='tight')
        # Close instead of clear: a cleared figure stays open and is reused
        # (and rendered blank by the notebook) after the loop.
        plt.close(fig)

def plot_rank(rank, filename):
    '''
    Plot a score series as a labelled line chart and save it as a PDF.

    Parameters
    ----------
    rank : pandas.Series
        Scores to plot; the index supplies the x values and each point is
        annotated with its score rounded to two decimals.
    filename : str
        Legend label and file name stem; the chart is written to
        ``<output dir>/<filename>.pdf``.
    '''
    plt.rc('font', family='SimHei')  # font that can render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

    set_figsize((15, 2.5))
    fig, ax = plt.subplots()
    ax.plot(rank.index, rank.values, 'o-', linewidth=2, color='r', alpha=0.5, label=f'{filename}')
    for a, b in zip(rank.index, rank.values):
        ax.text(a, b, f'{round(b,2)}')
    ax.set_ylabel(f'综合评价分数')
    ax.legend()

    output_dir = get_output_dir()
    fig.savefig(os.path.join(output_dir, f'{filename}.pdf'), bbox_inches='tight')
    # Close instead of clear: a cleared figure stays open, so the notebook
    # would render it as a blank figure at the end of the cell.
    plt.close(fig)


def plot_ranks_5yrs(dffinalrankbyyr, filename):
    '''
    Plot every column of a score table as its own line and save the chart.

    Parameters
    ----------
    dffinalrankbyyr : pandas.DataFrame
        One column per series to draw (the notebook passes one column per year).
    filename : str
        File name stem; the chart is written to ``<output dir>/<filename>.pdf``.

    Legend entries are taken from the frame's own column labels, so they
    always match the plotted lines. The previous version used the global
    ``yearlist``, which is only correct when the columns are exactly those
    years in that order.
    '''
    plt.rc('font', family='SimHei')  # font that can render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

    set_figsize((15, 2.5))
    fig, ax = plt.subplots()
    ax.plot(dffinalrankbyyr, 'o-', linewidth=2, alpha=0.5)
    ax.set_ylabel(f'综合评价分数')
    ax.legend(labels=[str(label) for label in dffinalrankbyyr.columns])

    output_dir = get_output_dir()
    fig.savefig(os.path.join(output_dir, f'{filename}.pdf'), bbox_inches='tight')
    # Close instead of clear so no blank figure is left open after saving.
    plt.close(fig)



In [None]:

def pie_rank(weight_entropy, yr):
    '''
    Draw the four factor weights as a pie chart, save it as a PDF and show it.

    ``weight_entropy`` holds the wedge sizes, in the order of the four factor
    names listed below. The chart is written to
    ``<output dir>/综评指标权重_<yr>.pdf``.
    '''
    set_figsize((5, 4))
    plt.rc('font', family='SimHei')  # font that can render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

    factor_names = ['经济发展度', '社会和谐度', '生活方便度', '环境舒适度']
    wedge_offsets = (0.08, 0, 0, 0)
    wedge_colors = [plt.cm.Accent(position) for position in range(len(factor_names))]

    pie_options = dict(
        labels=factor_names,
        explode=wedge_offsets,  # pull the first wedge out
        autopct="%3.1f%%",      # print the percentage on each wedge
        startangle=90,          # angle at which the first wedge starts
        shadow=True,            # draw a shadow
        colors=wedge_colors,    # wedge colours
    )
    plt.pie(weight_entropy, **pie_options)

    target_path = os.path.join(get_output_dir(), f'综评指标权重_{yr}.pdf')
    plt.savefig(target_path, bbox_inches='tight')
    plt.show()

In [None]:
def getdfallranks1yr(dfallranks, yr):
    '''
    Combine the first-level factor scores of one year into an overall score.

    Parameters
    ----------
    dfallranks : pandas.DataFrame
        One column per (factor, year) pair, ordered factor by factor with the
        years of ``yearlist`` inside each factor (the layout built by the
        main cell).
    yr : str or int
        Year to evaluate; must be an entry of ``yearlist``.

    Returns
    -------
    pandas.Series
        Overall score per row: the benefit-normalized factor scores weighted
        by the coefficient-of-variation method.

    Raises
    ------
    ValueError
        If ``yr`` is not in ``yearlist``.

    Side effects: prints the numeric column labels and the resulting scores,
    and saves a chart of the scores sorted in descending order via
    ``plot_rank``.
    '''
    # Locate the year's columns from the configuration instead of the
    # hard-coded base year 2018 and stride 5, so a different yearlist works.
    n_years = len(yearlist)
    year_offset = yearlist.index(str(yr))
    year_columns = [year_offset + k * n_years for k in range(len(factorl1list))]
    dfranks = dfallranks.iloc[:, year_columns].copy()
    dfranks.columns = factorl1list

    # preprocess data
    numerical_features = dfranks.dtypes[dfranks.dtypes != 'object'].index
    print(numerical_features)

    # Every factor score is a benefit-type value. The original also built an
    # always-empty "cost" part, which contributed nothing and is dropped.
    dfranks_new = dfranks[numerical_features].apply(benefit_normalize)

    # calculate weights, then the weighted overall score
    coevar = coefficient_variation(dfranks_new)
    allrank = dfranks_new.dot(coevar)
    print(allrank)

    sorted_allrank = allrank.sort_values(ascending=False)
    plot_rank(sorted_allrank, f'全指标_综合评价_{yr}')

    return allrank



In [None]:
# main function

# all rank series
# Number of leading rows kept from every sheet.
# NOTE(review): presumably one row per city — confirm against the workbooks.
n_rows = 26

# Unnamed score Series, appended in (file, year) order.
allranks = []
# The same scores, each named '<factor>_<year>', to become the columns of dfallranks.
named_ranks = []

# The first sheet of the first workbook fixes the row order for all other sheets.
# The context manager closes the workbook, which the original never did.
with pd.ExcelFile(f'./data/{datafilelist[0]}2018-2022.xlsx') as template_file:
    df_template = template_file.parse(yearlist[0]).head(n_rows)

for fn in datafilelist:
    strfilename = f'./data/{fn}2018-2022.xlsx'
    print(strfilename)
    # The leading digit of the file stem selects the row of benidx / costidx / fixidx.
    fnid = int(fn[0]) - 1
    # Closes the workbook even when processing one of its sheets raises.
    with pd.ExcelFile(strfilename) as pdFile:
        # process data for each year
        for yr in yearlist:
            print(f'{yr}')
            df_yr = pdFile.parse(yr).head(n_rows)
            df_yr = change_index(df_template, df_yr)

            # plot each column
            # plot_df_eachcol(df_yr, fn[2:], yr)

            # preprocess data: keep the non-object (numeric) columns
            numerical_features = df_yr.dtypes[df_yr.dtypes != 'object'].index
            print(numerical_features)

            benefit_index = numerical_features[benidx[fnid]]
            cost_index = numerical_features[costidx[fnid]]
            fix_index = numerical_features[fixidx[fnid]]

            # normalize each column by indicator type and label the rows
            # with the first column of the sheet
            row_labels = df_yr.iloc[:, 0]
            df_yr_benefit_new = df_yr[benefit_index].apply(benefit_normalize).set_index(row_labels)
            df_yr_cost_new = df_yr[cost_index].apply(cost_normalize).set_index(row_labels)
            df_yr_fix_new = df_yr[fix_index].apply(fix_normalize).set_index(row_labels)

            # concat benefit, cost and fix parts into the normalized matrix
            df_yr_new = pd.concat((df_yr_benefit_new, df_yr_cost_new, df_yr_fix_new), axis=1)

            # calculate weights
            # weight_entropy = entropy_weight(df_yr_new.iloc[:, :])
            coevar = coefficient_variation(df_yr_new)
            # calculate the weighted score of every row
            rank = df_yr_new.dot(coevar)
            print(f'add to allranks[]')
            allranks.append(rank)
            named_ranks.append(rank.rename(f'{fn[2:]}_{yr}'))

            sorted_rank = rank.sort_values(ascending=False)
            plot_rank(sorted_rank, f'{fn[2:]}_综合评价_{yr}')

# Build the score table once instead of re-concatenating inside the loop.
dfallranks = pd.concat(named_ranks, axis=1) if named_ranks else pd.DataFrame()



In [None]:

print(dfallranks)

# Overall score per year, one column per entry of yearlist. The columns are
# collected first and concatenated once instead of growing the frame in a loop.
yearly_scores = [getdfallranks1yr(dfallranks, yr).rename(f'{yr}') for yr in yearlist]
dffinalrankbyyr = pd.concat(yearly_scores, axis=1) if yearly_scores else pd.DataFrame()

print(f'dffinalrankbyyr')
print(dffinalrankbyyr)


In [None]:

# Order the rows by the score of the last configured year (previously the
# hard-coded column '2022'), highest first. Re-binding the name instead of
# sorting in place keeps the cell safe to re-run.
dffinalrankbyyr = dffinalrankbyyr.sort_values(by=yearlist[-1], ascending=False)

print(f'sorted dffinalrankbyyr')
print(dffinalrankbyyr)

plot_ranks_5yrs(dffinalrankbyyr, f'总评级')


In [None]:
class StopExecution(Exception):
    '''
    Exception raised to stop running the notebook at a chosen cell.

    NOTE(review): ``_render_traceback_`` is presumably the hook IPython
    calls to render an exception's traceback; returning nothing keeps the
    stop silent — confirm against the IPython documentation.
    '''

    def _render_traceback_(self):
        return None


In [None]:
# Deliberately abort here: the cells below this one are scratch / disabled
# code and are not meant to run.
raise StopExecution

In [None]:
# Sentinel: if this text appears, the stop in the previous cell did not take effect.
print('Should not see me printed!')

In [None]:
# def plot_4ranks(allranks5yr, filename):
#     plt.rc('font',family='SimHei')  #用来正常显示中文标签
#     plt.rcParams['axes.unicode_minus']=False #用来正常显示负号
#     # plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']

#     set_figsize((15, 2.5))

#     sorted_allranks4yr = []
#     sorted_templ = allranks5yr[4].sort_values(ascending=False)
#     for r in allranks5yr:
#         sorted_allranks4yr.append(change_index(sorted_templ, r))

#     for idx, rank in enumerate(sorted_allranks4yr):
#         rank.set_index('城市', inplace=True)
#         plt.plot(rank.index, rank.values, 'o-', linewidth=2, alpha=0.5, label=f'{2018+idx}')
#         # for a,b in zip(rank.index, rank.values):
#         #     plt.text(a, b, f'{round(b,2)}')

#     # plt.xlabel('xlabel')
#     plt.ylabel(f'综合评价分数')
#     # plt.xticks(np.linspace(0, 25, 26), rank.iloc[:, 0])
#     plt.legend()
#     # plt.savefig(f'{filename}.pdf', bbox_inches='tight')
#     output_dir = get_output_dir()
#     plt.savefig(os.path.join(output_dir, f'{filename}.pdf'), bbox_inches='tight')
#     # plt.show()
#     plt.clf()


In [None]:
# offsetyr = (int(yr)-2018)
# dfranks = dfallranks.iloc[:, [offsetyr, offsetyr+5, offsetyr+10, offsetyr+15]]
# print(type(dfranks))
# dfranks.columns = factorl1list
# print(f'dfranks {yr}')
# print(dfranks)


In [None]:
# allranks5yr = []

# for yr in yearlist:
#     allranks5yr.append(allranks1yr(allranks, yr))

# # allranks5yr.sort
# print(allranks5yr)
# plot_4ranks(allranks5yr, f'总评级')


In [None]:
# def allranks1yr(allranks, yr):
#     # factorl1list = ['经济发展度', '社会和谐度', '生活方便度', '环境舒适度']

#     offsetyr = (int(yr)-2018)
#     # dfranks = pd.concat([allranks[0], allranks[1], allranks[2], allranks[3]], axis=1)
#     dfranks = pd.concat([allranks[0+offsetyr], allranks[5+offsetyr], allranks[10+offsetyr], allranks[15+offsetyr]], axis=1)
#     dfranks.columns = factorl1list
#     print(f'dfranks {yr}')
#     print(dfranks)

#     # preprocess data
#     numerical_features = dfranks.dtypes[dfranks.dtypes != 'object'].index
#     print(numerical_features)

#     benefit_index = numerical_features[0:4]
#     cost_index = numerical_features[0:0]
#     # fix_index = numerical_features[0:0]

#     dfranks_benefit = dfranks[benefit_index]
#     dfranks_cost = dfranks[cost_index]
#     # df_yr_fix = dfranks[fix_index]

#     # normalize each column
#     dfranks_benefit_new = dfranks_benefit.apply(benefit_normalize)
#     dfranks_cost_new = dfranks_cost.apply(cost_normalize)
#     # df_yr_fix_new = df_yr_fix.apply(fix_normalize)

#     # concat benefit, cost and fix parts
#     dfranks_new = pd.concat((dfranks_benefit_new, dfranks_cost_new), axis=1)

#     # now dfranks_new is the 相对偏差模糊矩阵
#     print(f'dfranks_new')
#     print(dfranks_new)

#     # calculate weights
#     # weight_entropy = entropy_weight(df_yr_new.iloc[:, :])
#     # print(weight_entropy)
#     coevar = coefficient_variation(dfranks_new.iloc[:, :])
#     # print(coevar)
#     pie_rank(coevar.tolist(), yr)
#     # calculate rank
#     allrank = dfranks_new.iloc[:, :].dot(coevar)
#     print(allrank)

#     sorted_allrank = allrank.sort_values(ascending=False)
#     plot_rank(sorted_allrank, f'全部_综合评价_{yr}')

#     return allrank

