In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

## Function Definition

In [2]:
# Area correction
def predict_area_correct(means, areas):
    """Weight values by area and min-max scale the result to the range [0, 1].

    Parameters
    ----------
    means : pandas.Series or numpy.ndarray
        Values to correct.
    areas : pandas.Series or numpy.ndarray
        Areas, multiplied element-wise with ``means``.

    Returns
    -------
    Same type as ``means * areas``
        ``(v - min) / (max - min)`` of the area-weighted values. NaN entries
        stay NaN and are ignored when locating the minimum and maximum. When
        every non-NaN value is identical the range is zero and the result is
        NaN.

    Raises
    ------
    ValueError
        If the inputs are empty.
    """
    means_correct = means * areas
    # The builtin min()/max() are order-dependent when NaN is present (every
    # comparison against NaN is False), so a single leading NaN used to turn
    # the whole output into NaN. Use NaN-aware reductions instead.
    lowest = np.nanmin(means_correct)
    highest = np.nanmax(means_correct)
    # Normalise
    means_squeeze = (means_correct - lowest) / (highest - lowest)

    return means_squeeze

def light_area_correct(lights, areas):
    """Return the element-wise product of ``lights`` and ``areas``.

    No normalisation is applied to the result.
    """
    return lights * areas

In [3]:
# Area coverage rate
def area_rate(nums, areas):
    """Return ``nums`` divided by ``areas`` as a floating-point ratio."""
    as_float = nums * 1.0  # promote to float before dividing
    return as_float / areas

In [7]:
def generate_corr(predict_df, center2PAC, light_df, index_df):
    """Aggregate predictions and light values per PAC and attach the index table.

    The frames are merged on whatever column names they share (no explicit
    key is given). The merged result must contain the columns ``PAC``,
    ``predict`` and ``light_sum``, and ``index_df`` must contribute an
    ``area`` column.

    Parameters
    ----------
    predict_df : pandas.DataFrame
        Inner-joined with ``center2PAC``.
    center2PAC : pandas.DataFrame
        Left side of the first merge.
    light_df : pandas.DataFrame
        Left-joined onto the result, so rows without a match are kept.
    index_df : pandas.DataFrame
        Inner-joined onto the per-PAC aggregates.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``scores_df``: per-PAC means of ``predict`` and ``light_sum``, the
        row count ``num``, the columns of ``index_df``, and the
        area-corrected columns ``alter_predict`` and ``alter_lights``.
        ``corr_df``: ``scores_df`` without ``num`` and ``area``, with ``PAC``
        converted to strings.
    """
    # Merge
    merged = pd.merge(left=center2PAC, right=predict_df, how='inner')
    merged = pd.merge(left=merged, right=light_df, how='left')

    # County-level mean score, plus the number of merged rows per PAC
    per_pac = merged.groupby('PAC')
    scores_df = per_pac[['predict', 'light_sum']].mean()
    scores_df['num'] = per_pac.size()
    # Copy PAC out of the index into a trailing column, then restore a plain
    # 0..n-1 index so the next merge sees PAC only as a column.
    scores_df['PAC'] = scores_df.index
    scores_df = scores_df.reset_index(drop=True)

    # Merge the index table
    scores_df = pd.merge(left=scores_df, right=index_df, how='inner')

    # Area correction
    area = scores_df['area']
    scores_df['alter_predict'] = predict_area_correct(scores_df['predict'], area)
    scores_df['alter_lights'] = light_area_correct(scores_df['light_sum'], area)

    # Frame used for the correlation
    corr_df = scores_df.drop(columns=['num', 'area'])
    # PAC becomes text; presumably so it is not treated as a numeric variable.
    corr_df['PAC'] = list(map(str, corr_df['PAC']))

    return scores_df, corr_df

## Read in Data

In [5]:
# Coordinates / PAC / index / light
center2PAC = pd.read_csv('Data/center2PAC2017.csv')
center2PAC = center2PAC.dropna()  # discard rows with any missing value
PAC_index17 = pd.read_csv('Data/PAC_index17.csv')
light17 = pd.read_csv('Data/light2017.csv')

In [6]:
# Remove unnecessary columns
# NOTE(review): this cell rebinds its own inputs, so it fails if run twice
# without re-running the read-in cell first.
center2PAC = (
    center2PAC[['name', 'PAC']]
    # Strip the '-2017.png' + newline suffix; names without the trailing
    # newline are left unchanged by this pattern.
    .assign(name=lambda frame: frame['name'].map(
        lambda raw: re.sub(r'-2017\.png[\n]', '', raw)))
    .rename(columns={'name': 'y-x'})
)
PAC_index17 = PAC_index17.drop(columns='Year')

## National

In [None]:
# Load Data/siRes/national_light_predict.csv and display its (rows, columns)
national_predict = pd.read_csv(filepath_or_buffer='Data/siRes/national_light_predict.csv')
national_predict.shape

In [None]:
# Build the per-PAC score table and the frame used for correlation
national_scores, national_corr = generate_corr(
    predict_df=national_predict,
    center2PAC=center2PAC,
    light_df=light17,
    index_df=PAC_index17,
)
national_corr.shape

In [None]:
# Preview the first five rows
national_corr.head(n=5)

In [None]:
# Spearman rank correlation between the numeric columns.
# generate_corr converts PAC to strings, and pandas >= 2.0 no longer drops
# non-numeric columns in DataFrame.corr by default, so select the numeric
# (and boolean) columns explicitly. This works on every pandas version.
national_corr.select_dtypes(include=['number', 'bool']).corr('spearman')

In [None]:
# Save the per-PAC table used for the correlation.
national_corr.to_csv('Data/siRes/spear/national_corr_df.csv', index = False, header = True)
# Save the Spearman matrix. generate_corr converts PAC to strings, and
# pandas >= 2.0 no longer drops non-numeric columns in DataFrame.corr by
# default, so select the numeric (and boolean) columns explicitly.
# NOTE(review): index=False omits the row labels; rows follow the same order
# as the header columns.
national_corr.select_dtypes(include=['number', 'bool']).corr('spearman').to_csv(
    'Data/siRes/spear/national_spearman.csv', index = False, header = True)

In [None]:
# Histogram (6 bins) of num / area across the rows of national_scores
national_area_rates = area_rate(
    nums=national_scores['num'],
    areas=national_scores['area'],
)
plt.hist(x=national_area_rates, bins=6)
plt.show()