In [23]:
import pandas as pd
import numpy
from IPython.display import HTML, display
from sklearn import feature_selection

def draw(title, indicators, from_date='2012-01-01', data_dir='../data-指数-新表格/'):
    """Tabulate how strongly each index's close correlates with its valuation metrics.

    For every entry in ``indicators`` the workbook ``<data_dir>/<indicator>.xls``
    is loaded, rows whose '日期' (date) value is below ``from_date`` are dropped,
    and ``feature_selection.r_regression`` is evaluated with the valuation
    columns as features and the '收盘价' (closing price) column as the target.
    An HTML heading built from ``title`` is displayed as a side effect.

    Args:
        title: Section name shown in the heading above the table.
        indicators: File stems of the index workbooks; they are also used as
            the row labels of the result.
        from_date: Inclusive lower bound applied to the '日期' column.
        data_dir: Directory that holds the ``.xls`` workbooks.

    Returns:
        A DataFrame with one row per indicator: one column per valuation
        metric holding its correlation coefficient, plus a final
        '最早数据日期' column with the earliest date that survived the filter.

    Raises:
        ValueError: If an indicator has no rows on or after ``from_date``.
    """
    import os

    # Valuation metrics whose correlation with the closing price is measured.
    col_names = ['PE_ETF加权', 'PE_市值加权', 'PE_等权','PB_ETF加权', 'PB_市值加权', 'PB_等权', '股息收益率 %', 'ROE %']

    rows = []
    for indicator in indicators:
        df = pd.read_excel(os.path.join(data_dir, indicator + '.xls'))
        # Keep only the rows dated on or after the cut-off.
        df = df.loc[df['日期'] >= from_date]
        if df.empty:
            # Fail with a message that names the offending workbook instead of
            # letting the regression helper reject an empty sample.
            raise ValueError(
                'no rows on or after {} for indicator {!r}'.format(from_date, indicator)
            )

        coefficients = feature_selection.r_regression(df[col_names], df['收盘价'].values)
        # Use the minimum date rather than the last row, so the "earliest data
        # date" column stays correct whatever order the sheet is sorted in.
        # A plain list keeps the float coefficients and the date side by side
        # without forcing them into a single array dtype.
        rows.append(list(coefficients) + [df['日期'].min()])

    display(HTML('<h4><center>{}</center></h4>'.format(title + ' - 指数与指标的相关系数')))
    return pd.DataFrame(data=rows, columns=col_names + ['最早数据日期'], index=indicators)

# Notebook-wide display settings: never truncate the column list, and render
# every float with four decimal places.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", lambda value: '%.4f' % value)

# Report layout: (section title, workbook file stems), in display order.
report_sections = [
    # Broad-based indices
    ('宽基', [
        '000016_上证50',
        '000300_沪深300',
        '399006_创业板指',
    ]),
    # Consumer and livelihood
    # ('CSIH30533_中国互联网50' is currently left out of this section.)
    ('消费民生', [
        '000932_中证消费',
        'SZ399986_中证银行',
    ]),
    # Healthcare and pharmaceuticals
    ('医药', [
        '399989_中证医疗',
        '930726_中证生物医药',
    ]),
    # New technology
    ('新科技', [
        '930713_CS人工智能',
        '000941_新能源',
        'H30184_半导体',
        '399976_CS新能源车',
        '931798_光伏龙头30',
    ]),
    # Basic industry
    ('基础工业', [
        'SH000928_中证能源',
        '399440_国证钢铁',
        '000819_有色金属',
        '930632_CS稀金属',
    ]),
    # Defense and military industry
    ('国防军工', [
        'SZ399967_中证军工',
    ]),
    # Sunset industries
    ('夕阳产业', [
        '931009_建筑材料',
        'CSI931775_房地产',
        'SZ399995_基建工程',
    ]),
]

# Render one correlation table per section, in the order listed above.
for section_title, indicators in report_sections:
    display(draw(section_title, indicators))


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
000016_上证50,0.7973,0.8019,0.5831,-0.0336,-0.0376,0.417,-0.6665,-0.8027,2012-01-04
000300_沪深300,0.8557,0.8548,0.5714,0.3209,0.2258,0.6149,-0.7319,-0.7896,2012-01-04
399006_创业板指,0.572,0.5571,0.5428,0.8207,0.7959,0.7111,-0.7121,0.7539,2012-01-04


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
000932_中证消费,0.8602,0.8285,0.507,0.9388,0.9285,0.6299,-0.4757,0.3067,2012-01-04
SZ399986_中证银行,0.6769,0.8679,0.7484,-0.2139,0.1422,-0.1307,-0.7504,-0.7476,2013-07-16


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
399989_中证医疗,0.0954,0.1652,0.2419,0.7921,0.8284,0.536,-0.1596,0.6169,2014-11-03
930726_中证生物医药,0.2628,0.5234,0.1608,0.6261,0.8307,0.0597,-0.2036,0.1339,2015-08-06


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
930713_CS人工智能,0.6921,0.7187,0.408,0.7526,0.6845,0.3945,-0.5246,-0.2569,2015-08-03
000941_新能源,0.012,-0.1262,-0.3282,0.9413,0.9364,0.9195,-0.4508,0.7018,2012-01-04
H30184_半导体,-0.0571,0.2865,-0.2137,0.9263,0.8926,0.9319,-0.5547,0.7471,2013-07-16
399976_CS新能源车,0.3251,0.4634,0.3976,0.8909,0.8877,0.7371,-0.5595,0.1548,2014-12-01
931798_光伏龙头30,0.5601,0.6392,0.7082,0.7201,0.7387,0.7401,-0.5704,0.293,2017-01-03


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
SH000928_中证能源,-0.1358,-0.1539,-0.0048,0.7925,0.7908,0.8647,-0.0742,0.6285,2012-01-04
399440_国证钢铁,0.5288,0.5788,0.3987,0.926,0.8907,0.7505,-0.6282,-0.1409,2014-12-31
000819_有色金属,-0.2638,0.0748,-0.0714,0.8353,0.8369,0.6437,-0.4118,0.464,2012-05-10
930632_CS稀金属,-0.0505,-0.0598,-0.3763,0.6194,0.6945,0.25,-0.2922,0.5739,2015-05-13


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
SZ399967_中证军工,0.4014,0.5504,0.6112,0.8574,0.8447,0.8025,-0.4127,0.1592,2013-12-30


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
931009_建筑材料,-0.2862,-0.1372,-0.3056,0.3033,0.7082,-0.0695,0.4543,0.4368,2013-07-16
CSI931775_房地产,-0.242,-0.2401,0.0562,-0.5613,0.9069,0.6453,-0.4039,0.3213,2013-07-16
SZ399995_基建工程,0.9767,0.9787,0.8998,0.964,0.967,0.9525,-0.9016,0.6537,2015-01-05
