In [1]:
import pandas as pd
import numpy
from IPython.display import HTML, display
from sklearn import feature_selection

def draw(title, indicators, from_date='2015-01-01', data_dir="../data-指数-260114/"):
    """Tabulate how strongly each index's closing price correlates with its valuation metrics.

    For every entry in ``indicators`` the workbook ``data_dir + name + ".xls"``
    is read, rows dated before ``from_date`` are dropped, and
    ``feature_selection.r_regression`` is evaluated between every metric
    column and the ``收盘价`` (closing price) column. As a side effect an HTML
    heading built from ``title`` is displayed before the table is returned.

    Args:
        title: Group label shown in the heading above the table.
        indicators: Workbook base names (without extension); they also become
            the row index of the result.
        from_date: Inclusive lower bound applied to the ``日期`` (date) column.
        data_dir: Prefix prepended to each workbook name; it is concatenated
            as-is, so it must end with a path separator.

    Returns:
        pandas.DataFrame with one row per indicator: one coefficient column
        per metric plus ``最早数据日期``, the earliest date left after filtering.

    Raises:
        ValueError: If an indicator has no rows on or after ``from_date``.
    """
    # Metrics whose correlation with the closing price is examined.
    col_names = ['PE_ETF加权', 'PE_市值加权', 'PE_等权','PB_ETF加权', 'PB_市值加权', 'PB_等权', '股息收益率 %', 'ROE %']
    date_col = '日期'

    rows = []

    for indicator in indicators:
        df = pd.read_excel(data_dir + indicator + ".xls")
        # Keep only the rows dated on or after the cutoff.
        df = df.loc[df[date_col] >= from_date]
        if df.empty:
            # Fail with the offending index name instead of an opaque
            # error from the correlation routine.
            raise ValueError(
                "no rows on or after {} for indicator {!r}".format(from_date, indicator)
            )

        # Feature matrix and target vector.
        features = df[col_names]
        price = df['收盘价'].values

        coefficients = feature_selection.r_regression(features, price)
        # Use min() rather than the last row so the reported "earliest date"
        # does not depend on how the workbook happens to be sorted. Building a
        # plain list also avoids numpy coercing floats and dates into a single
        # array dtype.
        rows.append([*coefficients, df[date_col].min()])

    display(HTML('<h4><center>{}</center></h4>'.format(title + ' - 指数与指标的相关系数')))
    return pd.DataFrame(data=rows, columns=[*col_names, '最早数据日期'], index=indicators)

# Display settings: never truncate columns, render floats with four decimals.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", lambda value: '%.4f' % value)

# Index groups to report, in display order: (heading title, workbook base names).
INDEX_GROUPS = [
    # Broad-based indices
    ('宽基', [
        "SH000016-上证50",
        "SH000300-沪深300",
        "SZ399006-创业板指",
        "SH000510-中证A500",
    ]),
    # Consumer and livelihood sectors
    ('消费民生', [
        'SH000932-中证消费',
        # 'CSIH30533_中国互联网50',  (currently disabled)
        'SZ399986-中证银行',
        'SZ399989-中证医疗',
        'CSI930726-中证生物医药',
    ]),
    # New technology
    ('新科技', [
        'CSI930713-人工智能',
        'CSI000941-新能源',
        'CSIH30184-半导体',
        'SZ399976-CS新能源车',
        'CSIH30590-机器人',
        'CSI931798-光伏龙头',
    ]),
    # Basic industry
    ('基础工业', [
        "SH000928-中证能源",
        "SZ399440-国证钢铁",
        "CSI930632-CS稀金属",
        "SH000819-有色金属",
        "CSIH30199-电力指数",
    ]),
    # Defence and military industry
    ('国防军工', [
        'SZ399967-中证军工',
    ]),
    # Sunset industries
    ('夕阳产业', [
        'CSI931009-建筑材料',
        'SZ399995-基建工程',
    ]),
]

# Render one correlation table per group, in the order listed above.
for group_title, indicators in INDEX_GROUPS:
    display(draw(group_title, indicators))

Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
SH000016-上证50,0.7749,0.7187,0.5451,0.4423,0.4073,0.4236,-0.5695,-0.2327,2015-01-05
SH000300-沪深300,0.7652,0.7244,0.4573,0.484,0.3639,0.4981,-0.4878,-0.2435,2015-01-05
SZ399006-创业板指,0.5064,0.5749,0.538,0.7779,0.7327,0.631,-0.3379,0.4348,2015-01-05
SH000510-中证A500,0.9783,0.9707,0.9577,0.9866,0.9738,0.9273,-0.8482,-0.6349,2024-09-24


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
SH000932-中证消费,0.6482,0.6162,0.2997,0.8959,0.8664,0.4325,-0.1997,0.4904,2015-01-05
SZ399986-中证银行,0.5932,0.7711,0.3685,-0.0421,0.0171,-0.0943,-0.4019,-0.428,2015-01-05
SZ399989-中证医疗,0.2846,0.3354,0.3985,0.8345,0.8582,0.6475,-0.445,0.6416,2015-01-05
CSI930726-中证生物医药,0.4278,0.584,0.2901,0.6996,0.8429,0.3184,-0.4711,0.2978,2015-08-06


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
CSI930713-人工智能,0.5128,0.61,0.3465,0.8279,0.742,0.5075,-0.4012,0.0727,2015-08-03
CSI000941-新能源,0.248,0.4754,0.078,0.917,0.9365,0.8804,-0.4461,0.5483,2015-01-05
CSIH30184-半导体,-0.1803,0.4378,0.42,0.8676,0.8194,0.8041,-0.5097,0.419,2015-01-05
SZ399976-CS新能源车,0.3005,0.424,0.3933,0.8028,0.8021,0.5781,-0.4395,0.1266,2015-01-05
CSIH30590-机器人,0.422,0.6267,0.6346,0.9266,0.9361,0.78,-0.7168,0.2107,2015-02-11
CSI931798-光伏龙头,0.5494,0.6199,0.6873,0.6721,0.6906,0.6955,-0.4585,0.2597,2017-01-03


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
SH000928-中证能源,-0.1885,-0.2479,-0.0608,0.6233,0.6883,0.4336,0.291,0.685,2015-01-05
SZ399440-国证钢铁,0.4357,0.5186,0.4312,0.9369,0.9098,0.771,-0.5877,-0.0432,2015-01-05
CSI930632-CS稀金属,-0.0425,-0.0458,-0.3324,0.4994,0.5592,0.2284,-0.2266,0.5141,2015-05-13
SH000819-有色金属,-0.1362,0.0209,-0.1639,0.7431,0.8002,0.4145,-0.0076,0.4147,2015-01-05
CSIH30199-电力指数,0.0459,0.0713,0.1559,0.8678,0.8458,0.7164,-0.477,0.4165,2015-01-05


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
SZ399967-中证军工,0.3868,0.5725,0.6634,0.8753,0.8781,0.8143,-0.2301,0.1368,2015-01-05


Unnamed: 0,PE_ETF加权,PE_市值加权,PE_等权,PB_ETF加权,PB_市值加权,PB_等权,股息收益率 %,ROE %,最早数据日期
CSI931009-建筑材料,-0.4768,-0.5389,-0.4311,0.4714,0.6988,0.1671,0.0249,0.6509,2015-01-05
SZ399995-基建工程,0.9774,0.9807,0.9115,0.9666,0.969,0.9566,-0.8623,0.6246,2015-01-05
