In [29]:
import akshare as ak
import polars as pl
import numpy as np
import pandas as pd
import empyrical as ep
from dateutil.parser import parse
from datetime import datetime
import jqdatasdk as jq
import seaborn as sns
import matplotlib.pyplot as plt
from Data_Fetcher import *
from Factor_calculation import *
from Factor_Evaluation import *
from pylab import mpl
# Apply the style sheet FIRST: 'seaborn-v0_8' ships its own font.sans-serif list, so
# applying it after the font override would replace SimHei and break Chinese labels.
plt.style.use('seaborn-v0_8')
mpl.rcParams['font.sans-serif'] = ['SimHei']  # default font with CJK glyphs so Chinese text renders in plots
mpl.rcParams['axes.unicode_minus'] = False  # keep the minus sign '-' from rendering as a box

In [2]:
# Collect every distinct stock code listed in the CSI 500 constituent workbook, then
# load the stock data for those codes ('20150101' / '20231231' are presumably the
# start and end dates -- confirm against load_stock_data).
# The workbook is referenced by a plain relative name rather than the Windows-only
# '.\\' prefix: on POSIX a backslash is an ordinary filename character, so the old
# path would not resolve there. This also matches how the same file is read later.
code_list = (
    load_csi500_components('中证500指数成分股_月末_2016-2023.xlsx')['stock_code']
    .unique()
    .to_list()
)
stock_data = load_stock_data(code_list, '20150101', '20231231')

In [None]:
# Show the loaded stock_data through the cell's rich display (last expression).
stock_data

In [5]:
# Derive the price-based factor table from stock_data (logic lives in
# price_factors_calculation, which is defined outside this notebook).
price_factor = price_factors_calculation(stock_data)

In [None]:
# Show the computed price_factor table through the cell's rich display.
price_factor

In [7]:
# Compute the Alpha#002 factor from stock_data.
alpha_2 = alpha_002(stock_data)

In [8]:
# Compute the Alpha#003 factor from stock_data.
alpha_3 = alpha_003(stock_data)

In [None]:
# Read the fundamental-factor workbook and parse its 'date' column
# (text in YYYY-MM-DD form) into a polars Date, all in one pipeline.
fundamental_factor = (
    pl.read_excel('基本面因子.xlsx')
    .with_columns(
        pl.col('date').str.strptime(pl.Date, '%Y-%m-%d')
    )
)
fundamental_factor

In [None]:
# Read the month-end CSI 500 constituent workbook and parse its 'date'
# column (text in YYYY-MM-DD form) into a polars Date.
csi500_data = (
    pl.read_excel('中证500指数成分股_月末_2016-2023.xlsx')
    .with_columns(
        pl.col('date').str.strptime(pl.Date, '%Y-%m-%d')
    )
)
csi500_data

In [None]:

# Attach stock_data rows to the constituent list (inner join on date + stock code) and
# order each stock's rows chronologically so the window computation below is well-defined.
merged_data = (
    csi500_data
    .join(
        stock_data,
        left_on=['date', 'stock_code'],
        right_on=['日期', '股票代码']
    )
    .sort(['stock_code', 'date'])
)

# Next-period return per stock: percentage change of the close ('收盘') between
# consecutive rows of the same stock, shifted back one row so each row carries the
# return realised over the FOLLOWING period.
# A window expression (.over) replaces the former groupby(...).apply(lambda ...):
# it avoids a Python callback per group and the deprecated groupby/apply spellings,
# and keeps the deterministic (stock_code, date) row order of merged_data.
# NOTE(review): rows exist only for dates where the stock appears in csi500_data, so
# if a stock drops out and re-enters, its "next" row may be more than one month
# ahead -- confirm whether such gaps should be excluded.
result = (
    merged_data
    .with_columns(
        pl.col('收盘')
        .pct_change()
        .shift(-1)
        .over('stock_code')
        .alias('next_month_return')
    )
    .select(['date', 'stock_code', 'next_month_return'])
)



In [12]:
# CSI 500 index return: load the index series and parse its '日期' column to a Date.
csi500_return = (
    load_index_data('20150101', '20231231')
    .with_columns(pl.col('日期').str.strptime(pl.Date, '%Y-%m-%d'))
)

# Distinct dates present in the constituent table, ascending.
unique_dates = csi500_data['date'].unique().sort()

# Left-join the index rows onto those dates so every constituent date is kept,
# even when the index series has no row for it.
merged_index_data = (
    pl.DataFrame({'date': unique_dates})
    .join(csi500_return, left_on='date', right_on='日期', how='left')
    .sort('date')
)

# Return over the following period: close-to-close change, shifted back one row.
index_result = merged_index_data.select([
    pl.col('date'),
    pl.col('收盘').pct_change().shift(-1).alias('next_month_return'),
])



In [13]:

# Combine all factor tables onto the forward-return frame.
# Left joins keep every (date, stock_code) row of `result`; afterwards any row
# containing a null in any column is discarded.
join_keys = ['date', 'stock_code']
cn_keys = ['日期', '股票代码']
merged_factors = (
    result
    .join(fundamental_factor, left_on=join_keys, right_on=['date', 'code'], how='left')
    .join(price_factor, left_on=join_keys, right_on=cn_keys, how='left')
    .join(alpha_2, left_on=join_keys, right_on=cn_keys, how='left')
    .join(alpha_3, left_on=join_keys, right_on=cn_keys, how='left')
    .drop_nulls()
)

# Names of the factor columns.
factor_cols = [
    'size_factor', 'value_factor', 'profitability_factor',
    '10日动量', '20日波动率', '相对成交量', '60日相对强弱', '价量比',
    'alpha002', 'alpha003',
]



In [14]:
# Convert the merged polars frame to pandas with a (date, stock_code) MultiIndex, sorted.
# The isinstance guard keeps this cell re-runnable: after the first run merged_factors
# is a pandas DataFrame, which has no .to_pandas(), so an unguarded re-run would raise.
if isinstance(merged_factors, pl.DataFrame):
    merged_factors = (
        merged_factors
        .to_pandas()
        .set_index(['date', 'stock_code'])
        .sort_index()
    )


In [None]:
# Group the factor table by the '价量比' factor (grouping logic lives in get_group,
# defined outside this notebook), then display the result.
grouped_factors = get_group(merged_factors, '价量比')
grouped_factors


In [16]:
# Convert the index forward-return frame to pandas, drop rows with missing values
# and index it by date.
# The isinstance guard keeps this cell re-runnable: after the first run index_result
# is already a pandas DataFrame (no .to_pandas(), and 'date' is already the index).
if isinstance(index_result, pl.DataFrame):
    index_result = (
        index_result
        .to_pandas()
        .dropna()
        .set_index('date')
    )

In [None]:

# Per-group returns for the grouped factor frame (computed by get_algorithm_return).
returns = get_algorithm_return(grouped_factors)

# Add the index forward return as the benchmark column, and the spread between
# the G5 and G1 columns as 'excess_ret'.
returns['benchmark'] = index_result['next_month_return']
returns['excess_ret'] = returns['G5'] - returns['G1']

# Compound the returns into cumulative net value: sum log(1 + r), then exponentiate.
log_returns = np.log1p(returns)
cum_df = np.exp(log_returns.cumsum())

plot_nav(cum_df, '价量比')

In [None]:
# Plot the IC chart for the '价量比' factor (ic_plot is defined outside this notebook).
ic_plot(merged_factors,'价量比')


In [None]:
# Performance summary for every return column, rendered as a styled table:
# values shown as percentages, with the largest and smallest value highlighted
# (axis=1) in the two colours below.
performance_table = Strategy_performance(returns)
(
    performance_table.style
    .format('{:.2%}')
    .set_caption('价量比')
    .highlight_max(axis=1, color='#d65f5f')
    .highlight_min(axis=1, color='#5fba7d')
)