In [10]:
import numpy as np
import pandas as pd
import akshare as ak
import statsmodels.formula.api as smf
import os

# ==========================================
# 1. Fund data preparation
# ==========================================
fund_code = "001938"
start_date = "20190101"
fund_individual_basic_info_xq_df = ak.fund_individual_basic_info_xq(symbol=fund_code)
fund_net_value_df = ak.fund_open_fund_info_em(
    symbol=fund_code,
    indicator="累计净值走势",
    )
fund_net_value_df["净值日期"] = pd.to_datetime(fund_net_value_df["净值日期"])
# Build fund_df as an explicit, independent copy of the filtered rows.
# Without the copy, the column assignment below writes into a slice of
# fund_net_value_df and pandas emits a SettingWithCopyWarning.
fund_df = (
    fund_net_value_df.loc[fund_net_value_df["净值日期"] >= pd.Timestamp(start_date)]
    .set_index("净值日期", drop=True)
    .copy()
)
# Daily return = day-over-day percentage change of the cumulative NAV column.
fund_df['日度回报率'] = fund_df['累计净值'].pct_change()
# The first row has no previous value, so its return is NaN; drop such rows.
fund_df = fund_df.dropna(subset=['日度回报率'])
# ==========================================
# 2. Factor data preparation: read the reference file
# ==========================================
# The CSV is expected under ../data relative to the current working directory.
current_folder = os.getcwd()
parent_folder = os.path.abspath(os.path.join(current_folder, ".."))
target_file_path = os.path.join(parent_folder, "data", "CAPM单因子模型（经典算法）日收益率（截至到20250630）.csv")

factor_capm = pd.read_csv(target_file_path)
factor_capm['date'] = pd.to_datetime(factor_capm['date'])
factor_capm = factor_capm.set_index('date')
# ==========================================
# 3. Data merge and preprocessing
# ==========================================
# Inner join on the date index keeps only days present in both data sets.
capm_data = pd.merge(fund_df[['日度回报率']], factor_capm, left_index=True, right_index=True, how='inner')
# Fund return in excess of the 'RF' column.
capm_data['Ri_Rf'] = capm_data['日度回报率'] - capm_data['RF']
# NOTE(review): this subtracts RF from MKT; confirm that the 'MKT' column in the
# CSV is a raw market return and not already an excess return.
capm_data['Market'] = capm_data['MKT'] - capm_data['RF']
# ==========================================
# 4. CAPM regression analysis
# ==========================================
formula = 'Ri_Rf ~ Market'
model = smf.ols(formula=formula, data=capm_data).fit()
alpha_daily = model.params['Intercept']
# Scale the daily intercept by 250 (presumably trading days per year; confirm).
alpha_annualized = alpha_daily * 250
print(model.summary())
print(f"基金 {fund_code} 年化超额收益为{alpha_annualized:.2%}")

                            OLS Regression Results                            
Dep. Variable:                  Ri_Rf   R-squared:                       0.700
Model:                            OLS   Adj. R-squared:                  0.700
Method:                 Least Squares   F-statistic:                     3662.
Date:                Tue, 18 Nov 2025   Prob (F-statistic):               0.00
Time:                        20:24:34   Log-Likelihood:                 6124.7
No. Observations:                1572   AIC:                        -1.225e+04
Df Residuals:                    1570   BIC:                        -1.223e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0001      0.000      0.906      0.3

  fund_df['日度回报率'] = fund_df['累计净值'].pct_change()


In [11]:
# ==========================================
# 5. Market-timing analysis
# ==========================================
# Add the squared market excess return as a second regressor next to the
# linear market term.
capm_data['Market_sq'] = capm_data['Market'].pow(2)
formula = 'Ri_Rf ~ Market + Market_sq'
model = smf.ols(formula=formula, data=capm_data).fit()
# The regression intercept is the daily alpha; scale it by 250 for the
# annualized figure printed below.
alpha_daily = model.params.loc['Intercept']
alpha_annualized = 250 * alpha_daily
print(model.summary())
print(f"基金 {fund_code} 年化超额收益为{alpha_annualized:.2%}")

                            OLS Regression Results                            
Dep. Variable:                  Ri_Rf   R-squared:                       0.701
Model:                            OLS   Adj. R-squared:                  0.700
Method:                 Least Squares   F-statistic:                     1837.
Date:                Tue, 18 Nov 2025   Prob (F-statistic):               0.00
Time:                        20:24:34   Log-Likelihood:                 6126.9
No. Observations:                1572   AIC:                        -1.225e+04
Df Residuals:                    1569   BIC:                        -1.223e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0002      0.000      1.516      0.1

In [12]:
# ==========================================
# 1. Carhart four-factor data preparation: read the reference file
# ==========================================
# The CSV is looked up in the "data" folder next to the working directory's parent.
current_folder = os.getcwd()
parent_folder = os.path.abspath(os.path.join(current_folder, ".."))
target_file_path = os.path.join(
    parent_folder,
    "data",
    "Carhart-四因子模型（经典算法）日收益率（截至到20250630）.csv",
)

factor_car = pd.read_csv(target_file_path)
factor_car['date'] = pd.to_datetime(factor_car['date'])
factor_car = factor_car.set_index('date')
# ==========================================
# 2. Data merge and preprocessing
# ==========================================
# Keep only the dates that appear in both the fund returns and the factor file.
car_data = fund_df[['日度回报率']].merge(
    factor_car,
    how='inner',
    left_index=True,
    right_index=True,
)
car_data['Ri_Rf'] = car_data['日度回报率'].sub(car_data['RF'])
car_data['Market'] = car_data['MKT'].sub(car_data['RF'])
# ==========================================
# 3. Carhart four-factor regression analysis
# ==========================================
formula = 'Ri_Rf ~ Market + SMB + HML + UMD'
model = smf.ols(formula=formula, data=car_data).fit()
# Daily intercept, its 250x annualized value, and the intercept's p-value.
alpha_daily = model.params.loc['Intercept']
alpha_annualized = 250 * alpha_daily
p_value = model.pvalues.loc['Intercept']
print(model.summary())
print(f"基金 {fund_code} 年化超额收益为{alpha_annualized:.2%}")


                            OLS Regression Results                            
Dep. Variable:                  Ri_Rf   R-squared:                       0.804
Model:                            OLS   Adj. R-squared:                  0.803
Method:                 Least Squares   F-statistic:                     1606.
Date:                Tue, 18 Nov 2025   Prob (F-statistic):               0.00
Time:                        20:24:34   Log-Likelihood:                 6458.9
No. Observations:                1572   AIC:                        -1.291e+04
Df Residuals:                    1567   BIC:                        -1.288e+04
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0001      0.000      1.456      0.1

In [13]:
# ==========================================
# 1. Model factor extension: read the DHS factor data
# ==========================================
# The CSV is looked up in the "data" folder next to the working directory's parent.
current_folder = os.getcwd()
parent_folder = os.path.abspath(os.path.join(current_folder, ".."))
target_file_path = os.path.join(
    parent_folder,
    "data",
    "Daniel-Hirshleifer-Sun-三因子模型（经典算法）日收益率（截至到20250630）.csv",
)

factor_dhs = pd.read_csv(target_file_path)
factor_dhs['date'] = pd.to_datetime(factor_dhs['date'])
factor_dhs = factor_dhs.set_index('date')
# ==========================================
# 2. Data merge
# ==========================================
# Attach only the R_fin and R_pead columns to the Carhart data set by date
# index, then discard every row that still contains a missing value.
full_data = car_data.merge(
    factor_dhs[['R_fin', 'R_pead']],
    left_index=True,
    right_index=True,
)
full_data = full_data.dropna()
# ==========================================
# 3. Run the combined "six-factor" regression analysis
# ==========================================
formula = 'Ri_Rf ~ Market + SMB + HML + UMD + R_pead + R_fin'
model = smf.ols(formula=formula, data=full_data).fit()
# Daily intercept and its 250x annualized value.
alpha_daily = model.params.loc['Intercept']
alpha_annualized = 250 * alpha_daily
print(model.summary())
print(f"基金 {fund_code} 年化超额收益为{alpha_annualized:.2%}")

                            OLS Regression Results                            
Dep. Variable:                  Ri_Rf   R-squared:                       0.808
Model:                            OLS   Adj. R-squared:                  0.808
Method:                 Least Squares   F-statistic:                     1100.
Date:                Tue, 18 Nov 2025   Prob (F-statistic):               0.00
Time:                        20:24:34   Log-Likelihood:                 6476.7
No. Observations:                1572   AIC:                        -1.294e+04
Df Residuals:                    1565   BIC:                        -1.290e+04
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0002   9.99e-05      1.900      0.0

In [14]:
# ==========================================
# 4. Multicollinearity check: VIF
# ==========================================
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
# Select the six factor columns used in the six-factor regression and prepend
# a constant column via add_constant.
X_factors = full_data.loc[:, ['Market', 'SMB', 'HML', 'UMD', 'R_fin', 'R_pead']]
X_factors_with_const = add_constant(X_factors)
def get_vif_summary(X_data):
    """Return a table of variance inflation factors for the columns of X_data.

    Parameters
    ----------
    X_data : pandas.DataFrame
        Regressor matrix. A column named 'const', if present, is included in
        the VIF computation but removed from the returned table.

    Returns
    -------
    pandas.DataFrame
        Columns "Factor" and "VIF", sorted by VIF in descending order, with a
        fresh 0..n-1 index.
    """
    design = X_data.values
    table = pd.DataFrame(
        {
            "Factor": X_data.columns,
            "VIF": [
                variance_inflation_factor(design, position)
                for position in range(X_data.shape[1])
            ],
        }
    )
    # Drop the constant's row from the report; it is not a factor.
    without_const = table.loc[table["Factor"] != 'const']
    return without_const.sort_values("VIF", ascending=False).reset_index(drop=True)
# Compute the VIF table for the factor matrix (with constant) and print it.
vif_summary = get_vif_summary(X_data=X_factors_with_const)
print(vif_summary)

   Factor       VIF
0   R_fin  3.538052
1     HML  2.185151
2     SMB  1.956189
3  Market  1.290608
4     UMD  1.110312
5  R_pead  1.051390


In [15]:
# ==========================================
# 5. Market-timing analysis
# ==========================================
# Add the squared market excess return to the Carhart data set and regress on
# it together with the four Carhart factors.
car_data['Market_sq'] = car_data['Market'].pow(2)
formula = 'Ri_Rf ~ Market + Market_sq + SMB + HML + UMD'
model = smf.ols(formula, car_data).fit()
# Daily intercept and its 250x annualized value.
alpha_daily = model.params.loc['Intercept']
alpha_annualized = 250 * alpha_daily
print(model.summary())
print(f"基金 {fund_code} 年化超额收益为{alpha_annualized:.2%}")

                            OLS Regression Results                            
Dep. Variable:                  Ri_Rf   R-squared:                       0.805
Model:                            OLS   Adj. R-squared:                  0.805
Method:                 Least Squares   F-statistic:                     1297.
Date:                Tue, 18 Nov 2025   Prob (F-statistic):               0.00
Time:                        20:24:34   Log-Likelihood:                 6465.4
No. Observations:                1572   AIC:                        -1.292e+04
Df Residuals:                    1566   BIC:                        -1.289e+04
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0003      0.000      2.511      0.0