In [1]:
import pandas as pd
import numpy as np
import os

# ==============================================================================
# Main workflow: build the "free cash flow / total assets" ratio panel from two
# wide Excel panels and save the result as a new workbook.
# ==============================================================================

if __name__ == "__main__":

    # --- 1. Configuration: paths of the two input workbooks ---
    # NOTE(review): these are user-specific absolute paths, so the cell only
    # runs on a machine with this exact directory layout.
    FCFF_FILE_PATH = '/Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/初步数据/初步数据/stock_financial_indicators_fcff_wide_panel.xlsx'
    ASSETS_FILE_PATH = '/Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/初步数据/初步数据/stock_balance_sheets_total_assets_wide_panel.xlsx'

    print("====== 开始计算 '自由现金流 / 总资产' 指标 ======")

    # --- 2. Load both tables (first spreadsheet column becomes the index) ---
    try:
        # Free cash flow (FCFF) panel.
        fcff_df = pd.read_excel(FCFF_FILE_PATH, index_col=0)
        print(f"\n成功加载自由现金流数据，形状: {fcff_df.shape}")

        # Total-assets panel.
        assets_df = pd.read_excel(ASSETS_FILE_PATH, index_col=0)
        print(f"成功加载总资产数据，形状: {assets_df.shape}")

    except FileNotFoundError as e:
        # A missing input is fatal: stop the cell with a readable message.
        raise SystemExit(f"[错误] 文件未找到，请检查路径: {e.filename}")

    # --- 3. Check whether the two tables share identical row and column labels ---
    if fcff_df.shape == assets_df.shape and fcff_df.index.equals(assets_df.index) and fcff_df.columns.equals(assets_df.columns):
        print("\n数据检查：两个数据表的行和列完全对齐，可以进行计算。")
    else:
        print("\n[警告] 两个数据表的行或列不完全对齐，正在尝试强制对齐...")
        # Inner join on both axes: keep only the columns (reporting periods)
        # and rows (stock codes) that appear in both tables.
        fcff_df, assets_df = fcff_df.align(assets_df, join='inner', axis=1)
        fcff_df, assets_df = fcff_df.align(assets_df, join='inner', axis=0)
        print(f"对齐后数据形状: {fcff_df.shape}")


    # --- 4. Core computation: element-wise division of the aligned tables ---
    fcf_to_assets_ratio_df = fcff_df / assets_df

    print("\n核心步骤：已完成两个表的逐元素相除。")

    # --- 5. Clean up: turn +/-infinity into NaN ---
    # Presumably the infinities come from a zero total-assets denominator.
    fcf_to_assets_ratio_df.replace([np.inf, -np.inf], np.nan, inplace=True)
    print("已将计算过程中产生的无穷大值替换为空值(NaN)。")

    # --- 6. Preview and persist the result ---
    print("\n--- 计算出的新表 '自由现金流 / 总资产' (前5行) ---")
    print(fcf_to_assets_ratio_df.head())

    # Write the ratio panel to a new workbook, creating the folder if needed.
    OUTPUT_DIR = '/Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/初步数据/'
    OUTPUT_FILE_PATH = os.path.join(OUTPUT_DIR, 'Ratio-FCF-to-Assets.xlsx')

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    fcf_to_assets_ratio_df.to_excel(OUTPUT_FILE_PATH)
    print(f"\n已将比率结果保存到: {OUTPUT_FILE_PATH}")


成功加载自由现金流数据，形状: (5413, 31)
成功加载总资产数据，形状: (5413, 93)

[警告] 两个数据表的行或列不完全对齐，正在尝试强制对齐...
对齐后数据形状: (5413, 29)

核心步骤：已完成两个表的逐元素相除。
已将计算过程中产生的无穷大值替换为空值(NaN)。

--- 计算出的新表 '自由现金流 / 总资产' (前5行) ---
           2018-03-31  2018-06-30  2018-09-30  2018-12-31  2019-03-31  \
ts_code                                                                 
000001.SZ         NaN    0.009402         NaN    0.038901         NaN   
000002.SZ   -0.064804   -0.002604   -0.024295    0.026705   -0.024462   
000004.SZ    0.011529    0.034129    0.016258   -0.382034    0.456031   
000006.SZ   -0.022726    0.003398    0.059987    0.129302   -0.027407   
000007.SZ    0.010014    0.053670    0.042214    0.165448   -0.016976   

           2019-06-30  2019-09-30  2019-12-31  2020-03-31  2020-06-30  ...  \
ts_code                                                                ...   
000001.SZ    0.006653         NaN    0.035795         NaN    0.020274  ...   
000002.SZ    0.014727   -0.011676    0.015329   -0.004027    0.009

In [4]:
import pandas as pd
import numpy as np
import os # 引入os模块来处理文件路径

# ==============================================================================
#  Step 0: load the wide ratio panel from the Excel file
# ==============================================================================
print("--- 步骤 0: 从Excel文件加载准备好的宽表数据 ---")

# NOTE(review): the file holds the FCF / total-assets ratio, yet the variables
# below are named `df_gross_margin`, `average_gm`, `z_scores_gm`; the names look
# carried over from a gross-margin analysis - confirm before renaming, because
# they are notebook globals and 'z_score_gm' is also the exported column name.
file_path = '/Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/初步数据/Ratio-FCF-to-Assets.xlsx'

try:
    df_gross_margin = pd.read_excel(file_path, index_col=0)
    print(f"成功从路径 '{file_path}' 加载数据！")
    print("数据预览：")
    print(df_gross_margin.head())

except FileNotFoundError:
    # Fall back to an empty frame so the guard below skips all processing.
    print(f"[错误] 文件未找到！请检查路径是否正确: {file_path}")
    df_gross_margin = pd.DataFrame()
except Exception as e:
    # Any other read error is reported and handled the same way.
    print(f"读取Excel文件时出错: {e}")
    df_gross_margin = pd.DataFrame()

# ==============================================================================
#  Pre-processing and factor computation
# ==============================================================================

if not df_gross_margin.empty:
    print("\n--- 开始执行数据预处理和因子计算 ---")

    # --- Step 1: data-sufficiency filter ---
    # Keep only rows (companies) with at least MIN_PERIODS non-null values.
    print("\n--- 步骤 1: 数据充分性筛选 ---")
    valid_counts = df_gross_margin.notna().sum(axis=1)
    MIN_PERIODS = 12
    df_filtered = df_gross_margin[valid_counts >= MIN_PERIODS]
    print(f"原始公司数量: {len(df_gross_margin)}")
    print(f"筛选后剩余公司数量: {len(df_filtered)}")

    if not df_filtered.empty:
        # --- Step 2: winsorize extreme values ---
        # The bounds are the 1st and 99th percentiles of all values pooled
        # together (one pair of bounds for the whole panel, not per column).
        print("\n--- 步骤 2: 极端值处理 (缩尾) ---")
        lower_bound = df_filtered.stack().quantile(0.01)
        upper_bound = df_filtered.stack().quantile(0.99)
        df_winsorized = df_filtered.clip(lower=lower_bound, upper=upper_bound)
        print(f"已对数据在 [{lower_bound:.2f}, {upper_bound:.2f}] 范围内进行缩尾处理。")

        # --- Step 3: per-row average of the winsorized values ---
        print("\n--- 步骤 3: 计算处理后数据的平均值 ---")
        average_gm = df_winsorized.mean(axis=1)

        # --- Step 4: z-score of the per-row averages across all rows ---
        # NOTE(review): a zero standard deviation would make this division
        # produce NaN/inf; there is no guard for that case.
        print("\n--- 步骤 4: Z-Score 标准化 ---")
        z_scores_gm = (average_gm - average_gm.mean()) / average_gm.std()
        print("Z-Score 计算完成。")

        # ==============================================================================
        #  Step 5: export the final z-scores to Excel
        # ==============================================================================
        print("\n--- 步骤 5: 导出Z-score因子值到Excel文件 ---")

        # Output folder and full output file path.
        output_dir = '/Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/z-scores'
        output_filename = 'Z值-自由现金流除总资产-平均值.xlsx'
        output_path = os.path.join(output_dir, output_filename)

        try:
            # Make sure the output folder exists.
            os.makedirs(output_dir, exist_ok=True)

            # Convert the Series into a one-column DataFrame whose column is
            # named 'z_score_gm', then write it with the index included.
            z_scores_gm.to_frame(name='z_score_gm').to_excel(output_path, index=True)

            print(f"\n[成功] 因子已成功导出到: {output_path}")

        except Exception as e:
            # Export failures are reported but do not abort the cell.
            print(f"\n[失败] 文件导出失败: {e}")

    else:
        print("经过数据充分性筛选后，没有剩余的公司可供处理。")
else:
    print("数据加载失败或数据为空，未执行后续计算。")

--- 步骤 0: 从Excel文件加载准备好的宽表数据 ---
成功从路径 '/Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/初步数据/Ratio-FCF-to-Assets.xlsx' 加载数据！
数据预览：
           2018-03-31  2018-06-30  2018-09-30  2018-12-31  2019-03-31  \
ts_code                                                                 
000001.SZ         NaN    0.009402         NaN    0.038901         NaN   
000002.SZ   -0.064804   -0.002604   -0.024295    0.026705   -0.024462   
000004.SZ    0.011529    0.034129    0.016258   -0.382034    0.456031   
000006.SZ   -0.022726    0.003398    0.059987    0.129302   -0.027407   
000007.SZ    0.010014    0.053670    0.042214    0.165448   -0.016976   

           2019-06-30  2019-09-30  2019-12-31  2020-03-31  2020-06-30  ...  \
ts_code                                                                ...   
000001.SZ    0.006653         NaN    0.035795         NaN    0.020274  ...   
000002.SZ    0.014727   -0.011676    0.015329   -0.004027    0.009832  ...   
000004.SZ    0.679301    0.

In [5]:
import pandas as pd
import numpy as np
import os

# ==============================================================================
# Function library
# ==============================================================================

def preprocess_panel_winsorize(panel_df: pd.DataFrame,
                               lower_quantile: float = 0.02,
                               upper_quantile: float = 0.98) -> pd.DataFrame:
    """Winsorize each numeric column of a wide panel at its own quantiles.

    For every numeric column, the lower and upper bounds are the
    ``lower_quantile`` and ``upper_quantile`` of that column's non-null
    values, and the column is clipped to those bounds. Non-numeric columns
    and columns with no non-null values are left as they are.

    Args:
        panel_df: Wide panel to process. It is not modified.
        lower_quantile: Quantile used as the lower clipping bound.
        upper_quantile: Quantile used as the upper clipping bound.

    Returns:
        A copy of ``panel_df`` with the numeric columns clipped.

    Progress messages are printed to stdout.
    """
    print("\n--- [步骤B] 开始执行对称缩尾处理（Winsorize） ---")
    print(f"处理规则: 下限={lower_quantile*100:.1f}%, 上限={upper_quantile*100:.1f}%")

    winsorized = panel_df.copy()
    numeric_labels = [
        label for label in winsorized.columns
        if pd.api.types.is_numeric_dtype(winsorized[label])
    ]

    for label in numeric_labels:
        values = winsorized[label]
        observed = values.dropna()
        if observed.empty:
            # Nothing to derive bounds from; keep the column untouched.
            continue
        floor = observed.quantile(lower_quantile)
        ceiling = observed.quantile(upper_quantile)
        winsorized[label] = values.clip(lower=floor, upper=ceiling)

    print("--- [步骤B] 对称缩尾处理完成 ---")
    return winsorized

# Helpers for the growth-factor calculation.
def calculate_stable_growth(row, target_years, base_window_half_size=2, min_periods_in_window=3,
                            latest_window_size=4, max_shift_attempts=6, non_positive_value=-10):
    """Compound annual growth between a base-window mean and the latest-window mean.

    NaN entries of ``row`` are dropped first and all windows are then taken by
    position, treating 4 consecutive positions as one year.

    NOTE(review): because gaps are dropped before positions are counted, a row
    with missing periods covers more calendar time than the position arithmetic
    assumes - confirm this is acceptable for the data in use.

    Args:
        row: One row of the panel as a Series; assumed to be ordered from
            oldest to newest (only positions are used).
        target_years: Look-back horizon in years; the base window is centred
            ``target_years * 4 + 1`` positions from the end.
        base_window_half_size: Number of positions on each side of the centre
            that make up the base window.
        min_periods_in_window: Minimum number of values the base window must
            contain to be accepted.
        latest_window_size: Number of most recent values averaged as the
            "latest" level; also the minimum number of non-null values needed.
        max_shift_attempts: How many candidate base windows are tried, each
            shifted one position further back than the previous one
            (default 6, the previously hard-coded value).
        non_positive_value: Value returned when either average is not
            strictly positive, where a growth rate is undefined (default -10,
            the previously hard-coded value). Pass ``np.nan`` to keep such
            rows out of downstream means and standard deviations.

    Returns:
        ``np.nan`` if there are too few values or no base window fits;
        ``non_positive_value`` if the base or latest average is <= 0;
        otherwise ``(latest_avg / base_avg) ** (1 / years) - 1``, where
        ``years`` is the positional distance between the last element of the
        latest window and the last element of the base window divided by 4,
        floored at 1.
    """
    series = row.dropna()
    if len(series) < latest_window_size:
        return np.nan

    latest_window = series.iloc[-latest_window_size:]
    latest_avg = latest_window.mean()

    base_avg, actual_years = np.nan, np.nan
    for shift in range(max_shift_attempts):
        # Negative positions count back from the end of the series.
        base_period_center_idx = -(target_years * 4) - 1 - shift
        window_start_idx = base_period_center_idx - base_window_half_size
        window_end_idx = base_period_center_idx + base_window_half_size + 1
        # The window must lie entirely inside the series.
        if window_start_idx >= -len(series) and window_end_idx <= 0:
            base_window = series.iloc[window_start_idx:window_end_idx]
            if base_window.count() >= min_periods_in_window:
                base_avg = base_window.mean()
                latest_pos = series.index.get_loc(latest_window.index[-1])
                base_pos = series.index.get_loc(base_window.index[-1])
                actual_years = (latest_pos - base_pos) / 4.0
                break

    if pd.isna(base_avg):
        return np.nan

    if base_avg > 0 and latest_avg > 0:
        # Never annualise over less than one year.
        if actual_years < 1:
            actual_years = 1.0
        return (latest_avg / base_avg) ** (1 / actual_years) - 1

    # A growth rate is undefined when either level is zero or negative.
    return non_positive_value

def build_composite_growth_factor(panel_data: pd.DataFrame, factor_prefix: str, years_list: list = [3, 4, 5], latest_window_size: int = 4):
    """Build a composite growth score from several look-back horizons.

    Steps:
      1. For each horizon in ``years_list``, apply ``calculate_stable_growth``
         row by row to get a ``{prefix}_cagr_{N}yr_stable`` column.
      2. Z-score each growth column across rows into
         ``{prefix}_zscore_{N}yr_stable``.
      3. Keep rows with at least 2 non-null z-scores and fill their remaining
         missing z-scores with 0.
      4. Average the z-scores per row, z-score that average again, and store
         it as ``composite_{prefix}_growth_score``.

    Args:
        panel_data: Wide panel, one row per entity.
        factor_prefix: Prefix used in all generated column names.
        years_list: Look-back horizons in years.
        latest_window_size: Passed through to ``calculate_stable_growth``.

    Returns:
        DataFrame with the growth columns, then the z-score columns, then the
        composite score, sorted by the composite score in descending order.

    Progress messages are printed to stdout.
    """
    print(f"\n--- [步骤C] 开始构建 {factor_prefix.upper()} 复合成长因子 ---")

    scores = pd.DataFrame(index=panel_data.index)
    growth_names = [f'{factor_prefix}_cagr_{horizon}yr_stable' for horizon in years_list]
    z_names = [f'{factor_prefix}_zscore_{horizon}yr_stable' for horizon in years_list]

    # All growth columns are added before any z-score column so that the
    # output column order stays: growth..., z-score..., composite.
    for horizon, growth_name in zip(years_list, growth_names):
        scores[growth_name] = panel_data.apply(
            calculate_stable_growth,
            axis=1,
            target_years=horizon,
            latest_window_size=latest_window_size,
        )

    for growth_name, z_name in zip(growth_names, z_names):
        growth = scores[growth_name]
        center, spread = growth.mean(), growth.std()
        scores[z_name] = (growth - center) / spread

    # Require a minimum number of usable horizons per row.
    min_valid_scores = 2
    has_enough = scores[z_names].count(axis=1) >= min_valid_scores
    kept = scores[has_enough].copy()
    kept.loc[:, z_names] = kept.loc[:, z_names].fillna(0)

    composite_name = f'composite_{factor_prefix}_growth_score'
    blended = kept[z_names].mean(axis=1)
    blended_center, blended_spread = blended.mean(), blended.std()
    kept.loc[:, composite_name] = (blended - blended_center) / blended_spread

    ranked = kept.sort_values(by=composite_name, ascending=False)
    print(f"--- [步骤C] {factor_prefix.upper()} 复合成长因子构建完成 ---")
    return ranked


# ==============================================================================
# Main workflow: FCF / total-assets ratio -> winsorize -> composite growth
# factor -> Excel export
# ==============================================================================

if __name__ == "__main__":

    # --- Configuration ---
    # NOTE(review): user-specific absolute paths; adjust before running elsewhere.
    FCFF_FILE_PATH = '/Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/初步数据/初步数据/stock_financial_indicators_fcff_wide_panel.xlsx'
    ASSETS_FILE_PATH = '/Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/初步数据/初步数据/stock_balance_sheets_total_assets_wide_panel.xlsx'
    OUTPUT_DIR = '/Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/z-scores/'
    FACTOR_PREFIX = 'fcf_ta_ratio'

    # --- Step A: load both panels and build the raw ratio ---
    print(f"--- [步骤A] 加载数据并构造 {FACTOR_PREFIX} 指标 ---")
    try:
        fcff_df = pd.read_excel(FCFF_FILE_PATH, index_col=0)
        assets_df = pd.read_excel(ASSETS_FILE_PATH, index_col=0)
        # Keep only the columns present in both panels, then divide.
        # Rows are not explicitly aligned here; the division itself matches
        # rows by label.
        fcff_df, assets_df = fcff_df.align(assets_df, join='inner', axis=1)
        raw_ratio_df = fcff_df / assets_df
        # Turn +/-infinity into NaN.
        raw_ratio_df.replace([np.inf, -np.inf], np.nan, inplace=True)
        print("原始比率指标构造完成。")
    except FileNotFoundError as e:
        # Only a missing file is handled; any other error propagates.
        raise SystemExit(f"[错误] 文件未找到: {e.filename}")

    # --- Step B: winsorize the raw ratio at the 2% / 98% quantiles ---
    clean_ratio_df = preprocess_panel_winsorize(
        raw_ratio_df,
        lower_quantile=0.02,
        upper_quantile=0.98
    )

    # --- Step C: compute the composite growth factor from the cleaned panel ---
    # `years_list` is left at the function's default.
    growth_factor = build_composite_growth_factor(
        panel_data=clean_ratio_df,
        factor_prefix=FACTOR_PREFIX,
        latest_window_size=4
    )

    # --- Step D: save the result ---
    output_filename = f"Z值-FCF比总资产-成长性.xlsx"
    output_path = os.path.join(OUTPUT_DIR, output_filename)
    try:
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        growth_factor.to_excel(output_path)
        print(f"\n[成功] 已将因子得分导出到: {output_path}")
    except Exception as e:
        # Export failures are reported but do not abort the cell.
        print(f"\n[失败] 最终文件导出失败: {e}")

    print("\n====== 工作流全部执行完毕！ ======")

--- [步骤A] 加载数据并构造 fcf_ta_ratio 指标 ---
原始比率指标构造完成。

--- [步骤B] 开始执行对称缩尾处理（Winsorize） ---
处理规则: 下限=2.0%, 上限=98.0%
--- [步骤B] 对称缩尾处理完成 ---

--- [步骤C] 开始构建 FCF_TA_RATIO 复合成长因子 ---
--- [步骤C] FCF_TA_RATIO 复合成长因子构建完成 ---

[成功] 已将因子得分导出到: /Users/alan-hopiy/Documents/quantitative-learning-journey/outputs/z-scores/Z值-FCF比总资产-成长性.xlsx

