In [1]:
# --------------------------------------------------
# Cell 1: 导入库并设置项目结构
# --------------------------------------------------
import os
import re
import sys
import warnings
from datetime import datetime
from pathlib import Path
from time import perf_counter

import numpy as np
import pandas as pd

# Silence the UserWarnings that openpyxl emits while workbooks are being read.
warnings.filterwarnings(action="ignore", category=UserWarning, module="openpyxl")

# Every location is derived from the current working directory.
base_path = Path.cwd()
report_path = base_path.joinpath("报告数据")

# Input locations
input_path = report_path.joinpath("输入")
anjian_data_path = input_path.joinpath("安监数据")
base_data_path = input_path.joinpath("basic_data.xlsx")

# Output location
output_path = report_path.joinpath("输出")

# Intermediate (temporary) locations. The original note says these are created
# automatically; nothing in this cell creates them.
temp_path = report_path.joinpath("temp")
upload_split_path = temp_path.joinpath("1_待上传猪猪云文件")
zhuzhuyun_download_path = temp_path.joinpath("2_猪猪云下载数据")
zhuzhuyun_merge_path = temp_path.joinpath("3_猪猪云合并数据")
pycharm_input_path = temp_path.joinpath("4_logistics数据")
transit_data_path = temp_path.joinpath("5_中转数据")

In [2]:
# --- 1. Reporting period configuration (the only value to edit each quarter) ---
# Use the "YYYYQX" format, e.g. "2025Q2" means the second quarter of 2025.
CURRENT_YEAR_QUARTER = "2025Q1"

In [9]:
# --------------------------------------------------
# Cell 2: 季报周期配置与路径函数
# --------------------------------------------------
# --- Derive the comparison periods from CURRENT_YEAR_QUARTER ---
# Characters 0-3 are read as the year and character 5 as the quarter digit.
# A malformed value raises ValueError (as the report-generation cell does) instead
# of only printing a message, so later code never runs with missing or stale
# PREVIOUS_YEAR_QUARTER / LAST_YEAR_QUARTER values.
quarter_map = {1: "一", 2: "二", 3: "三", 4: "四"}
try:
    current_year = int(CURRENT_YEAR_QUARTER[:4])
    current_quarter = int(CURRENT_YEAR_QUARTER[5])
    if current_quarter not in quarter_map:
        # Digits 0 and 5-9 parse as integers but are not valid quarters.
        raise ValueError(f"quarter out of range: {current_quarter}")
except (ValueError, IndexError) as exc:
    raise ValueError(
        "错误：CURRENT_YEAR_QUARTER 格式不正确，请使用 'YYYYQX' 格式，例如 '2025Q2'"
    ) from exc

# Previous quarter (quarter-over-quarter comparison); Q1 wraps to Q4 of the prior year.
if current_quarter == 1:
    previous_quarter_year = current_year - 1
    previous_quarter = 4
else:
    previous_quarter_year = current_year
    previous_quarter = current_quarter - 1
PREVIOUS_YEAR_QUARTER = f"{previous_quarter_year}Q{previous_quarter}"

# Same quarter of the previous year (year-over-year comparison).
last_year_quarter_year = current_year - 1
LAST_YEAR_QUARTER = f"{last_year_quarter_year}Q{current_quarter}"

# Chinese display name of the current period, used in report text.
CURRENT_QUARTER_DISPLAY = f"{current_year}年第{quarter_map[current_quarter]}季度"

print("--- 报告周期配置成功 ---")
print(f"本期报告周期: {CURRENT_YEAR_QUARTER} ({CURRENT_QUARTER_DISPLAY})")
print(f"环比对比周期 (上季度): {PREVIOUS_YEAR_QUARTER}")
print(f"同比对比周期 (去年同期): {LAST_YEAR_QUARTER}")

# --- 2. Paths and constants ---
# Reuses base_path, input_path and output_path defined in Cell 1.
ROOT_PATH = base_path
HISTORICAL_DATA_PATH = input_path / "historical_data"
OUTPUT_DIR = output_path / "6_邮政报告表格_季度"
# Create the output directory (and missing parents) up front; an existing directory is fine.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


def get_months_in_quarter(year_quarter_str: str) -> list[str]:
    """
    Expand a quarter label such as "2025Q2" into its month keys
    (['202504', '202505', '202506']).

    The year is read from the first four characters and the quarter digit from
    the sixth character. A quarter digit outside 1-4 yields an empty list.
    """
    year_number = int(year_quarter_str[:4])
    quarter_number = int(year_quarter_str[5])
    if quarter_number not in (1, 2, 3, 4):
        return []
    first_month = 3 * (quarter_number - 1) + 1
    return [
        f"{year_number}{month_number:02d}"
        for month_number in range(first_month, first_month + 3)
    ]


def get_quarterly_file_paths(period_label: str) -> dict:
    """
    Return the monthly input locations for one reporting period.

    Args:
        period_label: "本期" (current quarter), "上季度" (previous quarter) or
            "去年同期" (same quarter last year).

    Returns:
        A dict with two lists, one entry per month of the quarter:
        "logistics" holds each month's "logistics数据" folder and "report" holds
        each month's "分析总报告_<YYYYMM>.xlsx" path, all under
        HISTORICAL_DATA_PATH/<YYYYMM>. An unknown label prints a warning and
        returns an empty dict.
    """
    if period_label not in ("本期", "上季度", "去年同期"):
        print(f"警告：未知的周期标签 '{period_label}'")
        return {}

    # Only the constant that matches the label is read.
    if period_label == "本期":
        quarter_id = CURRENT_YEAR_QUARTER
    elif period_label == "上季度":
        quarter_id = PREVIOUS_YEAR_QUARTER
    else:
        quarter_id = LAST_YEAR_QUARTER

    month_folders = [
        (yyyymm, HISTORICAL_DATA_PATH / yyyymm)
        for yyyymm in get_months_in_quarter(quarter_id)
    ]
    return {
        "logistics": [folder / "logistics数据" for _, folder in month_folders],
        "report": [
            folder / f"分析总报告_{yyyymm}.xlsx" for yyyymm, folder in month_folders
        ],
    }


--- 报告周期配置成功 ---
本期报告周期: 2025Q2 (2025年第二季度)
环比对比周期 (上季度): 2025Q1
同比对比周期 (去年同期): 2024Q2


In [3]:
# ==============================================================================
# Cell 3: 邮政、极兔季报生成
# ==============================================================================

import traceback
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")

# ==============================================================================
# 1. 报告周期与路径配置
# ==============================================================================

# --- Derive the comparison periods from CURRENT_YEAR_QUARTER ---
# Characters 0-3 are read as the year and character 5 as the quarter digit.
quarter_map = {1: "一", 2: "二", 3: "三", 4: "四"}
try:
    current_year = int(CURRENT_YEAR_QUARTER[:4])
    current_quarter = int(CURRENT_YEAR_QUARTER[5])
    if current_quarter not in quarter_map:
        # Digits 0 and 5-9 parse as integers but are not valid quarters; without
        # this check they surfaced later as a bare KeyError from quarter_map.
        raise ValueError(f"quarter out of range: {current_quarter}")
except (ValueError, IndexError) as exc:
    raise ValueError(
        "错误：CURRENT_YEAR_QUARTER 格式不正确，请使用 'YYYYQX' 格式，例如 '2025Q2'"
    ) from exc

# Previous quarter; Q1 wraps to Q4 of the prior year.
if current_quarter == 1:
    previous_quarter_year = current_year - 1
    previous_quarter = 4
else:
    previous_quarter_year = current_year
    previous_quarter = current_quarter - 1
PREVIOUS_YEAR_QUARTER = f"{previous_quarter_year}Q{previous_quarter}"

# Same quarter of the previous year.
last_year_quarter_year = current_year - 1
LAST_YEAR_QUARTER = f"{last_year_quarter_year}Q{current_quarter}"

# Chinese display name of the current period.
CURRENT_QUARTER_DISPLAY = f"{current_year}年第{quarter_map[current_quarter]}季度"

# --- Path definitions ---
# All locations are resolved from the current working directory.
ROOT_PATH = Path.cwd()
INPUT_PATH = ROOT_PATH / "报告数据" / "输入"
OUTPUT_DIR = ROOT_PATH / "报告数据" / "输出" / "6_邮政报告表格_季度"
HISTORICAL_DATA_PATH = INPUT_PATH / "historical_data"
BASIC_DATA_PATH = INPUT_PATH / "basic_data.xlsx"
# Create the report output directory (and missing parents); an existing directory is fine.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


# --- 季度文件路径获取函数 ---
def get_months_in_quarter(year_quarter_str: str) -> list[str]:
    """
    Return the three "YYYYMM" month keys of a quarter label such as "2025Q2".

    The year comes from the first four characters, the quarter digit from the
    sixth character. A quarter digit outside 1-4 gives an empty list.
    """
    year_part = int(year_quarter_str[:4])
    quarter_part = int(year_quarter_str[5])
    first_month_of = {1: 1, 2: 4, 3: 7, 4: 10}
    if quarter_part not in first_month_of:
        return []
    start_month = first_month_of[quarter_part]
    return [f"{year_part}{start_month + offset:02d}" for offset in range(3)]


def get_quarterly_source_paths(period_label: str) -> dict:
    """
    Return the monthly analysis-result folders for one reporting period.

    Args:
        period_label: "本期" (current quarter), "上季度" (previous quarter) or
            "去年同期" (same quarter last year).

    Returns:
        {"data_analysis": [HISTORICAL_DATA_PATH/<YYYYMM>/data_analysis_result_<YYYYMM>, ...]}
        with one entry per month of the quarter, or an empty dict for an
        unknown label.
    """
    if period_label == "本期":
        quarter_id = CURRENT_YEAR_QUARTER
    elif period_label == "上季度":
        quarter_id = PREVIOUS_YEAR_QUARTER
    elif period_label == "去年同期":
        quarter_id = LAST_YEAR_QUARTER
    else:
        return {}

    return {
        "data_analysis": [
            HISTORICAL_DATA_PATH / yyyymm / f"data_analysis_result_{yyyymm}"
            for yyyymm in get_months_in_quarter(quarter_id)
        ]
    }


# ==============================================================================
# 2. 全局常量与辅助函数
# ==============================================================================
# The nine major express brands. calculate_quarterly_master_route_data uses this
# list to choose which company columns feed the industry mean / best / worst values.
COMPANIES_NINE_MAJOR = [
    "EMS",
    "中通",
    "京东",
    "圆通",
    "德邦",
    "极兔",
    "申通",
    "韵达",
    "顺丰",
]
# Every analysed product line: the nine brands plus "快包".
COMPANIES_ALL = COMPANIES_NINE_MAJOR + ["快包"]

# Per-report settings, keyed by report owner:
#   products               - product names covered by the report
#   output_filename        - workbook name written under OUTPUT_DIR
#   product_to_filename    - product name -> file-name prefix of its
#                            "<prefix>_data_analysis_result.xlsx" workbook
#   logistics_data_sources - workbook stems read from each month's
#                            "3_猪猪云合并数据_<YYYYMM>" folder
REPORT_CONFIG = {
    "邮政": {
        "products": ["EMS", "快包"],
        "output_filename": f"邮政季报_{CURRENT_YEAR_QUARTER}.xlsx",
        "product_to_filename": {"EMS": "EMS", "快包": "邮政"},
        "logistics_data_sources": ["EMS", "邮政"],
    },
    "极兔": {
        "products": ["极兔"],
        "output_filename": f"极兔季报_{CURRENT_YEAR_QUARTER}.xlsx",
        "product_to_filename": {"极兔": "极兔"},
        "logistics_data_sources": ["极兔"],
    },
}


# Company key -> file-name prefix used for "<prefix>_transit_data.xlsx" and
# "<prefix>_data_analysis_result.xlsx"; "快包" is stored under the "邮政" prefix.
COMPANY_FILE_MAP = {
    "EMS": "EMS",
    "中通": "中通",
    "京东": "京东",
    "圆通": "圆通",
    "德邦": "德邦",
    "极兔": "极兔",
    "申通": "申通",
    "韵达": "韵达",
    "顺丰": "顺丰",
    "快包": "邮政",
}


def auto_adjust_xlsx_columns(writer, df, sheet_name):
    """
    Set the column widths of an already-written sheet from the data it holds.

    Each width is the longest of: the longest cell value rendered as a string,
    the header text, and 8; plus 2 characters of padding. The result is capped
    at 60 for columns whose name contains "物流信息" and at 40 for all others.

    Args:
        writer: object whose ``sheets[sheet_name]`` exposes
            ``set_column(first, last, width)`` (the notebook passes a
            pandas ExcelWriter created with engine="xlsxwriter").
        df: the DataFrame that was written to the sheet, in the same column order.
        sheet_name: name of the sheet to adjust.
    """
    worksheet = writer.sheets[sheet_name]
    for i, col in enumerate(df.columns):
        is_logistics_col = "物流信息" in str(col)
        # An empty frame has no cell text to measure; fall back to header/minimum width.
        column_len = df[col].astype(str).str.len().max() if not df.empty else 0
        header_len = len(str(col))
        width = max(column_len, header_len, 8) + 2
        width = min(width, 60 if is_logistics_col else 40)
        worksheet.set_column(i, i, width)


# ==============================================================================
# 3. 核心数据聚合函数
# ==============================================================================
def load_and_aggregate_quarterly_transit_data(period_label: str) -> pd.DataFrame:
    """
    Build a wide table of quarterly average transit counts per route and company.

    For every month of the selected quarter this reads
    ``HISTORICAL_DATA_PATH/<YYYYMM>/5_中转数据_<YYYYMM>/<prefix>_transit_data.xlsx``
    for each entry of COMPANY_FILE_MAP, averages the monthly values per
    (寄出城市, 寄达城市, company) and pivots the companies into columns named
    ``平均中转次数_<company>``.

    Args:
        period_label: "本期" (current quarter), "上季度" (previous quarter) or
            "去年同期" (same quarter last year). Any other label falls back to
            the previous quarter, as it always did.

    Returns:
        One row per route with 寄出城市, 寄达城市 and one
        ``平均中转次数_<company>`` column per company that had data; an empty
        DataFrame when no transit file could be loaded.
    """
    print("      -> 正在聚合季度中转次数数据...")
    if period_label == "本期":
        target_quarter = CURRENT_YEAR_QUARTER
    elif period_label == "去年同期":
        # This label used to fall through to the previous quarter, so the transit
        # figures did not match the period of the detail data they are merged with.
        target_quarter = LAST_YEAR_QUARTER
    else:
        target_quarter = PREVIOUS_YEAR_QUARTER
    months = get_months_in_quarter(target_quarter)

    all_transit_dfs = []
    required_cols = ["寄出城市", "寄达城市", "中转次数"]
    for yyyymm in months:
        transit_data_folder = HISTORICAL_DATA_PATH / yyyymm / f"5_中转数据_{yyyymm}"
        if not transit_data_folder.exists():
            continue
        for company_key, file_prefix in COMPANY_FILE_MAP.items():
            file_path = transit_data_folder / f"{file_prefix}_transit_data.xlsx"
            if not file_path.exists():
                continue
            try:
                df_transit_raw = pd.read_excel(file_path)
                # Normalise the source headers to the names used by the detail data.
                df_transit_renamed = df_transit_raw.rename(
                    columns={
                        "出发城市": "寄出城市",
                        "到达城市": "寄达城市",
                        "平均中转次数": "中转次数",
                    }
                )
                # Files that lack any required column are skipped without a message.
                if all(col in df_transit_renamed.columns for col in required_cols):
                    df_transit = df_transit_renamed[required_cols].copy()
                    df_transit["company"] = company_key
                    all_transit_dfs.append(df_transit)
            except Exception as e:
                # Best effort: one unreadable file must not stop the aggregation.
                print(
                    f"         -> 警告: 读取中转文件 {file_path.name} 时发生错误: {e}"
                )
    if not all_transit_dfs:
        return pd.DataFrame()

    df_full_transit = pd.concat(all_transit_dfs, ignore_index=True)
    # Plain (unweighted) mean of the monthly values for each route and company.
    df_agg_transit = (
        df_full_transit.groupby(["寄出城市", "寄达城市", "company"])["中转次数"]
        .mean()
        .reset_index()
        .rename(columns={"中转次数": "平均中转次数"})
    )
    df_pivot_transit = df_agg_transit.pivot_table(
        index=["寄出城市", "寄达城市"], columns="company", values="平均中转次数"
    )
    df_pivot_transit.columns = [
        f"平均中转次数_{col}" for col in df_pivot_transit.columns
    ]
    print("      -> 季度中转次数数据聚合完成。")
    return df_pivot_transit.reset_index()


def load_all_company_quarterly_details(period_label):
    """
    Load the "线路详细数据" sheet of every company's monthly analysis workbook
    for the given period and stack them into one DataFrame.

    Each loaded frame gets a "company" column holding the company key. Missing
    workbooks and empty sheets are skipped; read failures are reported and skipped.

    Args:
        period_label: period label understood by get_quarterly_source_paths.

    Returns:
        The concatenated rows, or an empty DataFrame when nothing was loaded.
    """
    source_paths = get_quarterly_source_paths(period_label)
    monthly_frames = []
    print("      -> 正在聚合所有公司的季度原始数据(线路详细数据)...")
    for company_key in COMPANIES_ALL:
        file_prefix = COMPANY_FILE_MAP.get(company_key)
        if file_prefix:
            workbook_name = f"{file_prefix}_data_analysis_result.xlsx"
            for analysis_dir in source_paths["data_analysis"]:
                file_path = analysis_dir / workbook_name
                if not file_path.exists():
                    continue
                try:
                    # Read tracking numbers as text.
                    monthly = pd.read_excel(
                        file_path, sheet_name="线路详细数据", dtype={"单号": str}
                    )
                    if monthly.empty:
                        continue
                    monthly["company"] = company_key
                    monthly_frames.append(monthly)
                except Exception as e:
                    print(f"         -> 警告: 读取 {file_path.name} 失败: {e}")

    if monthly_frames:
        print(
            f"      -> 季度原始数据聚合完成，共 {len(monthly_frames)} 个月度文件。"
        )
        return pd.concat(monthly_frames, ignore_index=True)
    return pd.DataFrame()


def calculate_quarterly_master_route_data(period_label: str) -> pd.DataFrame:
    """
    Build the per-route master table for one period.

    Steps: load the transit pivot and the stacked company detail rows; average
    every numeric detail column (except 中转次数) per route and company; spread
    the companies into "<metric>_<company>" columns; left-join the transit
    pivot; add "线路" ("<寄出城市>-<寄达城市>"); then, for each timing metric and
    the transit count, add 行业均值_ / 行业最优_ (row minimum) / 行业最差_ (row
    maximum) columns computed over the COMPANIES_NINE_MAJOR columns.

    Returns:
        The wide route table, or an empty DataFrame when no detail rows exist.
    """
    transit_wide = load_and_aggregate_quarterly_transit_data(period_label)
    details = load_all_company_quarterly_details(period_label)
    if details.empty:
        return pd.DataFrame()

    print("      -> 正在按线路聚合季度数据并计算季度平均指标...")
    route_keys = ["寄出城市", "寄达城市"]
    non_metric_cols = ("company", "寄出城市", "寄达城市", "中转次数")
    numeric_cols = []
    for column in details.columns:
        if column in non_metric_cols:
            continue
        if pd.api.types.is_numeric_dtype(details[column]):
            numeric_cols.append(column)

    per_company_mean = details.groupby(route_keys + ["company"])[numeric_cols].mean(
        numeric_only=True
    )
    route_table = per_company_mean.unstack("company")
    # Flatten the (metric, company) column pairs into "<metric>_<company>".
    route_table.columns = [
        "_".join(map(str, pair)).strip() for pair in route_table.columns
    ]
    route_table = route_table.reset_index()

    if not transit_wide.empty:
        route_table = pd.merge(route_table, transit_wide, on=route_keys, how="left")
    route_table["线路"] = route_table["寄出城市"] + "-" + route_table["寄达城市"]

    print("      -> 正在计算行业最优和均值...")
    # For every metric here the smallest value is the best one.
    benchmark_metrics = (
        "全程时限",
        "寄出地处理时限",
        "运输时限",
        "寄达地处理时限",
        "投递时限",
        "平均中转次数",
    )
    for metric_name in benchmark_metrics:
        company_cols = [
            column
            for column in route_table.columns
            if column.startswith(metric_name)
            and column.split("_")[-1] in COMPANIES_NINE_MAJOR
        ]
        if company_cols:
            route_table[f"行业均值_{metric_name}"] = route_table[company_cols].mean(
                axis=1
            )
            route_table[f"行业最优_{metric_name}"] = route_table[company_cols].min(
                axis=1
            )
            route_table[f"行业最差_{metric_name}"] = route_table[company_cols].max(
                axis=1
            )
    return route_table


def load_and_concat_quarterly_raw_data(config):
    """
    Load the current quarter's "线路详细数据" rows for each product of one report.

    Args:
        config: one REPORT_CONFIG entry; "products" lists the product names and
            "product_to_filename" maps each to its workbook file-name prefix.

    Returns:
        Dict of product name -> concatenated monthly rows. Products without a
        file-name prefix or without any readable, non-empty workbook are omitted.
    """
    source_paths = get_quarterly_source_paths("本期")
    rows_by_product = {}
    for prod_name in config["products"]:
        filename_part = config["product_to_filename"].get(prod_name)
        if filename_part:
            monthly_frames = []
            for analysis_dir in source_paths["data_analysis"]:
                file_path = analysis_dir / f"{filename_part}_data_analysis_result.xlsx"
                if not file_path.exists():
                    continue
                try:
                    monthly = pd.read_excel(
                        file_path, sheet_name="线路详细数据", dtype={"单号": str}
                    )
                    if not monthly.empty:
                        monthly_frames.append(monthly)
                except Exception as e:
                    print(f"         -> 警告: 读取 {file_path.name} 失败: {e}")
            if monthly_frames:
                rows_by_product[prod_name] = pd.concat(
                    monthly_frames, ignore_index=True
                )
    return rows_by_product


def load_quarterly_logistics_data(config: dict) -> pd.DataFrame:
    """
    [V8.2] Load the quarter's tracking text from each month's
    "3_猪猪云合并数据_<YYYYMM>" folder, printing every folder that is checked.

    Args:
        config: one REPORT_CONFIG entry; "logistics_data_sources" lists the
            workbook stems ("<stem>.xlsx") to read from each monthly folder.

    Returns:
        A DataFrame with columns 快递单号 and 完整物流信息: rows without a
        tracking number are dropped and only the first row per tracking number
        is kept. An empty frame with those two columns is returned when nothing
        could be loaded.
    """
    print("      -> 正在聚合季度'完整物流信息'数据...")
    collected = []

    for yyyymm in get_months_in_quarter(CURRENT_YEAR_QUARTER):
        month_dir = HISTORICAL_DATA_PATH / yyyymm / f"3_猪猪云合并数据_{yyyymm}"
        print(f"         -> 正在检查路径: {month_dir}")

        if month_dir.exists():
            for source_file_name in config.get("logistics_data_sources", []):
                file_path = month_dir / f"{source_file_name}.xlsx"
                if not file_path.exists():
                    continue
                try:
                    monthly = pd.read_excel(
                        file_path,
                        usecols=["快递单号", "完整物流信息"],
                        dtype={"快递单号": str},
                    )
                    collected.append(monthly.dropna(subset=["快递单号"]))
                except Exception as e:
                    print(f"         -> 警告: 读取物流文件 {file_path.name} 失败: {e}")
        else:
            print(f"         -> 警告: 未找到物流数据目录: {month_dir}")

    if collected:
        stacked = pd.concat(collected, ignore_index=True)
        unique_rows = stacked.drop_duplicates(subset=["快递单号"])
        print(
            f"      -> 季度'完整物流信息'聚合完成，共 {len(unique_rows)} 条独特记录。"
        )
        return unique_rows

    print("      -> 未加载到任何季度物流信息。")
    return pd.DataFrame(columns=["快递单号", "完整物流信息"])


# ==============================================================================
# 4. 各Sheet生成函数
# ==============================================================================


def create_sheet1_quarterly(df_q_main_report, config):
    """
    Build the route-level sheet for one report from the master route table.

    The sheet holds, in this order: the route identity and industry benchmark
    columns, six value columns per product, then five screening-indicator
    columns per product. Master-table columns that are missing are filled
    with pd.NA.

    A screening indicator is the product value minus an industry benchmark:
    the industry mean for "快包", the industry best for every other product.

    Args:
        df_q_main_report: master route table (one row per route).
        config: one REPORT_CONFIG entry; only "products" is read.

    Returns:
        A DataFrame whose columns follow the sheet layout described above.
    """
    master = df_q_main_report.copy()
    products = config["products"]

    # (sheet header, master-table column) for route identity and industry benchmarks.
    base_pairs = [
        ("寄出省份", "寄出省份"),
        ("寄出城市", "寄出城市"),
        ("寄达省份", "寄达省份"),
        ("寄达城市", "寄达城市"),
        ("线路", "线路"),
        ("行业最优_中转次数", "行业最优_平均中转次数"),
        ("行业均值_全程时限", "行业均值_全程时限"),
        ("行业最优_全程时限", "行业最优_全程时限"),
        ("行业均值_寄出地处理时长", "行业均值_寄出地处理时限"),
        ("行业最优_寄出地处理时长", "行业最优_寄出地处理时限"),
        ("行业均值_运输时长", "行业均值_运输时限"),
        ("行业最优_运输时长", "行业最优_运输时限"),
        ("行业均值_寄达地处理时长", "行业均值_寄达地处理时限"),
        ("行业最优_寄达地处理时长", "行业最优_寄达地处理时限"),
        ("行业均值_投递时长", "行业均值_投递时限"),
        ("行业最优_投递时长", "行业最优_投递时限"),
    ]
    # (sheet header suffix, master-table column prefix) for each product.
    product_pairs = [
        ("中转次数", "平均中转次数"),
        ("寄出地处理时长", "寄出地处理时限"),
        ("运输时长", "运输时限"),
        ("寄达地处理时长", "寄达地处理时限"),
        ("投递时长", "投递时限"),
        ("全程时限", "全程时限"),
    ]
    indicator_metrics = [
        "运输时长",
        "寄达地处理时长",
        "寄出地处理时长",
        "投递时长",
        "全程时限",
    ]

    base_headers = [header for header, _ in base_pairs]
    product_headers = [
        f"{prod}_{suffix}" for prod in products for suffix, _ in product_pairs
    ]
    indicator_headers = [
        f"{prod}筛选指标_{metric}" for prod in products for metric in indicator_metrics
    ]
    final_headers = base_headers + product_headers + indicator_headers

    column_mapping = dict(base_pairs)
    for prod in products:
        for suffix, source_prefix in product_pairs:
            column_mapping[f"{prod}_{suffix}"] = f"{source_prefix}_{prod}"

    # Copy the mapped columns one at a time; absent source columns become pd.NA.
    sheet = pd.DataFrame()
    for header, source_col in column_mapping.items():
        sheet[header] = master[source_col] if source_col in master.columns else pd.NA

    for metric in indicator_metrics:
        for prod in products:
            indicator_col = f"{prod}筛选指标_{metric}"
            data_col = f"{prod}_{metric}"
            if prod == "快包":
                benchmark_col = f"行业均值_{metric}"
            else:
                benchmark_col = f"行业最优_{metric}"
            if data_col not in sheet.columns or benchmark_col not in sheet.columns:
                sheet[indicator_col] = pd.NA
                continue
            product_values = pd.to_numeric(sheet[data_col], errors="coerce")
            benchmark_values = pd.to_numeric(sheet[benchmark_col], errors="coerce")
            sheet[indicator_col] = product_values - benchmark_values

    # Make sure every expected header exists before selecting in final order.
    for header in final_headers:
        if header not in sheet.columns:
            sheet[header] = pd.NA
    return sheet[final_headers]


def create_sheet2_quarterly(details_data, df_q_main_report, logistics_df):
    """
    Build the parcel-level sheet for one report.

    Stacks the per-product detail rows, renames the source columns to the report
    headers, derives four stage durations in hours from the timestamp columns
    (negative durations become missing), attaches the industry best / mean
    end-to-end time of each route and the full tracking text of each parcel.

    Args:
        details_data: dict of product name -> detail rows for that product.
        df_q_main_report: master route table; "线路", "行业最优_全程时限" and
            "行业均值_全程时限" are used when present.
        logistics_df: DataFrame with 快递单号 and 完整物流信息. It is not modified.

    Returns:
        A DataFrame with the 17 report columns in fixed order (columns that
        could not be produced are pd.NA), or an empty DataFrame when
        details_data is empty.
    """
    if not details_data:
        return pd.DataFrame()

    final_headers = [
        "邮件号",
        "产品种类",
        "寄出城市",
        "寄达城市",
        "线路",
        "全程时限",
        "寄出地处理时长",
        "揽收-到达寄出地分拣中心时长",
        "到达寄出地分拣中心-离开寄出地城市时长",
        "运输时长",
        "寄达地处理时长",
        "到达寄达地城市-离开寄达地分拣中心时长",
        "离开寄达地分拣中心-派件",
        "投递时长",
        "行业最优_全程时限",
        "行业均值_全程时限",
        "完整物流信息",
    ]

    df_s2 = pd.concat(
        [df.assign(产品种类=prod) for prod, df in details_data.items()],
        ignore_index=True,
    )
    df_s2 = df_s2.rename(
        columns={
            "单号": "邮件号",
            "公里": "线路里程",
            "签收时间": "完成投递时间",
            "寄出地处理时限": "寄出地处理时长",
            "运输时限": "运输时长",
            "寄达地处理时限": "寄达地处理时长",
            "投递时限": "投递时长",
            "揽收时间": "揽件时间",
        }
    )

    df_s2["线路"] = df_s2["寄出城市"] + "-" + df_s2["寄达城市"]

    # --- Stage durations ---
    time_cols = [
        "揽件时间",
        "完成投递时间",
        "离开寄件城市时间",
        "到达收件城市时间",
        "派送时间",
        "到达分拣中心时间",
        "离开收件城市分拣中心时间",
    ]
    for col in time_cols:
        if col in df_s2.columns:
            df_s2[col] = pd.to_datetime(df_s2[col], errors="coerce")
    # new column -> (end timestamp, start timestamp)
    duration_calcs = {
        "揽收-到达寄出地分拣中心时长": ("到达分拣中心时间", "揽件时间"),
        "到达寄出地分拣中心-离开寄出地城市时长": (
            "离开寄件城市时间",
            "到达分拣中心时间",
        ),
        "到达寄达地城市-离开寄达地分拣中心时长": (
            "离开收件城市分拣中心时间",
            "到达收件城市时间",
        ),
        "离开寄达地分拣中心-派件": ("派送时间", "离开收件城市分拣中心时间"),
    }
    for new_col, (end_t, start_t) in duration_calcs.items():
        if end_t in df_s2.columns and start_t in df_s2.columns:
            duration_hours = (df_s2[end_t] - df_s2[start_t]) / pd.Timedelta(hours=1)
            # Out-of-order timestamps give a negative duration; blank those out.
            df_s2[new_col] = duration_hours.where(duration_hours >= 0)

    # --- Industry benchmarks per route ---
    # Merge only the benchmark columns that exist; absent ones are filled with
    # pd.NA below instead of raising KeyError.
    industry_cols = ["行业最优_全程时限", "行业均值_全程时限"]
    available_industry_cols = [
        col for col in industry_cols if col in df_q_main_report.columns
    ]
    if "线路" in df_q_main_report.columns and available_industry_cols:
        df_main_subset = df_q_main_report[
            ["线路"] + available_industry_cols
        ].drop_duplicates(subset=["线路"])
        df_s2 = pd.merge(df_s2, df_main_subset, on="线路", how="left")

    # --- Full tracking text ---
    if not logistics_df.empty:
        df_s2["邮件号"] = df_s2["邮件号"].astype(str)
        # Cast the join key on a copy so the caller's frame keeps its dtype.
        logistics_keyed = logistics_df.assign(
            快递单号=logistics_df["快递单号"].astype(str)
        )

        df_s2 = pd.merge(
            df_s2, logistics_keyed, left_on="邮件号", right_on="快递单号", how="left"
        )

        # When the detail rows already had a 完整物流信息 column the merge adds
        # _x/_y suffixes; keep the value from the logistics frame (_y).
        if "完整物流信息_y" in df_s2.columns:
            df_s2["完整物流信息"] = df_s2["完整物流信息_y"]
            df_s2 = df_s2.drop(
                columns=["完整物流信息_x", "完整物流信息_y"], errors="ignore"
            )

        if "快递单号" in df_s2.columns:
            df_s2 = df_s2.drop(columns=["快递单号"])
    else:
        df_s2["完整物流信息"] = "未找到物流数据"

    # Make sure every report column exists before selecting in final order.
    for col in final_headers:
        if col not in df_s2.columns:
            df_s2[col] = pd.NA

    return df_s2[final_headers]


# ==============================================================================
# 5. 主执行函数
# ==============================================================================
def _attach_province_columns(df_routes, df_details):
    """Add missing 寄出省份 / 寄达省份 columns to the route table, using the first province seen for each city in the detail rows."""
    for city_col, province_col in (("寄出城市", "寄出省份"), ("寄达城市", "寄达省份")):
        if province_col in df_routes.columns:
            continue
        if city_col not in df_details.columns or province_col not in df_details.columns:
            # Without this guard a missing column raised KeyError and aborted every report.
            print(f"  - ⚠️ 警告: 明细数据缺少 '{province_col}' 列，无法补充该省份信息。")
            continue
        city_to_province = df_details[[city_col, province_col]].drop_duplicates(
            subset=[city_col]
        )
        df_routes = pd.merge(df_routes, city_to_province, on=city_col, how="left")
    return df_routes


def execute_report_generation():
    """
    [V8] Top-level driver of the quarterly report: writes one workbook per
    REPORT_CONFIG entry.

    Steps:
        1. Build the master route table for the current quarter ("本期") and add
           province columns to it; stop if the table is empty.
        2. For each report: load the product detail rows, load the tracking
           text, build the "线路明细" and "邮件明细" sheets and write the non-empty
           ones to OUTPUT_DIR/<output_filename> with adjusted column widths.

    A failure while producing one report is printed with its traceback and does
    not stop the remaining reports.
    """

    print(f"\n{'=' * 25} 开始执行 {CURRENT_YEAR_QUARTER} 季报生成任务 {'=' * 25}")
    print("⚠️  注意: 此过程将聚合三个月的全部数据，可能需要较长时间，请耐心等待...")

    print("\n[步骤 1/4] 正在聚合季度线路主数据 (最耗时步骤)...")
    df_q_main_report = calculate_quarterly_master_route_data("本期")
    if df_q_main_report.empty:
        print("🔥🔥🔥 致命错误: 未能聚合任何公司的季度线路主数据，流程中止。")
        return

    # Add province columns. The master table keeps only numeric aggregates, so
    # the provinces are looked up from the detail rows.
    # NOTE: this reads every company's detail workbooks a second time.
    if not {"寄出省份", "寄达省份"}.issubset(df_q_main_report.columns):
        details_for_prov = load_all_company_quarterly_details("本期")
        if not details_for_prov.empty:
            df_q_main_report = _attach_province_columns(
                df_q_main_report, details_for_prov
            )

    for company_name, config in REPORT_CONFIG.items():
        final_output_path = OUTPUT_DIR / config["output_filename"]
        print(
            f"\n{'=' * 20} 开始生成: {company_name} {CURRENT_YEAR_QUARTER} 季报 {'=' * 20}"
        )

        try:
            print(f"  - [步骤 2/4] 正在为 {company_name} 聚合产品明细数据...")
            q_details_data = load_and_concat_quarterly_raw_data(config)
            if not q_details_data:
                print(f"  - ⚠️ 警告: 未能聚合 {company_name} 的任何邮件明细数据。")

            print(f"  - [步骤 3/4] 正在为 {company_name} 加载季度物流信息...")
            logistics_df = load_quarterly_logistics_data(config)

            print(
                f"  - [步骤 4/4] 正在生成 {company_name} 的 '线路明细' 和 '邮件明细' Sheets..."
            )
            df_s1 = create_sheet1_quarterly(df_q_main_report, config)
            df_s2 = create_sheet2_quarterly(
                q_details_data, df_q_main_report, logistics_df
            )

            print(
                f"      -> 所有数据计算完成，正在写入最终文件: {final_output_path.name}"
            )
            with pd.ExcelWriter(final_output_path, engine="xlsxwriter") as writer:
                sheets_to_write = {"线路明细": df_s1, "邮件明细": df_s2}
                for sheet_name, df in sheets_to_write.items():
                    if df is not None and not df.empty:
                        print(f"         -> 正在写入Sheet: {sheet_name} ({len(df)} 行)")
                        df.to_excel(
                            writer,
                            sheet_name=sheet_name,
                            index=False,
                            float_format="%.2f",
                        )
                        auto_adjust_xlsx_columns(writer, df, sheet_name)
                    elif df is not None:
                        print(f"         -> Sheet '{sheet_name}' 为空，已跳过写入。")

            print(
                f"\n🎉🎉🎉 {company_name} 报告已成功生成: '{final_output_path.name}'！🎉🎉🎉"
            )

        except Exception as e:
            # Report the failure and continue with the next report.
            print(
                f"      -> 🔥🔥🔥 生成报告 '{config['output_filename']}' 时发生严重错误: {e}"
            )
            print(traceback.format_exc())


# --- Run ---
# Generate the reports when this code runs as the main module, or when the string
# form of a global named get_ipython (if one exists) contains "ipykernel".
if __name__ == "__main__" or ("ipykernel" in str(globals().get("get_ipython", ""))):
    execute_report_generation()


⚠️  注意: 此过程将聚合三个月的全部数据，可能需要较长时间，请耐心等待...

[步骤 1/4] 正在聚合季度线路主数据 (最耗时步骤)...
      -> 正在聚合季度中转次数数据...
      -> 季度中转次数数据聚合完成。
      -> 正在聚合所有公司的季度原始数据(线路详细数据)...
      -> 季度原始数据聚合完成，共 30 个月度文件。


  return pd.concat(all_monthly_dfs, ignore_index=True)


      -> 正在按线路聚合季度数据并计算季度平均指标...
      -> 正在计算行业最优和均值...
      -> 正在聚合所有公司的季度原始数据(线路详细数据)...
      -> 季度原始数据聚合完成，共 30 个月度文件。


  return pd.concat(all_monthly_dfs, ignore_index=True)



  - [步骤 2/4] 正在为 邮政 聚合产品明细数据...
  - [步骤 3/4] 正在为 邮政 加载季度物流信息...
      -> 正在聚合季度'完整物流信息'数据...
         -> 正在检查路径: /Users/lava/Documents/国家邮政局发展研究中心实习/python_data_analysis/报告数据/输入/historical_data/202501/3_猪猪云合并数据_202501
         -> 正在检查路径: /Users/lava/Documents/国家邮政局发展研究中心实习/python_data_analysis/报告数据/输入/historical_data/202502/3_猪猪云合并数据_202502
         -> 正在检查路径: /Users/lava/Documents/国家邮政局发展研究中心实习/python_data_analysis/报告数据/输入/historical_data/202503/3_猪猪云合并数据_202503
      -> 季度'完整物流信息'聚合完成，共 454215 条独特记录。
  - [步骤 4/4] 正在生成 邮政 的 '线路明细' 和 '邮件明细' Sheets...
      -> 所有数据计算完成，正在写入最终文件: 邮政季报_2025Q1.xlsx
         -> 正在写入Sheet: 线路明细 (2644 行)
         -> 正在写入Sheet: 邮件明细 (359971 行)

🎉🎉🎉 邮政 报告已成功生成: '邮政季报_2025Q1.xlsx'！🎉🎉🎉

  - [步骤 2/4] 正在为 极兔 聚合产品明细数据...
  - [步骤 3/4] 正在为 极兔 加载季度物流信息...
      -> 正在聚合季度'完整物流信息'数据...
         -> 正在检查路径: /Users/lava/Documents/国家邮政局发展研究中心实习/python_data_analysis/报告数据/输入/historical_data/202501/3_猪猪云合并数据_202501
         -> 正在检查路径: /Users/lava/Documents/国家邮政局发展研究中心实习/python

In [None]:
# ==============================================================================
# Cell 4  报告图片生成 (季度版)
# ==============================================================================
import os
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.font_manager import FontProperties, fontManager

# Path configuration
ROOT_PATH = Path.cwd()
# The data source is the historical data, aggregated per quarter.
HISTORICAL_DATA_PATH = ROOT_PATH / "报告数据" / "输入" / "historical_data"
OUTPUT_IMAGE_PATH = ROOT_PATH / "报告数据" / "输出" / "7_报告图片_季度"

# Create the image output directory (and missing parents); an existing directory is fine.
OUTPUT_IMAGE_PATH.mkdir(parents=True, exist_ok=True)
print(f"数据输入基路径: {HISTORICAL_DATA_PATH}")
print(f"图片输出路径: {OUTPUT_IMAGE_PATH}")

# Company list and file-name mapping
COMPANY_MAPPING = {
    "EMS": "EMS",
    "中通": "中通",
    "京东": "京东",
    "圆通": "圆通",
    "德邦": "德邦",
    "极兔": "极兔",
    "申通": "申通",
    "韵达": "韵达",
    "顺丰": "顺丰",
    "邮政": "邮政",  # "快包" data lives in the "邮政" file
}
# The analysed entities include "快包"; at file-name level it is represented by the "邮政" file.
COMPANIES_TO_ANALYZE = [
    "EMS",
    "快包",
    "中通",
    "京东",
    "圆通",
    "德邦",
    "极兔",
    "申通",
    "韵达",
    "顺丰",
]

# Timestamp columns of the "线路详细数据" sheet for which an hourly distribution is plotted.
METRICS_TO_PLOT = [
    "揽收时间",
    "到达分拣中心时间",
    "离开寄件城市时间",
    "到达收件城市时间",
    "派送时间",
    "签收时间",
]

# --- 2. Font and colour configuration ---
# Registers a Chinese-capable font with Matplotlib and makes it the global
# sans-serif font. On failure only a message is printed, and chinese_font is
# left undefined.
try:
    font_path_str = "微软雅黑.ttf"
    font_path_obj = Path(font_path_str)
    if not font_path_obj.exists():  # not next to the notebook: look among the system fonts
        # Fallback: search the installed system fonts for Microsoft YaHei.
        import matplotlib

        system_fonts = matplotlib.font_manager.findSystemFonts(
            fontpaths=None, fontext="ttf"
        )
        # First system font whose path contains either name, or None.
        found_path = next(
            (
                f
                for f in system_fonts
                if "msyh.ttf" in f.lower() or "microsoft yahei" in f.lower()
            ),
            None,
        )
        if found_path:
            font_path_str = found_path
            print(f"提示: 在本地未找到 '微软雅黑.ttf'，已使用系统字体: {font_path_str}")
        else:
            raise FileNotFoundError
    fontManager.addfont(str(font_path_str))
    chinese_font = FontProperties(fname=font_path_str)
    plt.rcParams["font.sans-serif"] = [chinese_font.get_name()]
    # Render the minus sign with a plain hyphen.
    plt.rcParams["axes.unicode_minus"] = False
    print(f"Matplotlib 全局字体已成功设置为: {chinese_font.get_name()}")
except (FileNotFoundError, StopIteration):
    print(
        f"❌ 错误: 找不到字体文件 '微软雅黑.ttf' 或其系统备用字体。图片中的中文可能显示为方框。"
    )
    # Without the font the script still runs, but the images will be affected.
    pass

# One label per hour of the day: "00:00-01:00", ..., "23:00-00:00".
TIME_LABELS = [f"{h:02d}:00-{(h + 1) % 24:02d}:00" for h in range(24)]
# Bar colours passed to plt.bar together with TIME_LABELS (24 entries, one per hourly bar).
COLORS = [
    "olive",
    "grey",
    "yellow",
    "orange",
    "green",
    "palegoldenrod",
    "darkolivegreen",
    "pink",
    "Thistle",
    "steelblue",
    "darkslategrey",
    "slategray",
    "tan",
    "darkolivegreen",
    "grey",
    "pink",
    "goldenrod",
    "mediumslateblue",
    "saddlebrown",
    "olive",
    "navy",
    "sandybrown",
    "moccasin",
    "black",
]

# --- 3. 辅助函数 ---


def get_months_in_quarter(year_quarter_str: str) -> list[str]:
    """
    List the "YYYYMM" month keys that belong to a quarter label such as "2025Q2".

    The first four characters are the year and the sixth character is the
    quarter digit; a digit outside 1-4 produces an empty list.
    """
    month_suffixes = (
        "01", "02", "03",
        "04", "05", "06",
        "07", "08", "09",
        "10", "11", "12",
    )
    year_value = int(year_quarter_str[:4])
    quarter_value = int(year_quarter_str[5])
    if not 1 <= quarter_value <= 4:
        return []
    quarter_slice = month_suffixes[3 * (quarter_value - 1) : 3 * quarter_value]
    return [f"{year_value}{suffix}" for suffix in quarter_slice]


def calculate_hourly_distribution(time_series: pd.Series) -> list:
    """
    Count how many timestamps of a series fall into each hour of the day.

    Values that cannot be parsed as datetimes are ignored.

    Returns:
        A list of 24 ints (index 0 = 00:00-00:59, ..., 23 = 23:00-23:59);
        all zeros when the series is empty or holds no valid timestamp.
    """
    if time_series.empty or time_series.isna().all():
        return [0] * 24

    parsed = pd.to_datetime(time_series, errors="coerce").dropna()
    if parsed.empty:
        return [0] * 24

    # hour -> number of timestamps in that hour (only hours that occur)
    seen_hours = dict(parsed.dt.hour.value_counts().sort_index().items())
    return [int(seen_hours.get(hour, 0)) for hour in range(24)]


def plot_and_save_distribution(
    company_name: str, metric_name: str, hourly_counts: list, output_path: Path
):
    """
    Draw the 24-hour bar chart of one metric and save it as a PNG.

    Each non-empty bar is annotated with its share of the total. The image is
    written to ``output_path / "<company>_<metric>_分布图_季度.png"``. Nothing is
    drawn when every count is zero.

    Args:
        company_name: company label used in the file name and log messages.
        metric_name: metric label used in the file name and log messages.
        hourly_counts: 24 counts, one per hour, matching TIME_LABELS.
        output_path: directory the image is written to.
    """
    total_count = sum(hourly_counts)
    if total_count == 0:
        print(
            f"    -> {company_name} 的 '{metric_name}' 数据为空或无法处理，不生成图片。"
        )
        return

    fig = plt.figure(figsize=(20, 10))
    try:
        bars = plt.bar(TIME_LABELS, hourly_counts, width=0.5, color=COLORS)
        for bar in bars:
            height = bar.get_height()
            if height > 0:
                percentage = height / total_count
                plt.text(
                    bar.get_x() + bar.get_width() / 2.0,
                    height,
                    f"{percentage:.1%}",
                    ha="center",
                    va="bottom",
                    fontsize=16,
                )
        plt.xlabel("24小时分布", fontsize=16)
        plt.ylabel("快件数量", fontsize=16)
        plt.xticks(rotation=75, fontsize=16)
        plt.yticks(fontsize=16)
        plt.grid(axis="y", linestyle="--", alpha=0.7)
        plt.tight_layout()
        # The file name carries a quarterly marker.
        save_path = output_path / f"{company_name}_{metric_name}_分布图_季度.png"
        plt.savefig(save_path)
    finally:
        # Always release the figure, even when drawing or saving fails; this
        # function is called once per company and metric.
        plt.close(fig)
    print(f"    -> 图片已保存: {save_path.name}")


# --- 4. 主执行流程 (季度版) ---


def run_quarterly_plotting_and_analysis():
    """Plot quarterly hourly distributions per company and rank time-window shares.

    For every company in ``COMPANIES_TO_ANALYZE`` the "线路详细数据" sheet of each
    monthly workbook belonging to ``CURRENT_YEAR_QUARTER`` is loaded and
    concatenated. Every column named in ``METRICS_TO_PLOT`` is converted into a
    24-bin hourly histogram and plotted into ``OUTPUT_IMAGE_PATH``. For two
    metrics the share of events inside fixed hour windows is recorded, and the
    recorded shares are finally printed as descending rankings.
    """
    print(f"🚀 开始执行季度分析任务 ({CURRENT_YEAR_QUARTER})...")
    quarter_months = get_months_in_quarter(CURRENT_YEAR_QUARTER)
    shares_by_company = {}
    print(f"本季度包含月份: {', '.join(quarter_months)}")

    # metric column -> [(result key, slice of hours whose share is recorded)]
    share_windows = {
        "到达分拣中心时间": [("分拣中心_16_22_占比", slice(16, 22))],
        "离开寄件城市时间": [
            ("离开城市_16_22_占比", slice(16, 22)),
            ("离开城市_22_24_占比", slice(22, 24)),
        ],
    }

    for company in COMPANIES_TO_ANALYZE:
        print(f"\n--- 正在处理公司: {company} (季度: {CURRENT_YEAR_QUARTER}) ---")

        # "快包" workbooks are stored under the "邮政" prefix.
        if company == "快包":
            file_prefix = "邮政"
        else:
            file_prefix = COMPANY_MAPPING.get(company, company)

        # Collect the detail sheet of every month in the quarter.
        monthly_frames = []
        for yyyymm in quarter_months:
            # Layout: historical_data/YYYYMM/data_analysis_result_YYYYMM/<prefix>_...xlsx
            workbook_path = (
                HISTORICAL_DATA_PATH
                / yyyymm
                / f"data_analysis_result_{yyyymm}"
                / f"{file_prefix}_data_analysis_result.xlsx"
            )
            if not workbook_path.exists():
                print(f"  -> 未找到 {yyyymm} 数据文件: {workbook_path.name}")
                continue
            try:
                detail = pd.read_excel(workbook_path, sheet_name="线路详细数据")
                if detail.empty:
                    print(
                        f"  -> {yyyymm} 文件 '{workbook_path.name}' 的 '线路详细数据' sheet 为空。"
                    )
                else:
                    monthly_frames.append(detail)
                    print(f"  -> 已加载 {yyyymm} 数据 ({len(detail)} 行)")
            except Exception as e:
                print(f"  -> 读取文件 {workbook_path.name} 失败: {e}")

        if len(monthly_frames) == 0:
            print(f"  -> {company} 在 {CURRENT_YEAR_QUARTER} 季度无任何数据，跳过。")
            continue

        # One frame holding the whole quarter for this company.
        quarter_frame = pd.concat(monthly_frames, ignore_index=True)
        print(f"  -> {company} 季度数据聚合完毕，共 {len(quarter_frame)} 行。")

        company_shares = shares_by_company[company] = {}

        for metric in METRICS_TO_PLOT:
            if metric not in quarter_frame.columns:
                print(f"    -> 季度数据中 '{metric}' 列不存在，跳过。")
                continue

            histogram = calculate_hourly_distribution(quarter_frame[metric])
            plot_and_save_distribution(company, metric, histogram, OUTPUT_IMAGE_PATH)

            events = sum(histogram)
            if events == 0:
                continue

            # Record the share of events inside each configured hour window.
            for share_key, window in share_windows.get(metric, ()):
                company_shares[share_key] = sum(histogram[window]) / events

    print("\n\n--- 📈 季度分析结果汇总 ---")

    def print_ranking_results(metric_key: str, description: str):
        """Print companies ordered by the recorded share ``metric_key`` (largest first)."""
        print(f"\n--- {description} 占比排名 ({CURRENT_YEAR_QUARTER}) ---")
        pairs = [
            (name, shares[metric_key])
            for name, shares in shares_by_company.items()
            if metric_key in shares
        ]
        if not pairs:
            print("无相关数据可供排名。")
            return
        ordered = sorted(pairs, key=lambda pair: pair[1], reverse=True)
        print("排名 | 公司   | 占比")
        print("-----|--------|-------")
        for position, (name, share) in enumerate(ordered, start=1):
            print(f"{position:<4} | {name:<6} | {share:>6.2%}")

    print_ranking_results("分拣中心_16_22_占比", "到达分拣中心时间 (16:00 - 22:00)")
    print_ranking_results("离开城市_16_22_占比", "离开寄件城市时间 (16:00 - 22:00)")
    print_ranking_results("离开城市_22_24_占比", "离开寄件城市时间 (22:00 - 24:00)")
    print("\n🎉 全部季度任务执行完毕！")


# --- 5. Run the main routine ---
# Start the quarterly plotting/analysis only when this code is executed as the
# main module, not when it is imported from elsewhere.
if __name__ == "__main__":
    run_quarterly_plotting_and_analysis()

数据输入基路径: /Users/lava/Documents/国家邮政局发展研究中心实习/python_data_analysis/报告数据/输入/historical_data
图片输出路径: /Users/lava/Documents/国家邮政局发展研究中心实习/python_data_analysis/报告数据/输出/7_报告图片_季度
Matplotlib 全局字体已成功设置为: Microsoft YaHei
🚀 开始执行季度分析任务 (2025Q2)...
本季度包含月份: 202504, 202505, 202506

--- 正在处理公司: EMS (季度: 2025Q2) ---
  -> 已加载 202504 数据 (55407 行)
  -> 已加载 202505 数据 (64942 行)
  -> 已加载 202506 数据 (39227 行)
  -> EMS 季度数据聚合完毕，共 159576 行。
    -> 图片已保存: EMS_揽收时间_分布图_季度.png
    -> 图片已保存: EMS_到达分拣中心时间_分布图_季度.png
    -> 图片已保存: EMS_离开寄件城市时间_分布图_季度.png
    -> 图片已保存: EMS_到达收件城市时间_分布图_季度.png
    -> 图片已保存: EMS_派送时间_分布图_季度.png
    -> 图片已保存: EMS_签收时间_分布图_季度.png

--- 正在处理公司: 快包 (季度: 2025Q2) ---
  -> 已加载 202504 数据 (61418 行)
  -> 已加载 202505 数据 (75194 行)
  -> 已加载 202506 数据 (71603 行)
  -> 快包 季度数据聚合完毕，共 208215 行。
    -> 图片已保存: 快包_揽收时间_分布图_季度.png
    -> 图片已保存: 快包_到达分拣中心时间_分布图_季度.png
    -> 图片已保存: 快包_离开寄件城市时间_分布图_季度.png
    -> 图片已保存: 快包_到达收件城市时间_分布图_季度.png
    -> 图片已保存: 快包_派送时间_分布图_季度.png
    -> 图片已保存: 快包_签收时间_分布图_季度.png

--- 

In [None]:
# ==============================================================================
# Cell 5: 邮政季报 表1 生成
# ==============================================================================

# --- Part 1: core constants and mappings ---

# Every company covered by the report.
COMPANIES_ALL = ["中通", "圆通", "极兔", "申通", "韵达", "顺丰", "京东", "EMS", "德邦", "快包"]

# Companies that feed the industry mean / best figures ("快包" is left out).
COMPANIES_FOR_INDUSTRY_COMPARISON = [
    name for name in COMPANIES_ALL if name != "快包"
]

# Company key -> workbook file-name prefix. Only "快包" differs from its key:
# its workbooks are stored under the "邮政" prefix.
COMPANY_FILE_MAP = {
    name: ("邮政" if name == "快包" else name)
    for name in ("EMS", "德邦", "极兔", "圆通", "顺丰", "中通", "京东", "韵达", "申通", "快包")
}

# Display name used in the report -> row label in the "基础指标" sheet.
METRICS_MAP = dict(
    [
        ("全程时限（小时）", "全程时限"),
        ("72小时妥投率", "72小时准时率"),
        ("寄出地处理时限", "寄出地处理时限"),
        ("运输时限", "运输时限"),
        ("寄达地处理时限", "寄达地处理时限"),
        ("投递时限", "投递时限"),
    ]
)

# Metrics where a larger value is the better one; all others rank ascending.
HIGHER_IS_BETTER = ["72小时妥投率"]


# --- Part 2: 适用于季度的路径获取与数据聚合函数 ---


def get_quarterly_source_paths(period_label: str) -> dict:
    """Return the monthly source directories needed for one reporting period.

    Args:
        period_label: "本期" (current quarter), "上季度" (previous quarter) or
            "去年同期" (same quarter last year).

    Returns:
        ``{"data_analysis": [<dir per month>, ...]}`` where each directory is
        ``HISTORICAL_DATA_PATH / YYYYMM / data_analysis_result_YYYYMM``; an
        empty dict for any other label.
    """
    # Lambdas keep the lookup lazy: only the requested constant is read.
    quarter_for_label = {
        "本期": lambda: CURRENT_YEAR_QUARTER,
        "上季度": lambda: PREVIOUS_YEAR_QUARTER,
        "去年同期": lambda: LAST_YEAR_QUARTER,
    }
    resolve_quarter = quarter_for_label.get(period_label)
    if resolve_quarter is None:
        return {}

    return {
        "data_analysis": [
            HISTORICAL_DATA_PATH / yyyymm / f"data_analysis_result_{yyyymm}"
            for yyyymm in get_months_in_quarter(resolve_quarter())
        ]
    }


def load_and_aggregate_quarterly_postal_data(period_label: str) -> pd.DataFrame:
    """Load all monthly workbooks of one quarter and build the quarterly metrics.

    For each month directory returned by ``get_quarterly_source_paths`` the
    "基础指标" sheet of every company workbook is read and the ``mean`` column of
    the metrics named in ``METRICS_MAP`` is kept (a workbook missing any of
    those metrics is ignored). Monthly values are combined with a plain,
    unweighted mean per metric. Industry mean / best rows are then added from
    ``COMPANIES_FOR_INDUSTRY_COMPARISON`` and two rankings are computed: EMS
    against every company except "快包", and "快包" against every company
    except "EMS".

    Args:
        period_label: "本期", "上季度" or "去年同期".

    Returns:
        A DataFrame with one column per company (plus "行业均值"/"行业最优" when
        available) and one row per display metric, followed by
        ``<metric>_EMS_rank`` / ``<metric>_POST_rank`` rows. An empty DataFrame
        when nothing could be loaded.
    """
    # Resolve the quarter shown in the log header. The module-level constants
    # are not visible through locals(), so look them up in globals() by name.
    quarter_constant_by_label = {
        "本期": "CURRENT_YEAR_QUARTER",
        "上季度": "PREVIOUS_YEAR_QUARTER",
        "去年同期": "LAST_YEAR_QUARTER",
    }
    quarter_str = globals().get(
        quarter_constant_by_label.get(period_label, ""), "未知周期"
    )
    print(f"\n--- 正在处理 {period_label} ({quarter_str}) 的季度数据 ---")

    source_paths = get_quarterly_source_paths(period_label)
    if not source_paths.get("data_analysis"):
        return pd.DataFrame()

    metrics_to_extract = list(METRICS_MAP.values())
    all_monthly_dfs = []
    for analysis_dir in source_paths["data_analysis"]:
        if not analysis_dir.exists():
            print(f"  - ⚠️ 警告: 目录不存在，跳过: {analysis_dir}")
            continue

        monthly_metrics = {}
        for company_key, file_prefix in COMPANY_FILE_MAP.items():
            file_path = analysis_dir / f"{file_prefix}_data_analysis_result.xlsx"
            if not file_path.exists():
                continue
            try:
                df_basic = pd.read_excel(file_path, sheet_name="基础指标").set_index(
                    "项目"
                )
                # Keep the company only if every required metric row is present.
                if all(item in df_basic.index for item in metrics_to_extract):
                    monthly_metrics[company_key] = df_basic.loc[
                        metrics_to_extract, "mean"
                    ]
            except Exception as e:
                print(f"  - ⚠️ 警告: 读取文件 {file_path.name} 失败: {e}")

        if monthly_metrics:
            # rows = metrics, columns = companies for this month
            all_monthly_dfs.append(pd.DataFrame(monthly_metrics))

    if not all_monthly_dfs:
        print(f"❌ 错误: 未能加载 {period_label} 的任何月度数据。")
        return pd.DataFrame()

    # Average the monthly values per metric, then flip to rows = companies and
    # switch from sheet row labels to the report's display names.
    df_quarterly_final = (
        pd.concat(all_monthly_dfs)
        .groupby(level=0)
        .mean()
        .T.rename(columns={v: k for k, v in METRICS_MAP.items()})
    )

    industry_competitors = df_quarterly_final.index.intersection(
        COMPANIES_FOR_INDUSTRY_COMPARISON
    ).tolist()
    if industry_competitors:
        df_quarterly_final.loc["行业均值"] = df_quarterly_final.loc[
            industry_competitors
        ].mean()
        for metric in METRICS_MAP.keys():
            industry_values = df_quarterly_final.loc[industry_competitors, metric]
            df_quarterly_final.loc["行业最优", metric] = (
                industry_values.max()
                if metric in HIGHER_IS_BETTER
                else industry_values.min()
            )

    # Rank only companies that actually have data this quarter; indexing .loc
    # with a label that is absent from the index would raise KeyError.
    available = set(df_quarterly_final.index)
    ems_competitors = [c for c in COMPANIES_ALL if c != "快包" and c in available]
    postal_competitors = [c for c in COMPANIES_ALL if c != "EMS" and c in available]

    for metric in METRICS_MAP.keys():
        ascending = metric not in HIGHER_IS_BETTER
        # Ranks are stored as extra columns (aligned on the company index);
        # rows outside the ranked group get NaN.
        df_quarterly_final[f"{metric}_EMS_rank"] = df_quarterly_final.loc[
            ems_competitors, metric
        ].rank(method="min", ascending=ascending, na_option="bottom")
        df_quarterly_final[f"{metric}_POST_rank"] = df_quarterly_final.loc[
            postal_competitors, metric
        ].rank(method="min", ascending=ascending, na_option="bottom")

    print(f"  ✅ {period_label} 数据聚合与计算完成。")
    return df_quarterly_final.T  # rows = metrics / rank rows, columns = companies


# --- Part 3: 最终表格生成与保存 (排名提取和格式化逻辑) ---


def create_quarterly_table1(period_label: str) -> pd.DataFrame:
    """Assemble report Table 1 (metric values and ranks) for one period.

    Args:
        period_label: Period label forwarded to
            ``load_and_aggregate_quarterly_postal_data``.

    Returns:
        A DataFrame indexed by the display metrics of ``METRICS_MAP`` with the
        columns EMS指标值, EMS排名, 邮政快包指标值, 邮政快包排名, 行业均值 and 行业最优;
        an empty DataFrame when no quarterly data is available.
    """
    quarterly_data = load_and_aggregate_quarterly_postal_data(period_label)
    if quarterly_data.empty:
        return pd.DataFrame()

    metric_labels = list(METRICS_MAP.keys())
    final_table = pd.DataFrame(
        index=metric_labels,
        columns=[
            "EMS指标值",
            "EMS排名",
            "邮政快包指标值",
            "邮政快包排名",
            "行业均值",
            "行业最优",
        ],
    )

    # output column -> source column holding the metric value
    value_sources = {
        "EMS指标值": "EMS",
        "邮政快包指标值": "快包",
        "行业均值": "行业均值",
        "行业最优": "行业最优",
    }
    # output column -> (rank-row tag, source column holding that company's rank)
    rank_sources = {
        "EMS排名": ("EMS", "EMS"),
        "邮政快包排名": ("POST", "快包"),
    }

    for metric in metric_labels:
        if metric not in quarterly_data.index:
            continue
        for out_col, src_col in value_sources.items():
            final_table.loc[metric, out_col] = quarterly_data.loc[metric, src_col]
        for out_col, (tag, src_col) in rank_sources.items():
            final_table.loc[metric, out_col] = quarterly_data.loc[
                f"{metric}_{tag}_rank", src_col
            ]

    return final_table


def write_formatted_table_to_excel(writer, df, sheet_name):
    """Write the numeric Table 1 DataFrame to Excel and apply number formats.

    Args:
        writer: An open ``pd.ExcelWriter`` using the xlsxwriter engine
            (``writer.book`` / ``writer.sheets`` are used).
        df: Table to write. Must contain the columns EMS指标值, EMS排名,
            邮政快包指标值, 邮政快包排名, 行业均值 and 行业最优.
        sheet_name: Name of the worksheet to create.

    Value columns get a two-decimal format and rank columns an integer format.
    The cells of the "72小时妥投率" row are rewritten with a percent format;
    if that row or one of its columns is missing a warning is printed instead.
    """
    df.to_excel(writer, sheet_name=sheet_name, index=True)

    workbook = writer.book
    worksheet = writer.sheets[sheet_name]

    # Excel number formats.
    decimal_format = workbook.add_format({"num_format": "0.00"})
    percent_format = workbook.add_format({"num_format": "0.00%"})
    integer_format = workbook.add_format({"num_format": "0"})

    # Zero-based sheet column of every header; column 0 is the index column.
    # Integer positions are used instead of letters so this also works past
    # column Z.
    header = ["指标"] + df.columns.tolist()
    col_pos = {name: i for i, name in enumerate(header)}

    # (column name, width, format) applied to whole columns.
    column_specs = [
        ("EMS指标值", 12, decimal_format),
        ("EMS排名", 12, integer_format),
        ("邮政快包指标值", 15, decimal_format),
        ("邮政快包排名", 15, integer_format),
        ("行业均值", 12, decimal_format),
        ("行业最优", 12, decimal_format),
        ("指标", 20, None),  # wider index column, no number format
    ]
    for col_name, width, cell_format in column_specs:
        position = col_pos[col_name]
        worksheet.set_column(position, position, width, cell_format)

    # The "72小时妥投率" row is a ratio, so override its value cells with a
    # percent format.
    try:
        percent_row_idx = df.index.get_loc("72小时妥投率")
        # xlsxwriter rows are zero-based and row 0 holds the header written by
        # pandas, so DataFrame row i lives in sheet row i + 1.
        excel_row = percent_row_idx + 1
        value_cols = ["EMS指标值", "邮政快包指标值", "行业均值", "行业最优"]
        for col_name in value_cols:
            excel_col = df.columns.get_loc(col_name) + 1  # +1 for the index column
            value = df.loc["72小时妥投率", col_name]
            if pd.isna(value):
                # xlsxwriter refuses to write NaN as a number; keep the cell
                # empty but still formatted.
                worksheet.write_blank(excel_row, excel_col, None, percent_format)
            else:
                worksheet.write(excel_row, excel_col, value, percent_format)
    except KeyError:
        print("⚠️ 警告: 未在表格中找到 '72小时妥投率' 行，跳过百分比格式化。")


# ==============================================================================
# Cell 5 (continued): main entry point -- build, preview and save Table 1
# ==============================================================================
print(f"🚀 开始生成 {CURRENT_QUARTER_DISPLAY} 邮政季报 表1...")

# 1. Build the table for the current period (a DataFrame of plain numbers).
table1_df = create_quarterly_table1("本期")

# 2. Preview and save the result; an empty table means generation failed.
if not table1_df.empty:
    print(f"\n预览 (原始数值数据): {CURRENT_QUARTER_DISPLAY} 时限数据及排名")
    display(table1_df)

    # 3. Save through the formatting writer so the Excel cells carry number formats.
    output_filename = f"邮政季报_表1_季度时限数据及排名_{CURRENT_YEAR_QUARTER}.xlsx"
    output_filepath = OUTPUT_DIR / output_filename

    with pd.ExcelWriter(output_filepath, engine="xlsxwriter") as writer:
        write_formatted_table_to_excel(writer, table1_df, "时限及排名")

    print(f"\n🎉 成功！包含【数字格式】的表格已保存至:\n{output_filepath}")
else:
    print(
        f"\n🛑 未能生成 {CURRENT_QUARTER_DISPLAY} 的表格，请检查以上日志中的警告或错误信息。"
    )

🚀 开始生成 2025年第二季度 邮政季报 表1...

--- 正在处理 本期 (2025Q2) 的季度数据 ---
  ✅ 本期 数据聚合与计算完成。

预览 (原始数值数据): 2025年第二季度 时限数据及排名


Unnamed: 0,EMS指标值,EMS排名,邮政快包指标值,邮政快包排名,行业均值,行业最优
全程时限（小时）,43.624286,2.0,59.272335,8.0,51.188241,43.556039
72小时妥投率,0.929377,1.0,0.777892,8.0,0.866763,0.929377
寄出地处理时限,7.869239,6.0,8.742167,7.0,8.452711,5.679341
运输时限,23.308095,2.0,37.774676,9.0,31.058307,19.0741
寄达地处理时限,9.744223,8.0,9.63639,8.0,8.920253,5.887796
投递时限,2.702728,3.0,3.119102,7.0,2.75697,2.06011



🎉 成功！包含【数字格式】的表格已保存至:
/Users/lava/Documents/国家邮政局发展研究中心实习/python_data_analysis/报告数据/输出/6_邮政报告表格_季度/邮政季报_表1_季度时限数据及排名_2025Q2.xlsx


In [None]:
# ==============================================================================
# Cell 6: 邮政季报 表2 生成
# ==============================================================================

# --- Part 1: constants (same logic as the monthly report) ---
# COMPANIES_ALL, COMPANY_FILE_MAP, ... are NOT redefined in this cell; the
# functions below rely on the definitions from the Table 1 cell, which must
# therefore be run first.
# Metrics needed for Table 2:
# (distance category, display metric name) -> internal result key.
SEGMENTED_METRICS_MAP = {
    ("600公里以下", "T+1妥投率"): "under_600_t1_rate",
    ("600公里以上", "全产品T+2妥投率"): "over_600_t2_rate",
    ("600公里以上", "航空件T+1妥投率"): "over_600_air_t1_rate",
}

# --- Part 2: 季度分段数据聚合函数 (核心) ---


def load_and_aggregate_quarterly_segmented_data(period_label: str) -> pd.DataFrame:
    """Compute distance-segmented delivery rates for one quarter.

    For every company in ``COMPANIES_ALL`` the "线路详细数据" sheets of all
    monthly workbooks in the period are concatenated into one quarterly frame,
    which is then split at 600 km ("公里" <= 600 vs. > 600). Three rates are
    taken as column means: T+1 for the short segment, T+2 for the long segment,
    and T+1 for the long-segment rows flagged ``is_air``. Industry mean / best
    rows are added from ``COMPANIES_FOR_INDUSTRY_COMPARISON``; for the air
    metric those two industry cells are blanked.

    Args:
        period_label: "本期", "上季度" or "去年同期".

    Returns:
        A DataFrame with rows ``under_600_t1_rate``, ``over_600_t2_rate`` and
        ``over_600_air_t1_rate`` and one column per company (plus
        "行业均值"/"行业最优" when available); empty when nothing was computed.
    """
    # Resolve the quarter shown in the log header. The module-level constants
    # are not visible through locals(), so look them up in globals() by name.
    quarter_constant_by_label = {
        "本期": "CURRENT_YEAR_QUARTER",
        "上季度": "PREVIOUS_YEAR_QUARTER",
        "去年同期": "LAST_YEAR_QUARTER",
    }
    quarter_str = globals().get(
        quarter_constant_by_label.get(period_label, ""), "未知周期"
    )
    print(f"\n--- 正在计算 {period_label} ({quarter_str}) 的分段指标 ---")

    source_paths = get_quarterly_source_paths(period_label)
    if not source_paths.get("data_analysis"):
        return pd.DataFrame()

    required_cols = ["公里", "T+1_achieved", "T+2_achieved", "is_air"]
    all_company_quarterly_results = {}

    for company_key in COMPANIES_ALL:
        file_prefix = COMPANY_FILE_MAP.get(company_key, company_key)

        # 1. Load this company's detail sheet for every month of the quarter.
        monthly_dfs_for_company = []
        for analysis_dir in source_paths["data_analysis"]:
            detail_file = analysis_dir / f"{file_prefix}_data_analysis_result.xlsx"
            if not detail_file.exists():
                continue
            try:
                # Assumption: everything needed lives in the "线路详细数据" sheet.
                df_month = pd.read_excel(detail_file, sheet_name="线路详细数据")
                monthly_dfs_for_company.append(df_month)
            except Exception as e:
                print(f"  - ⚠️ 警告: 读取文件 {detail_file.name} 失败: {e}")

        if not monthly_dfs_for_company:
            continue  # no data for this company in the period

        # 2. Merge the months into one quarterly frame.
        df_quarterly_company = pd.concat(monthly_dfs_for_company, ignore_index=True)
        print(
            f"  - 正在处理: {company_key} (共 {len(df_quarterly_company)} 条季度数据)"
        )

        # 3. Segment by distance and compute the rates on the quarterly frame.
        if not all(c in df_quarterly_company.columns for c in required_cols):
            print(f"    -> 警告: {company_key} 的季度数据缺少必要列，跳过。")
            continue

        df_under_600 = df_quarterly_company[df_quarterly_company["公里"] <= 600]
        df_over_600 = df_quarterly_company[df_quarterly_company["公里"] > 600]
        # Explicit comparison with True: anything that is not equal to True
        # (including missing values) is treated as "not air".
        df_over_600_air = df_over_600[df_over_600["is_air"] == True]

        # NOTE(review): the *_achieved columns are presumably 0/1 or boolean
        # flags so that their mean is a rate -- confirm against the producer.
        all_company_quarterly_results[company_key] = {
            "under_600_t1_rate": df_under_600["T+1_achieved"].mean()
            if not df_under_600.empty
            else np.nan,
            "over_600_t2_rate": df_over_600["T+2_achieved"].mean()
            if not df_over_600.empty
            else np.nan,
            "over_600_air_t1_rate": df_over_600_air["T+1_achieved"].mean()
            if not df_over_600_air.empty
            else np.nan,
        }

    if not all_company_quarterly_results:
        print(f"❌ 错误: 未能为 {period_label} 计算任何公司的分段指标。")
        return pd.DataFrame()

    df_results = pd.DataFrame.from_dict(all_company_quarterly_results, orient="index")

    # 4. Industry mean and best value (all three metrics are higher-is-better).
    industry_competitors = df_results.index.intersection(
        COMPANIES_FOR_INDUSTRY_COMPARISON
    ).tolist()
    if industry_competitors:
        df_results.loc["行业均值"] = df_results.loc[industry_competitors].mean()
        df_results.loc["行业最优"] = df_results.loc[industry_competitors].max()

        # Per the report template the air metric carries no industry figures.
        df_results.loc[["行业均值", "行业最优"], "over_600_air_t1_rate"] = np.nan

    print(f"  ✅ {period_label} 分段指标计算完成。")
    return df_results.T


# --- Part 3: 最终表格生成与格式化 ---


def create_quarterly_table2(period_label: str) -> pd.DataFrame:
    """Assemble report Table 2 (distance-segmented delivery rates) for one period.

    Args:
        period_label: Period label forwarded to
            ``load_and_aggregate_quarterly_segmented_data``.

    Returns:
        A DataFrame with a two-level (类别, 指标) row index built from
        ``SEGMENTED_METRICS_MAP`` and the columns EMS, 邮政快包, 行业均值 and
        行业最优; an empty DataFrame when no quarterly data is available.
    """
    quarterly_data = load_and_aggregate_quarterly_segmented_data(period_label)
    if quarterly_data.empty:
        return pd.DataFrame()

    # output column -> column of the aggregated data it is copied from
    column_sources = {
        "EMS": "EMS",
        "邮政快包": "快包",
        "行业均值": "行业均值",
        "行业最优": "行业最优",
    }

    # Empty frame whose row index mirrors the template's (category, metric) layout.
    row_index = pd.MultiIndex.from_tuples(
        list(SEGMENTED_METRICS_MAP.keys()), names=["类别", "指标"]
    )
    final_table = pd.DataFrame(index=row_index, columns=list(column_sources))

    for row_key, internal_name in SEGMENTED_METRICS_MAP.items():
        if internal_name not in quarterly_data.index:
            continue
        for out_col, src_col in column_sources.items():
            final_table.loc[row_key, out_col] = quarterly_data.loc[
                internal_name, src_col
            ]

    return final_table


def write_table2_to_excel(writer, df, sheet_name):
    """Write Table 2 (two-level row index) to Excel with percent formatting.

    Args:
        writer: An open ``pd.ExcelWriter`` using the xlsxwriter engine.
        df: Table to write; every data cell is rewritten as a percentage, and
            missing values are written as the string "-".
        sheet_name: Name of the worksheet to create.
    """
    df.to_excel(writer, sheet_name=sheet_name, index=True)

    worksheet = writer.sheets[sheet_name]
    percent_format = writer.book.add_format({"num_format": "0.00%"})

    # Column widths: A:B hold the two index levels, C:F the data.
    worksheet.set_column("A:B", 20)
    worksheet.set_column("C:F", 15)

    # Data starts after one header row and two index columns.
    first_data_row, first_data_col = 1, 2
    n_rows, n_cols = df.shape
    for c in range(n_cols):
        sheet_col = c + first_data_col
        for r in range(n_rows):
            sheet_row = r + first_data_row
            value = df.iat[r, c]
            if pd.isna(value):
                # The template shows a dash where no figure exists.
                worksheet.write_string(sheet_row, sheet_col, "-")
                continue
            worksheet.write_number(sheet_row, sheet_col, value, percent_format)


# ==============================================================================
# Cell 6 (continued): main entry point -- build, preview and save Table 2
# ==============================================================================
print(f"🚀 开始生成 {CURRENT_QUARTER_DISPLAY} 邮政季报 表2...")

# 1. Build the table for the current period.
table2_df = create_quarterly_table2("本期")

# 2. Preview and save the result; an empty table means generation failed.
if not table2_df.empty:
    print(f"\n预览 (原始数值数据): {CURRENT_QUARTER_DISPLAY} 分公里妥投率")
    display(table2_df)

    # 3. Save through the Table 2 writer so the Excel cells carry percent formats.
    output_filename = f"邮政季报_表2_分公里妥投率_{CURRENT_YEAR_QUARTER}.xlsx"
    output_filepath = OUTPUT_DIR / output_filename

    with pd.ExcelWriter(output_filepath, engine="xlsxwriter") as writer:
        write_table2_to_excel(writer, table2_df, "分公里妥投率")

    print(f"\n🎉 成功！包含【数字格式】的表格已保存至:\n{output_filepath}")
else:
    print(
        f"\n🛑 未能生成 {CURRENT_QUARTER_DISPLAY} 的表格，请检查以上日志中的警告或错误信息。"
    )

🚀 开始生成 2025年第二季度 邮政季报 表2...

--- 正在计算 本期 (2025Q2) 的分段指标 ---
  - 正在处理: 中通 (共 199983 条季度数据)
  - 正在处理: 圆通 (共 198572 条季度数据)
  - 正在处理: 极兔 (共 232705 条季度数据)
  - 正在处理: 申通 (共 196353 条季度数据)
  - 正在处理: 韵达 (共 194289 条季度数据)
  - 正在处理: 顺丰 (共 253596 条季度数据)
  - 正在处理: 京东 (共 182297 条季度数据)
  - 正在处理: EMS (共 159576 条季度数据)
  - 正在处理: 德邦 (共 202990 条季度数据)
  - 正在处理: 快包 (共 208215 条季度数据)
  ✅ 本期 分段指标计算完成。

预览 (原始数值数据): 2025年第二季度 分公里妥投率


Unnamed: 0_level_0,Unnamed: 1_level_0,EMS,邮政快包,行业均值,行业最优
类别,指标,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
600公里以下,T+1妥投率,0.731516,0.554082,0.794243,0.890263
600公里以上,全产品T+2妥投率,0.773137,0.529697,0.670983,0.786715
600公里以上,航空件T+1妥投率,0.520233,0.096073,,



🎉 成功！包含【数字格式】的表格已保存至:
/Users/lava/Documents/国家邮政局发展研究中心实习/python_data_analysis/报告数据/输出/6_邮政报告表格_季度/邮政季报_表2_分公里妥投率_2025Q2.xlsx
