In [12]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.api import ARDL, ardl_select_order
from statsmodels.tsa.stattools import coint
from IPython.display import display, Markdown
import re
import warnings

warnings.filterwarnings("ignore")

# --- 1. SOTA级参数与环境配置 ---
# ==============================================================================
FILE_PATH = r'E:\A智网\业扩分析\业扩ARDL模型\7月业扩月底电量报告全行业_净值.xlsx' 
OUTPUT_PATH = r'E:\A智网\业扩分析\业扩ARDL模型\ARDL分析报告_无春节变量版.xlsx'
SHEET_MAP = {
    'kwh': '电量', 
    'cap_high_new': '高压净新装', 
    'cap_high_exp': '高压净增容',
    'cap_low_new': '低压净新装', 
    'cap_low_exp': '低压净增容'
}
SIGNIFICANCE_LEVEL = 0.1
MAX_LAGS = 4
BOUNDS_TEST_ALPHA = '5%' 

# --- 2. SOTA级数据加载与特征工程 (已简化) ---
# ==============================================================================
def load_and_engineer_features(file_path, sheet_map):
    print("--- SOTA Step 1: Loading Data & Feature Engineering ---")
    all_data = {}
    for var, sheet in sheet_map.items():
        try:
            df = pd.read_excel(file_path, sheet_name=sheet)
            df['分类'] = df['分类'].astype(str).str.strip()
            
            date_cols = [c for c in df.columns if re.match(r'^\d{6}$', str(c).split('.')[0])]
            df_subset = df[['分类'] + date_cols]
            
            df_long = df_subset.melt(id_vars=['分类'], var_name='date', value_name=var)
            
            df_long['date'] = pd.to_datetime(df_long['date'].astype(str), format='%Y%m')
            df_long[var] = pd.to_numeric(df_long[var], errors='coerce').fillna(0)
            all_data[var] = df_long.set_index(['分类', 'date'])
            
        except Exception as e:
            raise RuntimeError(f"Error processing sheet '{sheet}' for variable '{var}': {e}")
            
    df_full = pd.concat(all_data.values(), axis=1)
    
    print("  -> Data loaded successfully.")
    return df_full


# --- 3. SOTA级核心分析流程 (保证DataFrame结构完整性) ---
# ==============================================================================
def run_sota_ardl_analysis(df_full):
    print("\n--- SOTA Step 2: Running Industry-by-Industry ARDL Analysis ---")
    
    industries = df_full.index.get_level_values('分类').unique()
    results_list = []
    
    # 【SOTA 终极修复】定义一个完整的列名模板
    # 确保无论成功失败，DataFrame的结构都保持一致
    dynamic_cols = [c for c in df_full.columns if c != 'kwh']
    
    all_col_names = ['行业分类', 'R-squared_adj', 'Bounds_Test_F_stat', 
                     f'Bounds_Test_Crit_Val_{BOUNDS_TEST_ALPHA}', 'Is_Cointegrated', 
                     'Coint_Test_pval(EG)', 'Bounds_Test_Method', 'speed_of_adj']
    for col in dynamic_cols:
        all_col_names.extend([f'β_long_{col}', f'p_long_{col}', f'β_short_{col}'])
    
    for industry in industries:
        print(f"\n{'='*25} Analyzing: {industry} {'='*25}")
        
        # 使用模板初始化结果字典，所有值默认为 np.nan
        result = {col: np.nan for col in all_col_names}
        result['行业分类'] = industry
        
        df_industry = df_full.loc[industry].dropna()
        if len(df_industry) < 40:
            print("    -> Insufficient data points (< 40), skipping.")
            result['R-squared_adj'] = 'Skipped: <40 data points'
            results_list.append(result)
            continue

        endog, exog = df_industry['kwh'], df_industry.drop(columns='kwh')
        
        try:
            coint_test_pval = coint(endog, exog)[1]
            print(f"    -> Engle-Granger Cointegration Test p-value: {coint_test_pval:.4f}")
            result['Coint_Test_pval(EG)'] = coint_test_pval
        except Exception as e:
            print(f"    -> Engle-Granger test failed: {e}")
            result['Coint_Test_pval(EG)'] = f'Error: {e}'

        try:
            sel_order = ardl_select_order(
                endog, maxlag=MAX_LAGS, exog=exog, 
                maxorder=MAX_LAGS, ic='aic', trend='c'
            )
            print(f"    -> Selected Order: AR={sel_order.ar_lags}, DL={sel_order.dl_lags}")
            model_fit = sel_order.model.fit()
            
            # --- 成功路径：填充所有结果 ---
            result['R-squared_adj'] = model_fit.rsquared_adj
            
            if hasattr(model_fit, 'bounds_test'):
                print("    -> Model object has `bounds_test` method. Performing test.")
                try:
                    bounds_test_summary = model_fit.bounds_test()
                    result['Bounds_Test_F_stat'] = bounds_test_summary.statistic
                    result[f'Bounds_Test_Crit_Val_{BOUNDS_TEST_ALPHA}'] = bounds_test_summary.crit_vals[BOUNDS_TEST_ALPHA][1]
                    result['Is_Cointegrated'] = 'Yes' if result['Bounds_Test_F_stat'] > result[f'Bounds_Test_Crit_Val_{BOUNDS_TEST_ALPHA}'] else 'No'
                    result['Bounds_Test_Method'] = 'Official Bounds Test'
                except Exception as e:
                    print(f"    -> WARNING: `bounds_test()` call failed: {e}. Test skipped.")
                    result['Is_Cointegrated'] = 'Error'
                    result['Bounds_Test_Method'] = 'Failed'
            else:
                print("    -> WARNING: Model object lacks `bounds_test` method. Test skipped.")
                result['Is_Cointegrated'] = 'N/A (Not applicable)'
                result['Bounds_Test_Method'] = 'N/A (Method unavailable)'

            long_run_params = model_fit.long_run_params
            ecm_params = model_fit.ecm_params
            
            for exog_var in exog.columns:
                if exog_var in long_run_params.index:
                    result[f'β_long_{exog_var}'] = long_run_params.loc[exog_var, 'coeff']
                    result[f'p_long_{exog_var}'] = long_run_params.loc[exog_var, 'pvalue']
                
                short_run_param_name = f'L0.{exog_var}'
                if short_run_param_name in ecm_params.index:
                    result[f'β_short_{exog_var}'] = ecm_params.loc[short_run_param_name, 'coeff']
            
            ect_param_name = 'ECT.L1'
            if ect_param_name in ecm_params.index:
                result['speed_of_adj'] = ecm_params.loc[ect_param_name, 'coeff']
            
            print(f"    -> Analysis complete. Adj R²: {result['R-squared_adj']:.3f}")

        except Exception as e:
            # --- 失败路径：只记录错误信息 ---
            print(f"    -> CRITICAL ERROR for '{industry}': {e}, skipping.")
            result['R-squared_adj'] = f'Error: {e}'
        
        # 无论成功或失败，都将结构完整的 result 添加到列表中
        results_list.append(result)
        
    return pd.DataFrame(results_list)

# --- 4. SOTA级结果展示与保存 (保持不变) ---
# ==============================================================================
def display_sota_results(df_summary, output_path):
    if df_summary.empty:
        print("\nNo industries could be analyzed.")
        return

    print(f"\n{'='*30} SOTA ARDL Analysis Summary {'='*30}")
    df_summary.set_index('行业分类', inplace=True)

    is_cointegrated = df_summary['Is_Cointegrated'] == 'Yes'

    def style_table(df, p_col_prefix, is_coint):
        styles = pd.DataFrame('', index=df.index, columns=df.columns)
        p_cols_in_summary = {col for col in df_summary.columns if col.startswith(p_col_prefix)}
        
        for beta_col in df.columns:
            var_name = beta_col.replace('β_long_', '').replace('β_short_', '')
            pval_col = p_col_prefix + var_name
            
            if pval_col in p_cols_in_summary:
                is_significant = pd.to_numeric(df_summary[pval_col], errors='coerce').fillna(1.0) < SIGNIFICANCE_LEVEL
                styles[beta_col] = np.where(is_significant & is_coint, 'font-weight: bold; color: green;', 
                                        np.where(is_significant, 'font-weight: bold; color: orange;', 'color: grey;'))
        return styles

    beta_long_cols = sorted([c for c in df_summary.columns if c.startswith('β_long_')])
    df_betas_long = df_summary[beta_long_cols].rename(columns=lambda c: c.replace('β_long_', ''))
    display(Markdown("### 各行业【长期影响系数 β_long】(kWh/kVA)"))
    display(Markdown("绿色粗体: **显著**且存在长期协整关系; 橙色粗体: **显著**但无长期协整关系; 灰色: 不显著"))
    display(df_betas_long.style.format("{:.2f}", na_rep="-").apply(style_table, p_col_prefix='p_long_', is_coint=is_cointegrated, axis=None))
    
    beta_short_cols = sorted([c for c in df_summary.columns if c.startswith('β_short_')])
    df_betas_short = df_summary[beta_short_cols].rename(columns=lambda c: c.replace('β_short_', ''))
    display(Markdown("### 各行业【短期冲击系数 β_short】(当期影响)"))
    display(df_betas_short.style.format("{:.2f}", na_rep="-"))
    
    with pd.ExcelWriter(output_path) as writer:
        df_betas_long.to_excel(writer, sheet_name='长期影响系数(β_long)')
        df_betas_short.to_excel(writer, sheet_name='短期冲击系数(β_short)')
        df_summary.to_excel(writer, sheet_name='全部详细结果')
    print(f"\nFull summary report saved to: {output_path}")

# --- 5. 执行主程序 ---
# ==============================================================================
if __name__ == '__main__':
    main_df = load_and_engineer_features(FILE_PATH, SHEET_MAP)
    sota_results = run_sota_ardl_analysis(main_df)
    display_sota_results(sota_results, OUTPUT_PATH)
    print("\n--- State-of-the-Art Analysis Complete ---")

--- SOTA Step 1: Loading Data & Feature Engineering ---
  -> Data loaded successfully.

--- SOTA Step 2: Running Industry-by-Industry ARDL Analysis ---

    -> Engle-Granger Cointegration Test p-value: 0.2279
    -> Selected Order: AR=[1, 2, 3, 4], DL={'cap_high_new': [0, 1, 2, 3, 4], 'cap_high_exp': [0, 1, 2, 3, 4], 'cap_low_new': [0, 1, 2, 3, 4], 'cap_low_exp': [0]}
    -> CRITICAL ERROR for '全行业总计': 'ARDLResults' object has no attribute 'rsquared_adj', skipping.

    -> Engle-Granger Cointegration Test p-value: 0.0330
    -> Selected Order: AR=[1, 2, 3, 4], DL={'cap_high_new': [0, 1, 2, 3, 4], 'cap_high_exp': [0, 1, 2, 3, 4], 'cap_low_new': [0, 1, 2, 3, 4]}
    -> CRITICAL ERROR for 'A、全行业用电合计': 'ARDLResults' object has no attribute 'rsquared_adj', skipping.

    -> Engle-Granger Cointegration Test p-value: 0.0309
    -> Selected Order: AR=[1, 2, 3, 4], DL={'cap_high_new': [0, 1, 2, 3, 4], 'cap_high_exp': [0, 1, 2], 'cap_low_new': [0], 'cap_low_exp': [0, 1, 2, 3, 4]}
    -> CRITICAL

### 各行业【长期影响系数 β_long】(kWh/kVA)

绿色粗体: **显著**且存在长期协整关系; 橙色粗体: **显著**但无长期协整关系; 灰色: 不显著

Unnamed: 0_level_0,cap_high_exp,cap_high_new,cap_low_exp,cap_low_new
行业分类,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
全行业总计,-,-,-,-
A、全行业用电合计,-,-,-,-
第一产业,-,-,-,-
第二产业,-,-,-,-
第三产业,-,-,-,-
B、城乡居民生活用电合计,-,-,-,-
二、工业,-,-,-,-
（二）制造业,-,-,-,-
13.石油、煤炭及其他燃料加工业,-,-,-,-
14.化学原料和化学制品制造业,-,-,-,-


### 各行业【短期冲击系数 β_short】(当期影响)

Unnamed: 0_level_0,cap_high_exp,cap_high_new,cap_low_exp,cap_low_new
行业分类,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
全行业总计,-,-,-,-
A、全行业用电合计,-,-,-,-
第一产业,-,-,-,-
第二产业,-,-,-,-
第三产业,-,-,-,-
B、城乡居民生活用电合计,-,-,-,-
二、工业,-,-,-,-
（二）制造业,-,-,-,-
13.石油、煤炭及其他燃料加工业,-,-,-,-
14.化学原料和化学制品制造业,-,-,-,-



Full summary report saved to: E:\A智网\业扩分析\业扩ARDL模型\ARDL分析报告_无春节变量版.xlsx

--- State-of-the-Art Analysis Complete ---
