## APC--Area per capita

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import math
import os

print(sys.executable)
print(os.environ.get('CONDA_DEFAULT_ENV', '未检测到Conda环境'))
!conda env list  # 列出所有环境，当前环境前会标有 *
!conda info      # 查看详细信息，包括当前环境
!jupyter kernelspec list  # 列出所有内核

/opt/miniconda3/envs/mixb/bin/python
mixb

# conda environments:
#
base                   /opt/miniconda3
flwr                   /opt/miniconda3/envs/flwr
mixb                 * /opt/miniconda3/envs/mixb


     active environment : mixb
    active env location : /opt/miniconda3/envs/mixb
            shell level : 2
       user config file : /Users/apple/.condarc
 populated config files : /opt/miniconda3/.condarc
                          /Users/apple/.condarc
          conda version : 25.3.1
    conda-build version : not installed
         python version : 3.13.2.final.0
                 solver : libmamba (default)
       virtual packages : __archspec=1=skylake
                          __conda=25.3.1=0
                          __osx=13.4=0
                          __unix=0=0
       base environment : /opt/miniconda3  (writable)
      conda av data dir : /opt/miniconda3/etc/conda
  conda av metadata url : None
           channel URLs : https://repo.anaconda.com/pkgs/main/osx-64
     

In [5]:
!jupyter kernelspec list          # 查看所有 kernel
# !jupyter kernelspec remove myenv

Available kernels:
  python3    /opt/miniconda3/envs/mixb/share/jupyter/kernels/python3
  mixb       /Users/apple/Library/Jupyter/kernels/mixb


## 1. 数据预处理

In [6]:
def load_and_melt(file_path, value_name):
    """
    加载CSV文件并检查年份列中的无效值
    
    参数:
        file_path (str): CSV文件路径
        value_name (str): 要创建的指标名称
    
    返回:
        DataFrame: 处理后的数据框
    """
    # 读取CSV文件
    df = pd.read_csv(file_path)
    
    # 清理未命名列
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    
    # 转换为长格式
    melted = df.melt(
        id_vars='Countries',
        var_name='Year',
        value_name=value_name
    )
    
    # 尝试将年份转换为数值，无效值设为NaN
    melted['Year_numeric'] = pd.to_numeric(melted['Year'], errors='coerce')
    
    # 查找无效年份值
    invalid_mask = melted['Year_numeric'].isna()
    invalid_rows = melted[invalid_mask]
    
    # 如果有无效值，打印详细信息
    if not invalid_rows.empty:
        print(f"\n⚠️ 在文件 {file_path} 中发现无效年份值:")
        for idx, row in invalid_rows.iterrows():
            print(f"  行索引: {idx}, 国家: '{row['Countries']}', 无效年份值: '{row['Year']}'")
        print(f"共发现 {len(invalid_rows)} 个无效年份值\n")
    
    # 删除无效行并转换年份为整数
    melted = melted.dropna(subset=['Year_numeric'])
    melted['Year'] = melted['Year_numeric'].astype(int)
    melted = melted.drop(columns=['Year_numeric'])
    
    return melted

'''
def load_and_melt(file_path, value_name):
    """
    读取文件并转换为长格式（Country, Year, Value）
    """
    # 读取文件，第一列为国家名称
    df = pd.read_csv(file_path)   
    # 转换为长格式
    melted = df.melt(
        id_vars='Countries',
        var_name='Year',
        value_name=value_name
    )
    melted['Year'] = melted['Year'].astype(int)  # 确保年份为整数
    return melted
'''

'\ndef load_and_melt(file_path, value_name):\n    """\n    读取文件并转换为长格式（Country, Year, Value）\n    """\n    # 读取文件，第一列为国家名称\n    df = pd.read_csv(file_path)   \n    # 转换为长格式\n    melted = df.melt(\n        id_vars=\'Countries\',\n        var_name=\'Year\',\n        value_name=value_name\n    )\n    melted[\'Year\'] = melted[\'Year\'].astype(int)  # 确保年份为整数\n    return melted\n'

In [7]:
# 定义文件路径及其对应的指标名
file_config = {
    'Data1/processed/CO2EmissionsFromEnergy.csv': 'total_CE',
    'Data1/processed/CarbonSequestration.csv': 'cement_CS',
    'Data1/processed/PrimaryEnergyConsumption.csv': 'energy_consumption',
    'Data1/processed/Population.csv': 'Population',
    'Data1/processed/GDP.csv': 'GDP',
    'Data1/processed/CementProduction.csv': 'cement_production',
    'Data1/processed/SurfaceArea.csv': 'built_surface_area'
}

In [8]:
# 示例：加载数据
for file, col_name in file_config.items():
    f_df = load_and_melt(file, col_name)
    print(f_df[f_df["Countries"]=="China"].head(4))

    Countries  Year total_CE
76      China  1965    488.5
168     China  1966    530.3
260     China  1967    475.9
352     China  1968    476.7
    Countries  Year  cement_CS
36      China  1928   0.034032
163     China  1928   0.034032
208     China  1929   0.054537
335     China  1929   0.054537
    Countries  Year energy_consumption
76      China  1965               5.53
168     China  1966               6.01
260     China  1967                5.4
352     China  1968               5.46
    Countries  Year   Population
39      China  1960  667070000.0
254     China  1961  660330000.0
469     China  1962  665770000.0
684     China  1963  682335000.0
    Countries  Year           GDP
39      China  1960  5.971625e+10
254     China  1961  5.005669e+10
469     China  1962  4.720919e+10
684     China  1963  5.070661e+10
    Countries  Year  cement_production
36      China  1928         252.000000
199     China  1929         369.000000
362     China  1930         451.613505
525     China 

In [9]:
# 初始化合并后的 DataFrame
merged_df = None
previous_countries = set()  # 用于跟踪国家列表变化

# 逐个加载并合并
for file, col_name in file_config.items():
    print(f"处理文件: {file}")
    temp_df = load_and_melt(file, col_name)  # 使用修改后的函数
    
    if merged_df is None:
        merged_df = temp_df
        previous_countries = set(merged_df['Countries'].unique())
        print(f"  初始数据框大小: {merged_df.shape}")
        print(f"  包含国家数量: {len(previous_countries)}")
    else:
        before_merge_countries = set(merged_df['Countries'].unique())
        before_merge_rows = merged_df.shape[0]
        
        merged_df = pd.merge(
            merged_df,
            temp_df, 
            on=['Countries', 'Year'],  # 注意列名是'Countries'而不是'Country'
            how='inner'
        )
        
        after_merge_countries = set(merged_df['Countries'].unique())
        after_merge_rows = merged_df.shape[0]
        
        # 计算减少的国家数量
        lost_countries = before_merge_countries - after_merge_countries
        num_lost = len(lost_countries)
        
        print(f"  合并后数据框大小: {merged_df.shape}, 减少行数: {before_merge_rows - after_merge_rows}")
        print(f"  减少国家数量: {num_lost}")
        if num_lost > 0:
            # print(f"  丢失的国家示例: {list(lost_countries)[:5]}...")  # 只显示前5个示例
            print(f"  丢失的国家列表: {list(lost_countries)[:]}")
            
        previous_countries = after_merge_countries

# 按国家和年份排序
merged_df = merged_df.sort_values(['Countries', 'Year']).reset_index(drop=True)
print("\n✅ 所有文件处理完成!")
print(f"最终数据框大小: {merged_df.shape}")
print(f"最终包含国家数量: {len(merged_df['Countries'].unique())}")

# --- 新增: 筛选1975年之后的数据 ---
before_filter = merged_df.shape[0]
before_filter_countries = set(merged_df['Countries'].unique())
merged_df = merged_df[merged_df['Year'] >= 1975]
after_filter_countries = set(merged_df['Countries'].unique())

lost_countries_filter = before_filter_countries - after_filter_countries
num_lost_filter = len(lost_countries_filter)

print(f"\n筛选1975年之后的数据: {merged_df.shape}")
print(f"因年份筛选减少的国家数量: {num_lost_filter}")
if num_lost_filter > 0:
    # print(f"因年份筛选丢失的国家示例: {list(lost_countries_filter)[:5]}...")
    print(f"因年份筛选丢失的国家列表: {list(lost_countries_filter)[:]}")

# --- 新增: 去除国家名和年份都重复的数据 ---
before_dedup = merged_df.shape[0]
merged_df = merged_df.drop_duplicates(subset=['Countries', 'Year'], keep='first')
after_dedup = merged_df.shape[0]
dup_count = before_dedup - after_dedup

if dup_count > 0:
    print(f"\n⚠️ 发现并移除了 {dup_count} 行国家名和年份重复的数据")
else:
    print("\n✅ 没有发现国家名和年份重复的数据")
print(f"去重后数据框大小: {merged_df.shape}")

# 将所有数值列转换为浮点数（为后续除法计算）
numeric_cols = [
    'total_CE', 'cement_CS', 'energy_consumption', 'Population', 
    'GDP', 'cement_production', 'built_surface_area'
]

# 定义极小值常量
SMALL_VALUE = 1e-10

for col in numeric_cols:
    # 转换并替换无效值为极小值
    initial_valid = merged_df[col].notna().sum()
    merged_df[col] = pd.to_numeric(merged_df[col], errors='coerce')
    
    # 统计无效值数量
    invalid_count = merged_df[col].isna().sum()
    
    if invalid_count > 0:
        # 将NaN替换为极小值
        merged_df[col] = merged_df[col].fillna(SMALL_VALUE)
        print(f"⚠️ 列 '{col}' 中发现 {invalid_count} 个无效值，已替换为极小值 ({SMALL_VALUE})")
    else:
        print(f"✅ 列 '{col}' 无无效值")

# 显示中国数据
print("\n中国数据示例:")
print(merged_df[merged_df["Countries"]=="China"].head())

处理文件: Data1/processed/CO2EmissionsFromEnergy.csv
  初始数据框大小: (5336, 3)
  包含国家数量: 92
处理文件: Data1/processed/CarbonSequestration.csv
  合并后数据框大小: (4350, 4), 减少行数: 986
  减少国家数量: 21
  丢失的国家列表: ['Turkmenistan', 'Eastern Africa', 'United Arab Emirates', 'Singapore', 'USSR', 'Other Southern Africa', 'Other Asia Pacific', 'Russian Federation', 'Other Europe', 'Other CIS', 'Western Africa', 'Other Middle East', 'Central America', 'Other Northern Africa', 'Other Caribbean', 'Latvia', 'Other South America', 'Trinidad & Tobago', 'China Hong Kong SAR', 'US', 'Middle Africa']
处理文件: Data1/processed/PrimaryEnergyConsumption.csv
  合并后数据框大小: (4350, 5), 减少行数: 0
  减少国家数量: 0
处理文件: Data1/processed/Population.csv
  合并后数据框大小: (3886, 6), 减少行数: 464
  减少国家数量: 8
  丢失的国家列表: ['Taiwan', 'South Korea', 'Netherlands', 'Venezuela', 'United Kingdom', 'Vietnam', 'Turkey', 'Iran']
处理文件: Data1/processed/GDP.csv
  合并后数据框大小: (3886, 7), 减少行数: 0
  减少国家数量: 0
处理文件: Data1/processed/CementProduction.csv
  合并后数据框大小: (3886, 8), 减少行数: 0

## 2. 因素的构建与操作

### 2.1 计算因素

In [17]:
# 数据整理
# 定义分解所需的因子列名
# APC更新后的因子列表
factors = [
    'total_CE/energy_consumption',
    'energy_consumption/GDP',
    'GDP/Population',
    'built_surface_area/Population',  # 修改为建筑面积/人口
    'cement_CS/built_surface_area' # 单位建筑面积的吸收量
]
# 在计算分解因子前添加单位转换
print("执行单位转换...")
# 质量单位转换 (水泥产量: kt → Mt) 百万吨
merged_df['cement_production'] = merged_df['cement_production'] / 1000
# 经济单位转换 (GDP: USD → trillion USD) 万亿美元
merged_df['GDP'] = merged_df['GDP'] / 1e12
# 人口单位转换 (persons → billion persons) 人口(亿人)
merged_df['Population'] = merged_df['Population'] / 1e8
# 现在计算分解因子（使用统一单位）
print("计算分解因子...")

# 确保所有关键列均为正数（避免除以0/对非正值取对数）
merged_df = merged_df[
    (merged_df['total_CE'] > 0) &
    (merged_df['cement_CS'] > 0) &
    (merged_df['built_surface_area'] > 0) &
    (merged_df['cement_production'] > 0) &
    (merged_df['GDP'] > 0) &
    (merged_df['Population'] > 0)&
    (merged_df['energy_consumption'] > 0) 
]

# 计算各分解因子
merged_df['total_CE/energy_consumption'] = merged_df['total_CE'] / merged_df['energy_consumption']
merged_df['energy_consumption/GDP'] = merged_df['energy_consumption'] / merged_df['GDP']
merged_df['GDP/Population'] = merged_df['GDP'] / merged_df['Population']
# APC修改后的因子计算
merged_df['built_surface_area/Population'] = merged_df['built_surface_area'] / merged_df['Population']
# CS per Area修改后因子计算
merged_df['cement_CS/built_surface_area'] = merged_df['cement_CS'] / merged_df['built_surface_area']

# 再次检查无穷大和缺失值
# print("清理无穷值和缺失值...")
# merged_df = merged_df.replace([np.inf, -np.inf], np.nan)
# merged_df = merged_df.dropna(subset=factors + ['total_CE', 'cement_CS'])
print(f"最终有效数据量: {len(merged_df)} 行 (1975年之后)")
print(f"最终有效国家共{len(merged_df["Countries"].unique())}个，列表: {merged_df["Countries"].unique()}")
merged_df[merged_df["Countries"] == "China"]

执行单位转换...
计算分解因子...
最终有效数据量: 604 行 (1975年之后)
最终有效国家共63个，列表: ['Algeria' 'Argentina' 'Australia' 'Austria' 'Azerbaijan' 'Bangladesh'
 'Belarus' 'Belgium' 'Brazil' 'Bulgaria' 'Canada' 'Chile' 'China'
 'Colombia' 'Croatia' 'Cyprus' 'Czechia' 'Denmark' 'Ecuador' 'Egypt'
 'Estonia' 'Finland' 'France' 'Germany' 'Greece' 'Hungary' 'Iceland'
 'India' 'Indonesia' 'Iraq' 'Ireland' 'Israel' 'Italy' 'Japan'
 'Kazakhstan' 'Kuwait' 'Lithuania' 'Luxembourg' 'Malaysia' 'Mexico'
 'Morocco' 'New Zealand' 'North Macedonia' 'Norway' 'Oman' 'Pakistan'
 'Peru' 'Philippines' 'Poland' 'Portugal' 'Qatar' 'Romania' 'Saudi Arabia'
 'Slovakia' 'Slovenia' 'South Africa' 'Spain' 'Sri Lanka' 'Sweden'
 'Switzerland' 'Thailand' 'Ukraine' 'Uzbekistan']


Unnamed: 0,Countries,Year,total_CE,cement_CS,energy_consumption,Population,GDP,cement_production,built_surface_area,total_CE/energy_consumption,energy_consumption/GDP,GDP/Population,built_surface_area/Population,built_surface_area/cement_CS,cement_CS/built_surface_area,total_CE/cement_CS
130,China,1975,1131.3,5.143255,13.25,9.16395e-24,1.6343e-37,3.00217e-08,256660900000.0,85.381132,8.107447e+37,1.783401e-14,2.8007669999999997e+34,2.003911e-11,2.003911e-11,219.957978
132,China,1980,1460.9,14.51676,17.38,9.81235e-24,1.91149e-37,9.54427e-08,313653200000.0,84.056387,9.092382999999999e+37,1.948045e-14,3.196514e+34,4.628284e-11,4.628284e-11,100.635406
134,China,1985,1821.7,20.384433,22.14,1.0510400000000001e-23,3.09488e-37,1.424897e-07,391127600000.0,82.280939,7.153751e+37,2.944588e-14,3.721339e+34,5.211708e-11,5.211708e-11,89.367216
136,China,1990,2308.8,28.138505,28.58,1.135185e-23,3.60858e-37,2.03168e-07,485590400000.0,80.783765,7.920013e+37,3.178848e-14,4.277633e+34,5.794699e-11,5.794699e-11,82.051267
138,China,1995,3009.2,56.739027,37.27,1.2048550000000001e-23,7.3448499999999995e-37,4.4561e-07,584252600000.0,80.740542,5.0743039999999995e+37,6.096045e-14,4.849153e+34,9.711387e-11,9.711387e-11,53.035805
140,China,2000,3328.0,78.414183,42.48,1.2626450000000002e-23,1.2113299999999999e-36,5.8319e-07,705060700000.0,78.34275,3.506889e+37,9.593591e-14,5.583998e+34,1.112162e-10,1.112162e-10,42.441302
142,China,2005,6079.3,132.202206,75.7,1.3037200000000002e-23,2.28596e-36,1.0383e-06,816558100000.0,80.307794,3.3115189999999998e+37,1.753413e-13,6.263294e+34,1.619018e-10,1.619018e-10,45.98486
144,China,2010,8121.7,242.079586,104.6,1.337705e-23,6.08719e-36,1.88e-06,954987400000.0,77.645315,1.718363e+37,4.550473e-13,7.138998e+34,2.534898e-10,2.534898e-10,33.54971
146,China,2015,9171.3,309.104196,126.49,1.3798600000000002e-23,1.10616e-35,2.359188e-06,1106582000000.0,72.506127,1.1435049999999999e+37,8.016465e-13,8.019523e+34,2.793324e-10,2.793324e-10,29.670577
148,China,2020,10130.9,359.195176,149.45,1.4111e-23,1.46877e-35,2.394708e-06,1243587000000.0,67.787889,1.0175179999999999e+37,1.040869e-12,8.812889e+34,2.88838e-10,2.88838e-10,28.204443


## 3.LMDI分解

#### 注意：因为数据本来就是5年interval，所以也是按5年interval分解的

total_CE/cement_CS = <br>
   (total_CE/energy_consumption) × <br>
   (energy_consumption/GDP) × <br>
   (GDP/Population) × <br>
    (1/(built_surface_area/Population)) ×  # 倒数关系 <br>
    (1/(cement_CS/built_surface_area))     # 倒数关系

In [18]:
def lmdi_decomposition(df, base_year, target_ratio='total_CE/cement_CS'):
    
    """
    执行 LMDI 分解，计算各因子贡献
    - df: 合并后的数据（包含 Country, Year 和各因子）
    - base_year: 基期年份（如 1990）
    - target_ratio: 分解的目标比例（默认为 CS/CE）
    """
    
    # 计算目标比例 CFP = CE/CS
    df[target_ratio] = df['total_CE'] / df['cement_CS']

    # 初始化结果存储
    results = []
    
    for country, country_df in df.groupby('Countries'):
        country_df = country_df.sort_values('Year')
        country_df = country_df[country_df['Year'] >= base_year]
        if len(country_df) < 2:
            continue

        for idx in range(1, len(country_df)):
            t_row = country_df.iloc[idx]
            b_row = country_df.iloc[idx-1]

            cfp_t = t_row[target_ratio]
            cfp_b = b_row[target_ratio]

            # 添加对 CFP 值的保护
            if cfp_t <= 0 or cfp_b <= 0:
                continue  # 跳过非正值

            # 计算 L(CFP^t, CFP^b)
            if np.isclose(cfp_t, cfp_b, rtol=1e-6):  # 浮点数精度容错
                L = cfp_t
            else:
                log_diff = np.log(cfp_t) - np.log(cfp_b)
                '''
                # 防止除以零
                if np.isclose(log_diff, 0):  
                    L = cfp_t
                else:
                    L = (cfp_t - cfp_b) / log_diff
                '''
                L = (cfp_t - cfp_b) / log_diff

            # 计算各因子贡献（添加对 ratio_t/ratio_b 的保护）
            contributions = {}
            for factor in factors:
                ratio_t = t_row[factor]
                ratio_b = b_row[factor]
                if ratio_t <= 0 or ratio_b <= 0:
                    contributions[f'Δ{factor}'] = np.nan
                else:
                     # 对建筑面积/人口项取负号
                    if factor in ['built_surface_area/Population', 'cement_CS/built_surface_area']:
                        contributions[f'Δ{factor}'] = -L * np.log(ratio_t / ratio_b)
                    else:
                        contributions[f'Δ{factor}'] = L * np.log(ratio_t / ratio_b)
                

            '''
            # 跳过存在 NaN 的行
            if not np.isnan(list(contributions.values())).any():
                results.append({
                    'Countries': country,
                    'Year': t_row['Year'],
                    'ΔTotal': cfp_t - cfp_b,
                    **contributions
                })
            '''

            results.append({
                    'Countries': country,
                    'Start_Year': b_row['Year'],
                    'End_Year': t_row['Year'],
                    'ΔTotal': cfp_t - cfp_b,
                    **contributions
                })
        
    return pd.DataFrame(results)

In [19]:
# 设置基期年份（根据数据实际起始时间调整）
base_year = 1975  # 示例

# 运行分解
lmdi_results_5 = lmdi_decomposition(merged_df, base_year)

# 保存结果
lmdi_results_5.to_csv('Data1/results/LMDI_Contributions_5yrs_interval.csv', index=False, encoding='utf-8-sig')

# 查看示例输出
print(lmdi_results_5.head())

  Countries  Start_Year  End_Year     ΔTotal  Δtotal_CE/energy_consumption  \
0   Algeria        1975      1980 -26.735148                     -2.599560   
1   Algeria        1980      1985   1.662086                      1.370299   
2   Algeria        1985      1990  -6.010295                     -4.113439   
3   Algeria        1990      1995 -13.021165                     -0.190956   
4   Algeria        1995      2000  -9.595745                      0.233208   

   Δenergy_consumption/GDP  ΔGDP/Population  Δbuilt_surface_area/Population  \
0                -5.277985        71.118306                        7.481355   
1                 5.265836        10.920952                        6.167747   
2                 8.324942        -5.315408                        4.581892   
3                24.681683       -31.524017                        0.609481   
4               -16.189702         9.912027                       -0.960845   

   Δcement_CS/built_surface_area  
0                    

In [21]:
# 验证：
lmdi_results_5['ΔSum'] = lmdi_results_5[[f'Δ{factor}' for factor in factors]].sum(axis=1)
print(lmdi_results_5[['ΔTotal', 'ΔSum']].head())

      ΔTotal       ΔSum
0 -26.735148 -26.735148
1   1.662086   1.662086
2  -6.010295  -6.010295
3 -13.021165 -13.021165
4  -9.595745  -9.595745


## 分解20年

In [30]:
def lmdi_decomposition_single_period(df, base_year, target_ratio='cement_CS/total_CE'):
    """
    执行LMDI分解（每个国家只计算最新一年与最开始一年的对比）
    - df: 合并后的数据（包含Country, Year和各因子）
    - base_year: 基期年份（如1975）
    - target_ratio: 分解的目标比例（默认为CE/CS）
    """ 
    # 计算目标比例 CFP = CE/CS
    df[target_ratio] = df['total_CE'] / df['cement_CS']
    
    # 初始化结果存储
    results = []
    for country, country_df in df.groupby('Countries'):
        # 筛选基期之后的数据并按年份排序
        country_df = country_df[country_df['Year'] >= base_year].sort_values('Year')
        # if len(country_df) < 2:
        #    continue  # 至少需要首尾两年数据
        
        # 获取首尾两年数据
        first_row = country_df.iloc[0]    # 最开始一年（>= base_year的最早年份）
        last_row = country_df.iloc[-1]    # 最新一年
        
        cfp_first = first_row[target_ratio]
        cfp_last = last_row[target_ratio]

        # 跳过非正值
        # if cfp_first <= 0 or cfp_last <= 0:
        #    continue

        # 计算L(CFP_last, CFP_first)
        if np.isclose(cfp_last, cfp_first, rtol=1e-6):
            L = cfp_last
        else:
            log_diff = np.log(cfp_last) - np.log(cfp_first)
            L = (cfp_last - cfp_first) / log_diff if not np.isclose(log_diff, 0) else cfp_last

        # 计算各因子贡献
        contributions = {}
        for factor in factors:
            ratio_first = first_row[factor]
            ratio_last = last_row[factor]
            if ratio_first <= 0 or ratio_last <= 0:
                contributions[f'Δ{factor}'] = np.nan
            else:
                 # 对建筑面积/人口项取负号
                if factor in ['built_surface_area/Population', 'cement_CS/built_surface_area']:
                    contributions[f'Δ{factor}'] = -L * np.log(ratio_last / ratio_first)
                else:
                    contributions[f'Δ{factor}'] = L * np.log(ratio_last / ratio_first)
        
        # 保存结果（每个国家一条记录）
        if not np.isnan(list(contributions.values())).any():
            results.append({
                'Countries': country,
                'Start_Year': first_row['Year'],
                'End_Year': last_row['Year'],
                'ΔTotal': cfp_last - cfp_first,  # 总变化量
                **contributions
            })
    
    return pd.DataFrame(results)

In [31]:
# 运行分解（每个国家输出首尾年对比）
lmdi_results_ttl = lmdi_decomposition_single_period(merged_df, base_year=1975)

# 保存结果
lmdi_results_ttl.to_csv('Data1/results/CE_LMDI_Contributions_total_yrs_interval.csv', index=False)

# 查看示例输出
print(lmdi_results_ttl.head())

    Countries  Start_Year  End_Year      ΔTotal  Δtotal_CE/energy_consumption  \
0     Algeria        1975      2020  -61.823809                     -5.742591   
1   Argentina        1975      2020 -375.147644                    -45.790970   
2   Australia        1975      2020  -38.299867                    -17.083003   
3     Austria        1975      2020  -22.924944                    -12.691645   
4  Azerbaijan        1975      2020   63.085636                      9.879642   

   Δenergy_consumption/GDP  ΔGDP/Population  Δbuilt_surface_area/Population  \
0                -7.993874        86.542818                       15.745046   
1            -10485.738756     10530.259252                        0.407054   
2              -290.943671       317.892709                      -30.617221   
3               -83.850003        91.783881                      -20.973817   
4               -61.845422       116.595719                       -0.431641   

   Δcement_CS/built_surface_area  
0  

## 4.计算单位CS变化产生的贡献变化率
### 根据tapio解耦思路，计算因素变化量，用来衡量单位因素变化的贡献变化量

#### tapio弹性? tapio elasticity

#### $ T = \frac{(C_5^T - C_5^0) / C_5^0}{(CS^T - CS^0)/CS^0} $



In [23]:
def calc_grad(attr, df = merged_df):
    res = []
    for country, country_df in df.groupby('Countries'):
        country_df = country_df.sort_values('Year')

        if len(country_df) < 2:
            continue

        for idx in range(1, len(country_df)):
            current_row = country_df.iloc[idx]  # current interval
            previous_row = country_df.iloc[idx-1]  # previous interval
    
            current_val = current_row[attr]
            previous_val = previous_row[attr]
        
            # 处理分母为0的情况
            if previous_val == 0:
                if current_val > 0:
                    grad = np.inf
                elif current_val < 0:
                    grad = -np.inf
                else:
                    grad = 0  # 0/0的情况
            else:
                grad = (current_val - previous_val) / previous_val
            
            res.append({
                'Countries': country,
                'Interval': f"{idx}",
                'Years': f"{previous_row['Year']} - {current_row['Year']}",
                f'grad({attr})': grad
            })

    return pd.DataFrame(res)

#### 4.1 计算 $ (C_5^T - C_5^0) / C_5^0 $

In [24]:
C5_grad_results = calc_grad("built_surface_area/cement_CS")
C5_grad_results

Unnamed: 0,Countries,Interval,Years,grad(built_surface_area/cement_CS)
0,Algeria,1,1975 - 1980,2.101185
1,Algeria,2,1980 - 1985,0.345950
2,Algeria,3,1985 - 1990,0.140760
3,Algeria,4,1990 - 1995,0.111618
4,Algeria,5,1995 - 2000,0.051984
...,...,...,...,...
536,Uzbekistan,5,1995 - 2000,-0.160959
537,Uzbekistan,6,2000 - 2005,0.161982
538,Uzbekistan,7,2005 - 2010,0.174895
539,Uzbekistan,8,2010 - 2015,0.074890


#### 4.2 计算 $ (CS^T - CS^0)/CS^0 $

In [25]:
CS_grad_results = calc_grad("cement_CS")
CS_grad_results

Unnamed: 0,Countries,Interval,Years,grad(cement_CS)
0,Algeria,1,1975 - 1980,2.388194
1,Algeria,2,1980 - 1985,0.462990
2,Algeria,3,1985 - 1990,0.234197
3,Algeria,4,1990 - 1995,0.228491
4,Algeria,5,1995 - 2000,0.158393
...,...,...,...,...
536,Uzbekistan,5,1995 - 2000,-0.059247
537,Uzbekistan,6,2000 - 2005,0.251003
538,Uzbekistan,7,2005 - 2010,0.271284
539,Uzbekistan,8,2010 - 2015,0.165293


In [26]:
# 合并数据时指定只保留一个Years列
Tapio_results_df = pd.merge(
    C5_grad_results,
    CS_grad_results, 
    on=['Countries', 'Interval', 'Years'],  # 将Years也作为合并键
    how='inner'
)

# 选择需要的列
Tapio_results_df = Tapio_results_df[['Countries', 'Years', 'grad(built_surface_area/cement_CS)', 'grad(cement_CS)']]

# 将grad(built_surface_area/cement_CS)转换为百分数形式（乘以100）
# Tapio_results_df["grad(built_surface_area/cement_CS)"] = Tapio_results_df["grad(built_surface_area/cement_CS)"] * 100

# 计算Tapio弹性
Tapio_results_df["CS_tapio_elasticity"] = Tapio_results_df["grad(built_surface_area/cement_CS)"] / Tapio_results_df["grad(cement_CS)"]

In [27]:
Tapio_results_df.to_csv('Data1/results/C5_CS_Tapio_results.csv', index=False)
Tapio_results_df

Unnamed: 0,Countries,Years,grad(built_surface_area/cement_CS),grad(cement_CS),CS_tapio_elasticity
0,Algeria,1975 - 1980,2.101185,2.388194,0.879822
1,Algeria,1980 - 1985,0.345950,0.462990,0.747207
2,Algeria,1985 - 1990,0.140760,0.234197,0.601031
3,Algeria,1990 - 1995,0.111618,0.228491,0.488500
4,Algeria,1995 - 2000,0.051984,0.158393,0.328199
...,...,...,...,...,...
536,Uzbekistan,1995 - 2000,-0.160959,-0.059247,2.716769
537,Uzbekistan,2000 - 2005,0.161982,0.251003,0.645336
538,Uzbekistan,2005 - 2010,0.174895,0.271284,0.644693
539,Uzbekistan,2010 - 2015,0.074890,0.165293,0.453074
