In [1]:
import pandas as pd

## 加载 ISO

In [2]:
iso = pd.read_csv('Data1/processed/iso.csv',index_col=0).reset_index()
iso.head(3)

Unnamed: 0,name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF,Asia,Southern Asia,,142.0,34.0,
1,Åland Islands,AX,ALA,248,ISO 3166-2:AX,Europe,Northern Europe,,150.0,154.0,
2,Albania,AL,ALB,8,ISO 3166-2:AL,Europe,Southern Europe,,150.0,39.0,


## 处理 Population/GDP

In [10]:
ppl_gdp = pd.read_excel('Data1/processed/ppl_gdp.xlsx')
ppl_gdp.head(3)

Unnamed: 0,CountryName,CountryCode,year,人口,gdp,人均gdp
0,阿鲁巴,ABW,1960,54608.0,,
1,阿鲁巴,ABW,1961,55811.0,,
2,阿鲁巴,ABW,1962,56682.0,,


In [12]:
# 用 map 快速添加列（需确保 CountryCode 是唯一键）
eng_name_map = iso.set_index("alpha-3")["name"].to_dict()
ppl_gdp["name"] = ppl_gdp["CountryCode"].map(eng_name_map)
'''
表链接方法
ppl_gdp_with_name = pd.merge(
    ppl_gdp,
    iso[["alpha-3", "name"]], 
    left_on="CountryCode",
    right_on="alpha-3",
    how="left"
)
ppl_gdp_with_name
'''
ppl_gdp.head()

Unnamed: 0,CountryName,CountryCode,year,人口,gdp,人均gdp,name
0,阿鲁巴,ABW,1960,54608.0,,,Aruba
1,阿鲁巴,ABW,1961,55811.0,,,Aruba
2,阿鲁巴,ABW,1962,56682.0,,,Aruba
3,阿鲁巴,ABW,1963,57475.0,,,Aruba
4,阿鲁巴,ABW,1964,58178.0,,,Aruba


In [21]:
# 删除 eng_name 缺失的行
ppl_gdp_cleaned = ppl_gdp.dropna(subset=["name"])

# 检查剩余数据
print(f"原始数据行数: {len(ppl_gdp)}")
print(f"清理后行数: {len(ppl_gdp_cleaned)}")
print("缺失的国家代码:", ppl_gdp[ppl_gdp["name"].isna()]["CountryCode"].unique())

原始数据行数: 17024
清理后行数: 13760
缺失的国家代码: ['AFE' 'AFW' 'ARB' 'CEB' 'CHI' 'CSS' 'EAP' 'EAR' 'EAS' 'ECA' 'ECS' 'EMU'
 'EUU' 'FCS' 'HIC' 'HPC' 'IBD' 'IBT' 'IDA' 'IDB' 'IDX' 'INX' 'LAC' 'LCN'
 'LDC' 'LIC' 'LMC' 'LMY' 'LTE' 'MEA' 'MIC' 'MNA' 'NAC' 'OED' 'OSS' 'PRE'
 'PSS' 'PST' 'SAS' 'SSA' 'SSF' 'SST' 'TEA' 'TEC' 'TLA' 'TMN' 'TSA' 'TSS'
 'UMC' 'WLD' 'XKX']


In [24]:
gdp_df = ppl_gdp_cleaned.pivot(index="name", columns="year", values="gdp").reset_index()
gdp_df

year,name,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Afghanistan,,,,,,,,,,...,2.049713e+10,1.913422e+10,1.811657e+10,1.875346e+10,1.805322e+10,1.879944e+10,1.995593e+10,1.426650e+10,1.450216e+10,
1,Albania,,,,,,,,,,...,1.322815e+10,1.138685e+10,1.186120e+10,1.301973e+10,1.515642e+10,1.540183e+10,1.516273e+10,1.793057e+10,1.891638e+10,2.297768e+10
2,Algeria,2.723615e+09,2.434747e+09,2.001445e+09,2.702982e+09,2.909316e+09,3.136284e+09,3.039859e+09,3.370870e+09,3.852147e+09,...,2.389427e+11,1.874939e+11,1.807638e+11,1.898809e+11,1.945545e+11,1.934597e+11,1.648734e+11,1.862654e+11,2.255603e+11,2.398995e+11
3,American Samoa,,,,,,,,,,...,6.430000e+08,6.730000e+08,6.710000e+08,6.120000e+08,6.390000e+08,6.470000e+08,7.210000e+08,7.500000e+08,8.710000e+08,
4,Andorra,,,,,,,,,,...,3.271686e+09,2.789881e+09,2.896610e+09,3.000162e+09,3.218420e+09,3.155149e+09,2.891001e+09,3.324683e+09,3.380602e+09,3.727674e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,Virgin Islands (British),,,,,,,,,,...,,,,,,,,,,
211,Virgin Islands (U.S.),,,,,,,,,,...,3.565000e+09,3.663000e+09,3.798000e+09,3.794000e+09,3.923000e+09,4.121000e+09,4.189000e+09,4.444000e+09,,
212,Yemen,,,,,,,,,,...,4.322859e+10,4.244449e+10,3.131783e+10,2.684223e+10,2.160616e+10,,,,,
213,Zambia,7.130000e+08,6.962857e+08,6.931429e+08,7.187143e+08,8.394286e+08,1.082857e+09,1.264286e+09,1.368000e+09,1.605857e+09,...,2.714102e+10,2.125122e+10,2.095841e+10,2.587360e+10,2.631151e+10,2.330867e+10,1.813776e+10,2.209642e+10,2.916378e+10,2.816263e+10


population_df = ppl_gdp_cleaned.pivot(index="name", columns="year", values="人口").reset_index()
population_df

In [27]:
# gdp_df.to_csv("Data1/final/GDP.csv")
# population_df.to_csv("Data1/final/population.csv")

## 处理 Surface

In [3]:
surface = pd.read_csv('BuiltSurfaceCalc/surface_results_rastercalc_first/surface_statistics_iso_summary.csv')
surface.head(3)

Unnamed: 0,ISO_A3,NAM_0,year,total_surface
0,ABW,Aruba (Neth.),1975,3446472.0
1,ABW,Aruba (Neth.),1980,4812638.0
2,ABW,Aruba (Neth.),1985,6460605.5


In [4]:
# 用 map 快速添加列（需确保 CountryCode 是唯一键）
eng_name_map = iso.set_index("alpha-3")["name"].to_dict()
surface["Countries"] = surface["ISO_A3"].map(eng_name_map)
surface.head()

Unnamed: 0,ISO_A3,NAM_0,year,total_surface,Countries
0,ABW,Aruba (Neth.),1975,3446472.0,Aruba
1,ABW,Aruba (Neth.),1980,4812638.0,Aruba
2,ABW,Aruba (Neth.),1985,6460605.5,Aruba
3,ABW,Aruba (Neth.),1990,10701824.0,Aruba
4,ABW,Aruba (Neth.),1995,13598802.0,Aruba


In [6]:
surface_df = surface.pivot(index="Countries", columns="year", values="total_surface").reset_index()
surface_df

year,Countries,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020,2025,2030
0,,4.651911e+07,4.903417e+07,5.159184e+07,5.418677e+07,6.056185e+07,6.727144e+07,7.821310e+07,8.986252e+07,1.009455e+08,1.081945e+08,1.140575e+08,1.179649e+08
1,Afghanistan,2.240245e+08,2.499207e+08,2.860036e+08,3.288705e+08,3.564305e+08,3.925731e+08,4.323971e+08,4.883282e+08,5.597124e+08,6.501660e+08,6.974230e+08,7.265302e+08
2,Albania,1.195511e+08,1.285421e+08,1.376679e+08,1.469224e+08,1.655773e+08,1.857004e+08,2.053611e+08,2.259674e+08,2.455197e+08,2.588924e+08,2.671661e+08,2.722590e+08
3,Algeria,1.175932e+09,1.278680e+09,1.383762e+09,1.490991e+09,1.639678e+09,1.796494e+09,1.963683e+09,2.140430e+09,2.312012e+09,2.447871e+09,2.506240e+09,2.547816e+09
4,American Samoa,2.143546e+06,2.584431e+06,3.052325e+06,3.553728e+06,3.709371e+06,3.865298e+06,4.003578e+06,4.145944e+06,4.272875e+06,4.349306e+06,4.349306e+06,4.390132e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,Western Sahara,1.083601e+07,1.138981e+07,1.195084e+07,1.251772e+07,1.396999e+07,1.561607e+07,1.694237e+07,1.840189e+07,1.988653e+07,2.111370e+07,2.143331e+07,2.166018e+07
244,Yemen,1.065825e+08,1.257765e+08,1.522725e+08,1.838727e+08,1.965772e+08,2.118464e+08,2.236755e+08,2.382961e+08,2.571499e+08,2.872784e+08,3.023572e+08,3.132692e+08
245,Zambia,4.463435e+08,5.279606e+08,6.370592e+08,7.608071e+08,8.250948e+08,9.095973e+08,1.015371e+09,1.165021e+09,1.342878e+09,1.515173e+09,1.671511e+09,1.774634e+09
246,Zimbabwe,2.975937e+08,3.403089e+08,3.962399e+08,4.589045e+08,5.048160e+08,5.722144e+08,6.542210e+08,8.004373e+08,1.033890e+09,1.479128e+09,1.760298e+09,1.936121e+09


In [7]:
surface_df.to_csv("Data1/processed/SurfaceArea.csv")