In [68]:
import requests
import json
import time
import schedule
import logging
from typing import Dict, List, Any, Optional
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import pandas as pd
import os

In [66]:

# Configure logging: INFO-and-above records are written to a local log file
# and echoed to the console, then a module-level logger is created.
logging.basicConfig(
    handlers=[
        logging.FileHandler("world_bank_data.log"),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class WorldBankDataFetcher:
    """Download indicator metadata and indicator data from the World Bank API.

    Raw JSON responses are stored as files inside ``output_dir``; indicator
    metadata can additionally be flattened into a pandas DataFrame.
    """

    # Root of the World Bank API (v2). HTTPS is used for every endpoint.
    BASE_URL = "https://api.worldbank.org/v2"
    # Seconds to wait for a response; without a timeout a stalled connection
    # would block the whole download loop forever.
    REQUEST_TIMEOUT = 30
    # Maximum number of country codes joined into a single request URL.
    COUNTRY_CHUNK_SIZE = 60

    def __init__(self, output_dir: str = "world_bank_data"):
        """Create the fetcher and make sure the output directory exists.

        Args:
            output_dir: Directory in which downloaded JSON files are saved.
        """
        self.output_dir = Path(output_dir)
        # parents=True lets callers pass a nested path that does not exist yet.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def get_indicators(self, per_page: int = 5000, page: int = 1) -> Any:
        """Fetch one page of the indicator catalogue.

        Args:
            per_page: Number of records requested per page.
            page: Page number to request.

        Returns:
            The decoded JSON response on success. On a request failure the
            error is logged and ``{"error": <message>}`` is returned instead.
        """
        url = f"{self.BASE_URL}/indicators"
        params = {
            "format": "json",
            "per_page": per_page,
            "page": page
        }

        try:
            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"获取指标列表失败: {e}")
            return {"error": str(e)}

    def get_all_indicators(self) -> List[Dict[str, Any]]:
        """Fetch every page of the indicator catalogue.

        Paging stops at the last page, or early when a request fails or the
        response has fewer than two elements; in that case the indicators
        collected so far are returned.

        Returns:
            List of indicator records collected from all fetched pages.
        """
        all_indicators: List[Dict[str, Any]] = []
        page = 1

        while True:
            result = self.get_indicators(page=page)
            # A dict is the error marker produced by get_indicators(); a list
            # with fewer than two elements carries no data section.
            if not isinstance(result, list) or len(result) < 2:
                break

            # The second element holds the records; guard against a null payload.
            all_indicators.extend(result[1] or [])

            # The first element is paging metadata; stop if "pages" is absent.
            total_pages = result[0].get("pages", page)
            if page >= total_pages:
                break

            page += 1
            # Pause between pages to avoid hammering the API.
            time.sleep(1)

        return all_indicators

    def get_all_country(self) -> List[str]:
        """Fetch the ``id`` of every entry returned by the country endpoint.

        Returns:
            List of country identifiers, in the order returned by the API.

        Raises:
            requests.exceptions.RequestException: If the request fails, times
                out, or returns an HTTP error status.
        """
        params = {
            "format": "json",
            "per_page": 1000
        }
        response = requests.get(f"{self.BASE_URL}/country", params=params,
                                timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        country_json = response.json()
        # Element 0 is paging metadata, element 1 the list of country records.
        return [entry.get("id") for entry in country_json[1]]

    def get_indicator_data(self, indicator_id: str, country: str = "all",
                           start_year: int = 1994, end_year: int = 2024) -> Any:
        """Fetch the data of one indicator for one country selection.

        Args:
            indicator_id: Indicator identifier.
            country: A country code, several codes joined with ``;``, or
                ``"all"`` for every country.
            start_year: First year of the requested range.
            end_year: Last year of the requested range.

        Returns:
            The decoded JSON response on success. On a request failure the
            error is logged and ``{"error": <message>}`` is returned instead.
        """
        url = f"{self.BASE_URL}/country/{country}/indicator/{indicator_id}"
        params = {
            "format": "json",
            # Only a single page is requested.
            # NOTE(review): large country/year selections may exceed 6000
            # rows and would then be truncated - confirm for wider ranges.
            "per_page": 6000,
            "date": f"{start_year}:{end_year}"
        }

        try:
            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"获取指标 {indicator_id} 数据失败: {e}")
            return {"error": str(e)}

    @staticmethod
    def _chunk_countries(countries: List[str], chunk_size: int = 60) -> List[str]:
        """Join country codes with ``;`` in groups of at most ``chunk_size``.

        Args:
            countries: Country codes to group.
            chunk_size: Maximum number of codes per group; must be positive.

        Returns:
            One ``;``-joined string per group; empty when ``countries`` is empty.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be a positive integer")
        codes = list(countries)
        return [
            ";".join(codes[start:start + chunk_size])
            for start in range(0, len(codes), chunk_size)
        ]

    def fetch_and_save_data(self, indicator_ids: List[str],
                            countries: Optional[List[str]] = None,
                            start_year: int = 2000, end_year: int = 2023) -> None:
        """Download several indicators and save each response as a JSON file.

        The country codes are split into groups of ``COUNTRY_CHUNK_SIZE`` so
        that each request URL stays short. One file named
        ``{indicator_id}_{group_index}_{start_year}_{end_year}.json`` is
        written per indicator and country group. Failed requests are logged
        by ``get_indicator_data`` and produce no file.

        Args:
            indicator_ids: Indicator identifiers to download.
            countries: Country codes to request. ``None`` (the default) means
                ``["all"]``; an empty list results in no requests.
            start_year: First year of the requested range.
            end_year: Last year of the requested range.
        """
        # Use the caller's list (not a module-level global) and support any
        # number of codes instead of a fixed number of groups.
        requested = ["all"] if countries is None else countries
        country_groups = self._chunk_countries(requested, self.COUNTRY_CHUNK_SIZE)

        for indicator_id in tqdm(indicator_ids):
            for ind, country in enumerate(country_groups):
                data = self.get_indicator_data(indicator_id, country, start_year, end_year)

                # The error marker has already been logged; do not store it
                # on disk where it could be mistaken for real data.
                if isinstance(data, dict) and "error" in data:
                    continue

                # Build the output file name for this indicator / country group.
                filename = self.output_dir / f"{indicator_id}_{ind}_{start_year}_{end_year}.json"

                try:
                    with open(filename, "w", encoding="utf-8") as f:
                        json.dump(data, f, indent=2)
                except (OSError, TypeError, ValueError) as e:
                    # Report the failure instead of silently skipping the file.
                    logger.error(f"保存数据失败: {e}")

    def convert_indicators_to_dataframe(self, indicators: List[Dict[str, Any]]) -> pd.DataFrame:
        """Flatten indicator metadata records into a DataFrame.

        Entries that are not dicts are skipped. Missing fields become empty
        strings, so every row has the same set of columns.

        Args:
            indicators: Indicator records as returned by the indicator
                catalogue endpoint.

        Returns:
            DataFrame with the columns ``id``, ``name``, ``unit``,
            ``source_id``, ``source_name``, ``source_note``,
            ``source_organization`` and ``topics`` (topic names joined with
            ``", "``). An empty DataFrame is returned for empty input.
        """
        if not indicators:
            return pd.DataFrame()

        rows = []
        for indicator in indicators:
            # Ignore malformed entries.
            if not isinstance(indicator, dict):
                continue

            # "source" / "topics" may be absent or null; treat both as empty.
            source = indicator.get('source') or {}
            topics = indicator.get('topics') or []

            rows.append({
                'id': indicator.get('id', ''),
                'name': indicator.get('name', ''),
                'unit': indicator.get('unit', ''),
                'source_id': source.get('id', ''),
                'source_name': source.get('value', ''),
                'source_note': indicator.get('sourceNote', ''),
                'source_organization': indicator.get('sourceOrganization', ''),
                # Always present so the column set does not depend on the data.
                'topics': ', '.join(topic.get('value', '') for topic in topics),
            })

        return pd.DataFrame(rows)

In [67]:
# Create the fetcher and download the identifiers known to the country endpoint.
fetcher = WorldBankDataFetcher()
country_list = fetcher.get_all_country()
print(len(country_list))

# The full indicator catalogue (more than 20,000 entries) is too large to
# download in full, so a previously exported Excel sheet is filtered instead.
# Presumably that sheet was produced once with:
#   indicators = fetcher.get_all_indicators()
#   df_indicators = fetcher.convert_indicators_to_dataframe(indicators)
#   df_indicators.to_excel("./世界银行指标数据.xlsx", index=False)
df_indicators = pd.read_excel("./世界银行指标数据.xlsx")

# Keep only the indicators that belong to the WDI sources.
df_indicators = df_indicators.loc[
    df_indicators['source_name'].isin(['WDI Database Archives', 'World Development Indicators'])
]

# Store an id -> name lookup next to the data for later reference.
indicators_json = {
    indicator_id: indicator_name
    for indicator_id, indicator_name in zip(df_indicators['id'], df_indicators['name'])
}
with open("指标名称.json", "w") as f:
    f.write(json.dumps(indicators_json, indent=2))

sample_indicators = [*df_indicators['id']]

# Start the download. Every country code must be valid, otherwise the API
# returns nothing for the request that contains it.
fetcher.fetch_and_save_data(
    indicator_ids=sample_indicators,
    countries=country_list,
    start_year=1994,
    end_year=2024,
)

296


  0%|                                                                                               | 0/2466 [00:00<?, ?it/s]

ABW;AFE;AFG;AFR;AFW;AGO;ALB;AND;ARB;ARE;ARG;ARM;ASM;ATG;AUS;AUT;AZE;BDI;BEA;BEC;BEL;BEN;BFA;BGD;BGR;BHI;BHR;BHS;BIH;BLA;BLR;BLZ;BMN;BMU;BOL;BRA;BRB;BRN;BSS;BTN;BWA;CAA;CAF;CAN;CEA;CEB;CEU;CHE;CHI;CHL;CHN;CIV;CLA;CME;CMR;COD;COG;COL;COM;CPV
CRI;CSA;CSS;CUB;CUW;CYM;CYP;CZE;DEA;DEC;DEU;DJI;DLA;DMA;DMN;DNK;DNS;DOM;DSA;DSF;DSS;DZA;EAP;EAR;EAS;ECA;ECS;ECU;EGY;EMU;ERI;ESP;EST;ETH;EUU;FCS;FIN;FJI;FRA;FRO;FSM;FXS;GAB;GBR;GEO;GHA;GIB;GIN;GMB;GNB;GNQ;GRC;GRD;GRL;GTM;GUM;GUY;HIC;HKG;HND
HPC;HRV;HTI;HUN;IBB;IBD;IBT;IDA;IDB;IDN;IDX;IMN;IND;INX;IRL;IRN;IRQ;ISL;ISR;ITA;JAM;JOR;JPN;KAZ;KEN;KGZ;KHM;KIR;KNA;KOR;KWT;LAC;LAO;LBN;LBR;LBY;LCA;LCN;LDC;LIC;LIE;LKA;LMC;LMY;LSO;LTE;LTU;LUX;LVA;MAC;MAF;MAR;MCO;MDA;MDE;MDG;MDV;MEA;MEX;MHL
MIC;MKD;MLI;MLT;MMR;MNA;MNE;MNG;MNP;MOZ;MRT;MUS;MWI;MYS;NAC;NAF;NAM;NCL;NER;NGA;NIC;NLD;NOR;NPL;NRS;NRU;NXS;NZL;OED;OMN;OSS;PAK;PAN;PER;PHL;PLW;PNG;POL;PRE;PRI;PRK;PRT;PRY;PSE;PSS;PST;PYF;QAT;ROU;RRS;RUS;RWA;SAS;SAU;SDN;SEN;SGP;SLB;SLE;SLV
SMR;SOM;SRB;SSA;SSD;SSF;SST;STP;SUR;SVK;

  0%|                                                                                     | 1/2466 [00:03<2:30:55,  3.67s/it]

ABW;AFE;AFG;AFR;AFW;AGO;ALB;AND;ARB;ARE;ARG;ARM;ASM;ATG;AUS;AUT;AZE;BDI;BEA;BEC;BEL;BEN;BFA;BGD;BGR;BHI;BHR;BHS;BIH;BLA;BLR;BLZ;BMN;BMU;BOL;BRA;BRB;BRN;BSS;BTN;BWA;CAA;CAF;CAN;CEA;CEB;CEU;CHE;CHI;CHL;CHN;CIV;CLA;CME;CMR;COD;COG;COL;COM;CPV
CRI;CSA;CSS;CUB;CUW;CYM;CYP;CZE;DEA;DEC;DEU;DJI;DLA;DMA;DMN;DNK;DNS;DOM;DSA;DSF;DSS;DZA;EAP;EAR;EAS;ECA;ECS;ECU;EGY;EMU;ERI;ESP;EST;ETH;EUU;FCS;FIN;FJI;FRA;FRO;FSM;FXS;GAB;GBR;GEO;GHA;GIB;GIN;GMB;GNB;GNQ;GRC;GRD;GRL;GTM;GUM;GUY;HIC;HKG;HND
HPC;HRV;HTI;HUN;IBB;IBD;IBT;IDA;IDB;IDN;IDX;IMN;IND;INX;IRL;IRN;IRQ;ISL;ISR;ITA;JAM;JOR;JPN;KAZ;KEN;KGZ;KHM;KIR;KNA;KOR;KWT;LAC;LAO;LBN;LBR;LBY;LCA;LCN;LDC;LIC;LIE;LKA;LMC;LMY;LSO;LTE;LTU;LUX;LVA;MAC;MAF;MAR;MCO;MDA;MDE;MDG;MDV;MEA;MEX;MHL
MIC;MKD;MLI;MLT;MMR;MNA;MNE;MNG;MNP;MOZ;MRT;MUS;MWI;MYS;NAC;NAF;NAM;NCL;NER;NGA;NIC;NLD;NOR;NPL;NRS;NRU;NXS;NZL;OED;OMN;OSS;PAK;PAN;PER;PHL;PLW;PNG;POL;PRE;PRI;PRK;PRT;PRY;PSE;PSS;PST;PYF;QAT;ROU;RRS;RUS;RWA;SAS;SAU;SDN;SEN;SGP;SLB;SLE;SLV
SMR;SOM;SRB;SSA;SSD;SSF;SST;STP;SUR;SVK;

  0%|                                                                                     | 2/2466 [00:07<2:45:08,  4.02s/it]

ABW;AFE;AFG;AFR;AFW;AGO;ALB;AND;ARB;ARE;ARG;ARM;ASM;ATG;AUS;AUT;AZE;BDI;BEA;BEC;BEL;BEN;BFA;BGD;BGR;BHI;BHR;BHS;BIH;BLA;BLR;BLZ;BMN;BMU;BOL;BRA;BRB;BRN;BSS;BTN;BWA;CAA;CAF;CAN;CEA;CEB;CEU;CHE;CHI;CHL;CHN;CIV;CLA;CME;CMR;COD;COG;COL;COM;CPV


  0%|                                                                                     | 2/2466 [00:08<3:02:45,  4.45s/it]


KeyboardInterrupt: 

# 整理本地json文件为一个csv

In [7]:
def json_to_dataframe(folder_path):
    """Merge every World Bank JSON payload in a folder into one DataFrame.

    Each ``.json`` file is expected to hold a list whose first element is
    metadata and whose second element is the list of data entries. Files
    that do not match this shape (error markers, message-only responses,
    empty data sections) are skipped, and files that cannot be parsed are
    skipped with a warning. Files are processed in sorted name order so the
    row order of the result is reproducible.

    Args:
        folder_path: Directory containing the downloaded ``.json`` files.

    Returns:
        DataFrame with one row per data entry. Nested ``indicator`` and
        ``country`` fields are flattened and renamed to ``indicator_id``,
        ``indicator_description``, ``country_id`` and ``country_name``.
        An empty DataFrame is returned when no file contains data.
    """
    frames = []
    json_names = sorted(name for name in os.listdir(folder_path) if name.endswith('.json'))

    for filename in tqdm(json_names):
        file_path = os.path.join(folder_path, filename)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                json_data = json.load(f)
        except ValueError as exc:
            # A truncated file (e.g. from an interrupted download) must not
            # abort the whole merge.
            logging.getLogger(__name__).warning(
                "Skipping unreadable JSON file %s: %s", file_path, exc)
            continue

        # Anything other than [metadata, entries] carries no data.
        if not isinstance(json_data, list) or len(json_data) <= 1:
            continue

        # Skip the metadata element; the data section may be null or empty.
        data_entries = json_data[1]
        if not data_entries:
            continue

        # Flatten nested dicts into dotted column names ("indicator.id", ...).
        # No record_path is used, so a meta argument would have no effect.
        frames.append(pd.json_normalize(data_entries))

    # pd.concat raises on an empty list, so handle the no-data case explicitly.
    if not frames:
        return pd.DataFrame()

    final_df = pd.concat(frames, ignore_index=True)
    # Replace the dotted names produced by the flattening with plain ones.
    final_df = final_df.rename(columns={
        'indicator.id': 'indicator_id',
        'indicator.value': 'indicator_description',
        'country.id': 'country_id',
        'country.value': 'country_name'
    })
    return final_df

# Example usage: merge all downloaded JSON files, preview the first rows and
# export the combined table as a single CSV file.
folder_path = './world_bank_data/'  # replace with the actual folder path
result_df = json_to_dataframe(folder_path)
print(result_df.head(5))
result_df.to_csv(path_or_buf='./所有世界银行数据.csv', index=False)

100%|████████████████████████████████████████████████████████████████████████████| 12330/12330 [04:55<00:00, 41.72it/s]
  final_df = pd.concat(all_data, ignore_index=True)


  countryiso3code  date  value unit obs_status  decimal       indicator_id  \
0             ABW  2024    NaN                        1  AG.CON.FERT.PT.ZS   
1             ABW  2023    NaN                        1  AG.CON.FERT.PT.ZS   
2             ABW  2022    NaN                        1  AG.CON.FERT.PT.ZS   
3             ABW  2021    NaN                        1  AG.CON.FERT.PT.ZS   
4             ABW  2020    NaN                        1  AG.CON.FERT.PT.ZS   

                               indicator_description country_id country_name  
0  Fertilizer consumption (% of fertilizer produc...         AW        Aruba  
1  Fertilizer consumption (% of fertilizer produc...         AW        Aruba  
2  Fertilizer consumption (% of fertilizer produc...         AW        Aruba  
3  Fertilizer consumption (% of fertilizer produc...         AW        Aruba  
4  Fertilizer consumption (% of fertilizer produc...         AW        Aruba  


./world_bank_data/IC.BRE.BI.OS_all_1994_2024.json
./world_bank_data/IC.BRE.BI.P1_all_1994_2024.json
./world_bank_data/IC.BRE.BI.P2_all_1994_2024.json
./world_bank_data/IC.BRE.BI.P3_all_1994_2024.json
./world_bank_data/IC.BRE.BL.OS_all_1994_2024.json
./world_bank_data/IC.BRE.BL.P1_all_1994_2024.json
./world_bank_data/IC.BRE.BL.P2_all_1994_2024.json
./world_bank_data/IC.BRE.BL.P3_all_1994_2024.json
./world_bank_data/IC.BRE.DR.OS_all_1994_2024.json
./world_bank_data/IC.BRE.DR.P1_all_1994_2024.json
./world_bank_data/IC.BRE.DR.P2_all_1994_2024.json
./world_bank_data/IC.BRE.DR.P3_all_1994_2024.json
./world_bank_data/IC.BRE.FS.OS_all_1994_2024.json
./world_bank_data/IC.BRE.FS.P1_all_1994_2024.json
./world_bank_data/IC.BRE.FS.P2_all_1994_2024.json
./world_bank_data/IC.BRE.FS.P3_all_1994_2024.json
./world_bank_data/IC.BRE.IT.OS_all_1994_2024.json
./world_bank_data/IC.BRE.IT.P1_all_1994_2024.json
./world_bank_data/IC.BRE.IT.P2_all_1994_2024.json
./world_bank_data/IC.BRE.IT.P3_all_1994_2024.json


./world_bank_data/IQ.SPI.PIL3_all_1994_2024.json
./world_bank_data/IQ.SPI.PIL4_all_1994_2024.json
./world_bank_data/IQ.SPI.PIL5_all_1994_2024.json
./world_bank_data/IQ.WEF.CUST.XQ_all_1994_2024.json
./world_bank_data/IQ.WEF.PORT.XQ_all_1994_2024.json
./world_bank_data/IS.AIR.DPRT.P3_all_1994_2024.json
./world_bank_data/IS.AIR.DPRT_all_1994_2024.json
./world_bank_data/IS.AIR.GOOD.MT.K1_all_1994_2024.json
./world_bank_data/IS.AIR.PSGR.P3_all_1994_2024.json
./world_bank_data/IS.AIR.PSGR_all_1994_2024.json
./world_bank_data/IS.ROD.DESL.KT_all_1994_2024.json
./world_bank_data/IS.ROD.DESL.PC_all_1994_2024.json
./world_bank_data/IS.ROD.ENGY.KT_all_1994_2024.json
./world_bank_data/IS.ROD.ENGY.PC_all_1994_2024.json
./world_bank_data/IS.ROD.ENGY.ZS_all_1994_2024.json
./world_bank_data/IS.ROD.NORM.XD_all_1994_2024.json
./world_bank_data/IS.ROD.SGAS.KT_all_1994_2024.json
./world_bank_data/IS.ROD.SGAS.PC_all_1994_2024.json
./world_bank_data/IS.ROD.TRAF_all_1994_2024.json
./world_bank_data/IS.RRS.DE

./world_bank_data/NE.DAB.TOTL.ZS_all_1994_2024.json
./world_bank_data/NE.EXP.CAPM.KN_all_1994_2024.json
./world_bank_data/NE.EXP.GNFS.CD_all_1994_2024.json
./world_bank_data/NE.EXP.GNFS.CN_all_1994_2024.json
./world_bank_data/NE.EXP.GNFS.KD.87_all_1994_2024.json
./world_bank_data/NE.EXP.GNFS.KD.ZG_all_1994_2024.json
./world_bank_data/NE.EXP.GNFS.KD_all_1994_2024.json
./world_bank_data/NE.EXP.GNFS.KN.87.ZG_all_1994_2024.json
./world_bank_data/NE.EXP.GNFS.KN.87_all_1994_2024.json
./world_bank_data/NE.EXP.GNFS.KN_all_1994_2024.json
./world_bank_data/NE.EXP.GNFS.ZS_all_1994_2024.json
./world_bank_data/NE.EXP.TTEF.KN_all_1994_2024.json
./world_bank_data/NE.GDI.FIXD.CN_all_1994_2024.json
./world_bank_data/NE.GDI.FIXD.KN_all_1994_2024.json
./world_bank_data/NE.GDI.FPRV.CN_all_1994_2024.json
./world_bank_data/NE.GDI.FPRV.GI.ZS.IC_all_1994_2024.json
./world_bank_data/NE.GDI.FPRV.GI.ZS_all_1994_2024.json
./world_bank_data/NE.GDI.FPRV.IC.ZS_all_1994_2024.json
./world_bank_data/NE.GDI.FPRV.IFC.ZS_

./world_bank_data/NY.ADJ.NNTY.KD.ZG_all_1994_2024.json
./world_bank_data/NY.ADJ.NNTY.KD_all_1994_2024.json
./world_bank_data/NY.ADJ.NNTY.PC.CD_all_1994_2024.json
./world_bank_data/NY.ADJ.NNTY.PC.KD.ZG_all_1994_2024.json
./world_bank_data/NY.ADJ.NNTY.PC.KD_all_1994_2024.json
./world_bank_data/NY.ADJ.SVNG.CD_all_1994_2024.json
./world_bank_data/NY.ADJ.SVNG.GN.ZS_all_1994_2024.json
./world_bank_data/NY.ADJ.SVNX.CD_all_1994_2024.json
./world_bank_data/NY.ADJ.SVNX.GN.ZS_all_1994_2024.json
./world_bank_data/NY.EXP.CAPM.KD.87_all_1994_2024.json
./world_bank_data/NY.EXP.CAPM.KN.87_all_1994_2024.json
./world_bank_data/NY.EXP.CAPM.KN_all_1994_2024.json
./world_bank_data/NY.GDP.COAL.RT.ZS_all_1994_2024.json
./world_bank_data/NY.GDP.DEFL.87.ZG_all_1994_2024.json
./world_bank_data/NY.GDP.DEFL.KD.ZG.AD_all_1994_2024.json
./world_bank_data/NY.GDP.DEFL.KD.ZG_all_1994_2024.json
./world_bank_data/NY.GDP.DEFL.ZS.87_all_1994_2024.json
./world_bank_data/NY.GDP.DEFL.ZS.AD_all_1994_2024.json
./world_bank_dat

./world_bank_data/per_sa_allsa.cov_q5_tot_all_1994_2024.json
./world_bank_data/per_si_allsi.adq_pop_tot_all_1994_2024.json
./world_bank_data/per_si_allsi.ben_q1_tot_all_1994_2024.json
./world_bank_data/per_si_allsi.cov_pop_tot_all_1994_2024.json
./world_bank_data/per_si_allsi.cov_q1_tot_all_1994_2024.json
./world_bank_data/per_si_allsi.cov_q2_tot_all_1994_2024.json
./world_bank_data/per_si_allsi.cov_q3_tot_all_1994_2024.json
./world_bank_data/per_si_allsi.cov_q4_tot_all_1994_2024.json
./world_bank_data/per_si_allsi.cov_q5_tot_all_1994_2024.json
./world_bank_data/PV.EST_all_1994_2024.json
./world_bank_data/PV.NO.SRC_all_1994_2024.json
./world_bank_data/PV.PER.RNK.LOWER_all_1994_2024.json
./world_bank_data/PV.PER.RNK.UPPER_all_1994_2024.json
./world_bank_data/PV.PER.RNK_all_1994_2024.json
./world_bank_data/PV.STD.ERR_all_1994_2024.json
./world_bank_data/PX.REX.REER_all_1994_2024.json
./world_bank_data/RL.EST_all_1994_2024.json
./world_bank_data/RL.NO.SRC_all_1994_2024.json
./world_bank_d

./world_bank_data/SE.SEC.ENRL_all_1994_2024.json
./world_bank_data/SE.SEC.ENRR.FE_all_1994_2024.json
./world_bank_data/SE.SEC.ENRR.MA_all_1994_2024.json
./world_bank_data/SE.SEC.ENRR.MF_all_1994_2024.json
./world_bank_data/SE.SEC.ENRR_all_1994_2024.json
./world_bank_data/SE.SEC.NENR.FE_all_1994_2024.json
./world_bank_data/SE.SEC.NENR.MA_all_1994_2024.json
./world_bank_data/SE.SEC.NENR_all_1994_2024.json
./world_bank_data/SE.SEC.PRIV.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.PROG.FE.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.PROG.MA.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.PROG.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.REPT.FE.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.REPT.MA.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.REPT.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.TCAQ.FE.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.TCAQ.LO.FE.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.TCAQ.LO.MA.ZS_all_1994_2024.json
./world_bank_data/SE.SEC.TCAQ.LO.ZS_all_1994_2

./world_bank_data/SH.H2O.BASW.RU.ZS_all_1994_2024.json
./world_bank_data/SH.H2O.BASW.UR.ZS_all_1994_2024.json
./world_bank_data/SH.H2O.BASW.ZS_all_1994_2024.json
./world_bank_data/SH.H2O.SAFE.RU.ZS_all_1994_2024.json
./world_bank_data/SH.H2O.SAFE.UR.ZS_all_1994_2024.json
./world_bank_data/SH.H2O.SAFE.ZS_all_1994_2024.json
./world_bank_data/SH.H2O.SMDW.RU.ZS_all_1994_2024.json
./world_bank_data/SH.H2O.SMDW.UR.ZS_all_1994_2024.json
./world_bank_data/SH.H2O.SMDW.ZS_all_1994_2024.json
./world_bank_data/SH.HIV.0014_all_1994_2024.json
./world_bank_data/SH.HIV.1524.FE.ZS_all_1994_2024.json
./world_bank_data/SH.HIV.1524.MA.ZS_all_1994_2024.json
./world_bank_data/SH.HIV.ARTC.ZS_all_1994_2024.json
./world_bank_data/SH.HIV.INCD.14_all_1994_2024.json
./world_bank_data/SH.HIV.INCD.TL.P3_all_1994_2024.json
./world_bank_data/SH.HIV.INCD.TL_all_1994_2024.json
./world_bank_data/SH.HIV.INCD.YG.P3_all_1994_2024.json
./world_bank_data/SH.HIV.INCD.YG_all_1994_2024.json
./world_bank_data/SH.HIV.INCD.ZS_all_

./world_bank_data/SI.POV.GAP2_all_1994_2024.json
./world_bank_data/SI.POV.GAPS_all_1994_2024.json
./world_bank_data/SI.POV.GINI_all_1994_2024.json
./world_bank_data/SI.POV.LMIC.GP_all_1994_2024.json
./world_bank_data/SI.POV.LMIC_all_1994_2024.json
./world_bank_data/SI.POV.MPUN_all_1994_2024.json
./world_bank_data/SI.POV.MPWB_all_1994_2024.json
./world_bank_data/SI.POV.NAGP_all_1994_2024.json
./world_bank_data/SI.POV.NAHC_all_1994_2024.json
./world_bank_data/SI.POV.NOP1_all_1994_2024.json
./world_bank_data/SI.POV.RUGP_all_1994_2024.json
./world_bank_data/SI.POV.RUHC_all_1994_2024.json
./world_bank_data/SI.POV.SOPO_all_1994_2024.json
./world_bank_data/SI.POV.UMIC.GP_all_1994_2024.json
./world_bank_data/SI.POV.UMIC_all_1994_2024.json
./world_bank_data/SI.POV.URGP_all_1994_2024.json
./world_bank_data/SI.POV.URHC_all_1994_2024.json
./world_bank_data/SI.RMT.COST.IB.ZS_all_1994_2024.json
./world_bank_data/SI.RMT.COST.OB.ZS_all_1994_2024.json
./world_bank_data/SI.RMT.COST.ZS_all_1994_2024.json

./world_bank_data/SL.UEM.1524.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.ADVN.FE.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.ADVN.MA.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.ADVN.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.BASC.FE.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.BASC.MA.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.BASC.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.INTM.FE.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.INTM.MA.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.INTM.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.LTRM.FE.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.LTRM.MA.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.LTRM.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.NEET.FE.ME.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.NEET.FE.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.NEET.MA.ME.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.NEET.MA.ZS_all_1994_2024.json
./world_bank_data/SL.UEM.NEET.ME.ZS_all_1994_2024.json
./world_bank_data/S

./world_bank_data/SP.POP.SCIE.RD.P6_all_1994_2024.json
./world_bank_data/SP.POP.SCIE.RD_all_1994_2024.json
./world_bank_data/SP.POP.TECH.RD.P6_all_1994_2024.json
./world_bank_data/SP.POP.TECH.RD_all_1994_2024.json
./world_bank_data/SP.POP.TOTL.FE.IN_all_1994_2024.json
./world_bank_data/SP.POP.TOTL.FE.ZS_all_1994_2024.json
./world_bank_data/SP.POP.TOTL.MA.IN_all_1994_2024.json
./world_bank_data/SP.POP.TOTL.MA.ZS_all_1994_2024.json
./world_bank_data/SP.POP.TOTL_all_1994_2024.json
./world_bank_data/SP.REG.BRTH.FE.ZS_all_1994_2024.json
./world_bank_data/SP.REG.BRTH.MA.ZS_all_1994_2024.json
./world_bank_data/SP.REG.BRTH.RU.ZS_all_1994_2024.json
./world_bank_data/SP.REG.BRTH.UR.ZS_all_1994_2024.json
./world_bank_data/SP.REG.BRTH.ZS_all_1994_2024.json
./world_bank_data/SP.REG.DTHS.ZS_all_1994_2024.json
./world_bank_data/SP.RUR.TOTL.ZG_all_1994_2024.json
./world_bank_data/SP.RUR.TOTL.ZS_all_1994_2024.json
./world_bank_data/SP.RUR.TOTL_all_1994_2024.json
./world_bank_data/SP.URB.GROW_all_1994_2

./world_bank_data/TX.VAL.MRCH.R2.ZS_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.R3.CD_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.R3.ZS_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.R4.CD_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.R4.ZS_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.R5.CD_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.R5.ZS_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.R6.CD_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.R6.ZS_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.RS.ZS_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.WL.CD_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.WR.CD_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.WR.ZS_all_1994_2024.json
./world_bank_data/TX.VAL.MRCH.XD.WD_all_1994_2024.json
./world_bank_data/TX.VAL.NFOD.UN.ZS_all_1994_2024.json
./world_bank_data/TX.VAL.NFPP.CD_all_1994_2024.json
./world_bank_data/TX.VAL.NFPR.CD_all_1994_2024.json
./world_bank_data/TX.VAL.OPRM.UN.ZS_all_1994_2024.json
./world_bank_dat

# 处理成面板数据并筛选缺失值少于50%的指标

In [None]:
# Reload the merged long-format table. Only the empty string is treated as
# missing: with pandas' default NA markers a literal code such as the
# two-letter country id "NA" (Namibia) would be parsed as NaN, and
# pivot_table would then drop every row carrying it from the panel.
result_df = pd.read_csv('./所有世界银行数据.csv', keep_default_na=False, na_values=[''])

# Keep only the identifying columns and the observed value.
result_df = result_df[['countryiso3code','date','indicator_id','country_id','country_name','value']]

# Reshape to a panel: one row per country and year, one column per indicator.
# pivot_table aggregates duplicate cells with its default function (mean).
df = result_df.pivot_table(index=['countryiso3code','country_id','country_name','date'],
                      columns='indicator_id',
                      values='value')
print(df.shape)

In [52]:
# Share of missing observations for every indicator column.
stat_na = df.isna().sum()/df.shape[0]
# Keep the indicators with LESS than 50% missing values. The previous
# comparison (> 0.5) selected the sparsest indicators instead.
filter_cols = list(stat_na[stat_na < 0.5].index)
df0 = df[filter_cols]
# Turn the country/year index levels back into ordinary columns before export.
df0 = df0.reset_index()
df0.to_csv('./过滤后的数据.csv',index=False)