In [35]:
import wbdata
import pandas as pd
import requests
from datetime import datetime
# Retrieve indicators
result = wbdata.get_indicators()

# Convert the result to a DataFrame
df = pd.DataFrame(result)

# Display the first few rows of the DataFrame
print(df.head())

# Filter indicators related to debt
gender_indicators = df[df['name'].str.contains('gender', case=False, na=False)]

# Save the DataFrame to a CSV file
gender_indicators.to_csv('gender_indicators.csv', index=False)

print("CSV file 'gender_indicators.csv' has been created.")

                     id                                     name unit  \
0    1.0.HCount.1.90usd          Poverty Headcount ($1.90 a day)        
1     1.0.HCount.2.5usd          Poverty Headcount ($2.50 a day)        
2  1.0.HCount.Mid10to50    Middle Class ($10-50 a day) Headcount        
3       1.0.HCount.Ofcl  Official Moderate Poverty Rate-National        
4   1.0.HCount.Poor4uds             Poverty Headcount ($4 a day)        

                                    source  \
0  {'id': '37', 'value': 'LAC Equity Lab'}   
1  {'id': '37', 'value': 'LAC Equity Lab'}   
2  {'id': '37', 'value': 'LAC Equity Lab'}   
3  {'id': '37', 'value': 'LAC Equity Lab'}   
4  {'id': '37', 'value': 'LAC Equity Lab'}   

                                          sourceNote  \
0  The poverty headcount index measures the propo...   
1  The poverty headcount index measures the propo...   
2  The poverty headcount index measures the propo...   
3  The poverty headcount index measures the propo...   
4  T

In [36]:
gender_indicators_df = pd.read_csv('gender_indicators.csv')

# Inspect the DataFrame to understand its structure
print(gender_indicators_df.head())

                     id                                               name  \
0           2.3_GIR.GPI  Gender parity index for gross intake ratio in ...   
1           2.6_PCR.GPI   Gender parity index for primary completion rate    
2  BI.EMP.PWRK.PB.FE.ZS  Public sector employment, as a share of paid e...   
3  BI.EMP.PWRK.PB.MA.ZS  Public sector employment, as a share of paid e...   
4  BI.EMP.TOTL.PB.FE.ZS  Public sector employment, as a share of total ...   

   unit                                             source  \
0   NaN  {'id': '34', 'value': 'Global Partnership for ...   
1   NaN  {'id': '34', 'value': 'Global Partnership for ...   
2   NaN  {'id': '64', 'value': 'Worldwide Bureaucracy I...   
3   NaN  {'id': '64', 'value': 'Worldwide Bureaucracy I...   
4   NaN  {'id': '64', 'value': 'Worldwide Bureaucracy I...   

                                          sourceNote  \
0  Ratio of female to male values of gross intake...   
1  Ratio of female to male values of Primary C

In [37]:
# Extract IDs from the DataFrame
indicator_ids = gender_indicators_df['id'].tolist()

In [39]:
gender_indicators_df = pd.read_csv('gender_indicators.csv')

def fetch_indicator_data(indicator_id):
    url = f"http://api.worldbank.org/v2/indicator/{indicator_id}?format=json&per_page=50000"
    response = requests.get(url)
    response.raise_for_status()
    data = response.json()
    # Print the first few entries to inspect the structure
    print(data)
    return data

def extract_annual_data(data, indicator_id):
    annual_data = {}
    if data and isinstance(data, list) and len(data) > 1:
        indicator_data = data[1]  # The actual data is usually in the second item
        for entry in indicator_data:
            # Check for available keys
            year = entry.get('date')
            amount = entry.get('value')
            if year and amount is not None:
                if year not in annual_data:
                    annual_data[year] = {}
                annual_data[year][indicator_id] = amount
    return annual_data

def organize_data(indicator_ids):
    all_data = {}
    
    for indicator_id in indicator_ids:
        try:
            data = fetch_indicator_data(indicator_id)
            annual_data = extract_annual_data(data, indicator_id)
            for year, values in annual_data.items():
                if year not in all_data:
                    all_data[year] = {}
                all_data[year][indicator_id] = values.get(indicator_id, None)
        except requests.HTTPError as e:
            print(f"Failed to fetch data for indicator ID {indicator_id}: {e}")
    
    # Convert to DataFrame
    df = pd.DataFrame.from_dict(all_data, orient='index').sort_index()
    df.reset_index(inplace=True)
    df.rename(columns={'index': 'Year'}, inplace=True)
    return df

# Extract indicator IDs
indicator_ids = gender_indicators_df['id'].tolist()

# Organize data
data_df = organize_data(indicator_ids)

[{'page': 1, 'pages': 1, 'per_page': '50000', 'total': 1}, [{'id': '2.3_GIR.GPI', 'name': 'Gender parity index for gross intake ratio in grade 1', 'unit': '', 'source': {'id': '34', 'value': 'Global Partnership for Education'}, 'sourceNote': 'Ratio of female to male values of gross intake ratio for primary first grade. Country-specific definition, method and targets are determined by countries themselves. ', 'sourceOrganization': 'Data were collected from national and other publicly available sources, and validated by the Local Education Group (LEG) in each country. LEGs are typically led by the Ministry of Education and include development partners and other education stakeholders. Data were not processed or analyzed by the Global Partnership for Education. It is reported as it was presented in the original sources, or as it was communicated to us through the Coordinating Agency or Lead Donor of the LEG.', 'topics': [{}]}]]
[{'page': 1, 'pages': 1, 'per_page': '50000', 'total': 1}, [{

In [40]:
data_df.head()

Unnamed: 0,Year
