In [1]:
import requests
import pandas as pd
from typing import List, Union
import logging

# Configure logging: timestamped INFO-level messages for every fetch
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Constants
# Base URL of the World Bank v2 API. HTTPS is used so requests and responses
# cannot be read or altered in transit.
WORLD_BANK_URL = 'https://api.worldbank.org/v2'

def fetch_world_bank_data(indicator: str, countries: Union[str, List[str]], start_year: int = 1960, end_year: Union[int, None] = None, timeout: float = 30) -> pd.DataFrame:
    """Fetch data for specified years and countries for a given indicator from the World Bank API.

    Args:
        indicator: World Bank indicator code, e.g. ``"NY.GDP.PCAP.CD"``.
        countries: A single country code, or an iterable of codes that is
            joined with ``;`` for the request URL.
        start_year: First year of the requested date range.
        end_year: Last year of the requested date range; defaults to the
            current year when ``None``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The result of ``process_world_bank_data`` applied to every record that
        was fetched. If a request fails, the error is logged and whatever was
        collected so far (possibly nothing) is processed and returned.
    """
    if end_year is None:
        end_year = pd.Timestamp.now().year

    # Accept any iterable of codes (list, tuple, ...), not only list.
    countries_str = countries if isinstance(countries, str) else ';'.join(countries)

    url = f"{WORLD_BANK_URL}/country/{countries_str}/indicator/{indicator}"
    params = {
        'format': 'json',
        'per_page': 10000,
        'date': f"{start_year}:{end_year}"
    }

    all_data = []
    page = 1

    while True:
        params['page'] = page
        try:
            # Without a timeout a stalled connection would hang the kernel.
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on older requests versions.
            logger.error(f"Error fetching data: {str(e)}")
            if all_data:
                logger.warning(f"Returning partial data for {indicator}: stopped at page {page}")
            break

        # A valid response is [metadata, records]; anything else (for example
        # an API error message) ends the loop.
        if not isinstance(payload, list) or len(payload) < 2 or not payload[1]:
            break

        metadata, records = payload[0], payload[1]
        all_data.extend(records)

        # Prefer the page count reported by the API: comparing the page length
        # with the requested per_page would stop early if the server applies a
        # smaller page size than requested.
        total_pages = None
        if isinstance(metadata, dict):
            try:
                total_pages = int(metadata.get('pages'))
            except (TypeError, ValueError):
                total_pages = None

        if total_pages is not None:
            if page >= total_pages:
                break
        elif len(records) < params['per_page']:
            # No usable metadata: fall back to the short-page heuristic.
            break

        page += 1

    return process_world_bank_data(all_data, indicator)

def process_world_bank_data(data: List[dict], indicator: str) -> pd.DataFrame:
    """Process the fetched World Bank data into a DataFrame.

    Args:
        data: Raw records from the API; each needs the keys ``country``,
            ``countryiso3code``, ``date`` and ``value``.
        indicator: Indicator code, used as the name of the value column.

    Returns:
        A DataFrame with a single column named ``indicator``, indexed by
        ``(country_name, country_code, year)`` and sorted by that index.
        An empty DataFrame (after logging a warning) when ``data`` is empty.
    """
    if not data:
        logger.warning(f"No data retrieved for indicator: {indicator}")
        return pd.DataFrame()

    df = pd.DataFrame(data)

    # 'country' is normally a dict like {'id': ..., 'value': <name>}.
    df['country_name'] = df['country'].apply(lambda x: x['value'] if isinstance(x, dict) else x)
    df['value'] = pd.to_numeric(df['value'], errors='coerce')
    df['date'] = pd.to_datetime(df['date'], format='%Y')

    # Keep only the columns we need instead of dropping a fixed list: dropping
    # raises KeyError when a listed field is absent, and leaves stray columns
    # behind when the API adds a field.
    df = df[['country_name', 'countryiso3code', 'date', 'value']]
    df = df.rename(columns={'countryiso3code': 'country_code', 'date': 'year', 'value': indicator})

    return df.set_index(['country_name', 'country_code', 'year']).sort_index()

def get_world_bank_data(indicator: str, countries: Union[str, List[str]], start_year: int = 1960, end_year: Union[int, None] = None) -> pd.DataFrame:
    """Fetch and process World Bank data for a given indicator.

    Thin wrapper around ``fetch_world_bank_data`` that never raises: any
    exception is logged with its traceback and an empty DataFrame is returned.

    Args:
        indicator: World Bank indicator code.
        countries: One country code or a list of codes.
        start_year: First year of the requested range.
        end_year: Last year of the requested range (``None`` is passed through
            to ``fetch_world_bank_data``).

    Returns:
        The DataFrame produced by ``fetch_world_bank_data``, or an empty
        DataFrame if it raised.
    """
    try:
        df = fetch_world_bank_data(indicator, countries, start_year, end_year)
    except Exception as e:
        logger.exception(f"Error retrieving data for {indicator}: {str(e)}")
        return pd.DataFrame()

    # The fetch step logs request errors and still returns (an empty frame),
    # so only report success when something actually came back.
    if df.empty:
        logger.warning(f"No data returned for {indicator}")
    else:
        logger.info(f"Successfully retrieved data for {indicator}")
    return df

In [2]:
# Example: GDP per capita for three countries over 1960-2020
if __name__ == "__main__":
    indicator = "NY.GDP.PCAP.CD"
    countries = ["USA", "UKR", "JPN"]
    start_year, end_year = 1960, 2020
    df = get_world_bank_data(
        indicator=indicator,
        countries=countries,
        start_year=start_year,
        end_year=end_year,
    )
    # Show the first rows of the resulting frame
    print(df.head())

2024-08-05 17:00:57,295 - INFO - Successfully retrieved data for NY.GDP.PCAP.CD


                                      NY.GDP.PCAP.CD
country_name country_code year                      
Japan        JPN          1960-01-01      508.702779
                          1961-01-01      608.864581
                          1962-01-01      684.565510
                          1963-01-01      775.592123
                          1964-01-01      902.867722


In [3]:
import pandas as pd

# Three-letter country codes requested from the World Bank API and offered in
# the dashboard's country dropdown (ten African countries).
african_countries = ['ZAF', 'NGA', 'EGY', 'KEN', 'ETH', 'GHA', 'TZA', 'UGA', 'CIV', 'CMR']

# Maps each World Bank indicator code to its human-readable label. The label
# replaces the code as the column name of the fetched frame and is also the key
# under which that frame is stored in `data`.
indicator_names = {
    'NY.GDP.MKTP.CD': 'GDP (current US$)',
    'NY.GDP.PCAP.CD': 'GDP per capita (current US$)',
    'NY.GDP.MKTP.KD.ZG': 'GDP growth (annual %)',
    'FP.CPI.TOTL.ZG': 'Inflation, consumer prices (annual %)',
    'DT.DOD.DECT.CD': 'External debt stocks, total (DOD, current US$)',
    'SI.POV.NAHC': 'Poverty headcount ratio at national poverty lines (% of population)',
    'SI.POV.DDAY': 'Poverty headcount ratio at $2.15 a day (2017 PPP) (% of population)',
    'SH.DYN.MORT': 'Mortality rate, under-5 (per 1,000 live births)',
    'SH.STA.MMRT': 'Maternal mortality ratio (modeled estimate, per 100,000 live births)',
    'SH.HIV.INCD.ZS': 'Incidence of HIV (% of uninfected population ages 15-49)',
    'SH.IMM.MEAS': 'Immunization, measles (% of children ages 12-23 months)',
    'SE.PRM.ENRR': 'School enrollment, primary (% gross)',
    'SE.SEC.ENRR': 'School enrollment, secondary (% gross)',
    'SE.ADT.LITR.ZS': 'Literacy rate, adult total (% of people ages 15 and above)',
    'SG.GEN.PARL.ZS': 'Proportion of seats held by women in national parliaments (%)',
    'SL.TLF.CACT.FE.ZS': 'Labor force participation rate, female (% of female population ages 15+)',
    'SH.H2O.SMDW.ZS': 'People using safely managed drinking water services (% of population)',
    'SH.STA.SMSS.ZS': 'People using safely managed sanitation services (% of population)',
    'EG.ELC.ACCS.ZS': 'Access to electricity (% of population)',
    'EG.USE.ELEC.KH.PC': 'Electric power consumption (kWh per capita)',
    'SL.EMP.VULN.ZS': 'Vulnerable employment, total (% of total employment)',
    'SL.UEM.TOTL.ZS': 'Unemployment, total (% of total labor force)',
    'IT.NET.USER.ZS': 'Individuals using the Internet (% of population)',
    'IT.CEL.SETS.P2': 'Mobile cellular subscriptions (per 100 people)',
    'EN.ATM.CO2E.PC': 'CO2 emissions (metric tons per capita)',
    'AG.LND.FRST.ZS': 'Forest area (% of land area)',
    'AG.YLD.CREL.KG': 'Cereal yield (kg per hectare)',
    'SN.ITK.DEFC.ZS': 'Prevalence of undernourishment (% of population)',
    'FX.OWN.TOTL.ZS': 'Account ownership at a financial institution or with a mobile-money-service provider (% of population ages 15+)'
}

# Groups the indicator codes by thematic category. Every code listed here must
# also be a key of `indicator_names`, because the fetch loop looks each one up.
# Note that 'Financial Inclusion' holds a single indicator, while the other
# categories hold two or more.
indicators = {
    'Economic Growth': ['NY.GDP.MKTP.CD', 'NY.GDP.PCAP.CD', 'NY.GDP.MKTP.KD.ZG'],
    'Liquidity': ['FP.CPI.TOTL.ZG', 'DT.DOD.DECT.CD'],
    'Poverty and Inequality': ['SI.POV.NAHC', 'SI.POV.DDAY'],
    'Health': ['SH.DYN.MORT', 'SH.STA.MMRT', 'SH.HIV.INCD.ZS', 'SH.IMM.MEAS'],
    'Education': ['SE.PRM.ENRR', 'SE.SEC.ENRR', 'SE.ADT.LITR.ZS'],
    'Gender Equality': ['SG.GEN.PARL.ZS', 'SL.TLF.CACT.FE.ZS'],
    'Water and Sanitation': ['SH.H2O.SMDW.ZS', 'SH.STA.SMSS.ZS'],
    'Energy': ['EG.ELC.ACCS.ZS', 'EG.USE.ELEC.KH.PC'],
    'Employment and Decent Work': ['SL.EMP.VULN.ZS', 'SL.UEM.TOTL.ZS'],
    'Infrastructure and Innovation': ['IT.NET.USER.ZS', 'IT.CEL.SETS.P2'],
    'Climate Action': ['EN.ATM.CO2E.PC', 'AG.LND.FRST.ZS'],
    'Agriculture and Food Security': ['AG.YLD.CREL.KG', 'SN.ITK.DEFC.ZS'],
    'Financial Inclusion': ['FX.OWN.TOTL.ZS']
}

# Fetch data for all indicators and rename them.
# `data` maps each indicator's standard name to its fetched frame.
data = {}
for category, indicator_list in indicators.items():
    for indicator in indicator_list:
        standard_name = indicator_names[indicator]
        df = get_world_bank_data(indicator, african_countries)
        if df.empty:
            # get_world_bank_data returns a column-less frame on failure or when
            # the API has no data; assigning `df.columns = [...]` to it raises a
            # length-mismatch ValueError and would abort the whole loop. Store an
            # empty frame with the expected column so every indicator has a key.
            logger.warning(f"No data available for {standard_name} ({indicator})")
            df = pd.DataFrame(columns=[standard_name])
        else:
            # Rename by key rather than by position so an unexpected extra
            # column cannot break the assignment.
            df = df.rename(columns={indicator: standard_name})
        data[standard_name] = df  # Use the standard name as the key

# Same category grouping as `indicators`, but listing standard names instead
# of indicator codes.
indicators_standard = {
    category: [indicator_names[code] for code in indicator_list]
    for category, indicator_list in indicators.items()
}

2024-08-05 17:00:57,689 - INFO - Successfully retrieved data for NY.GDP.MKTP.CD
2024-08-05 17:00:57,982 - INFO - Successfully retrieved data for NY.GDP.PCAP.CD
2024-08-05 17:00:58,276 - INFO - Successfully retrieved data for NY.GDP.MKTP.KD.ZG
2024-08-05 17:00:58,609 - INFO - Successfully retrieved data for FP.CPI.TOTL.ZG
2024-08-05 17:00:58,913 - INFO - Successfully retrieved data for DT.DOD.DECT.CD
2024-08-05 17:00:59,171 - INFO - Successfully retrieved data for SI.POV.NAHC
2024-08-05 17:00:59,528 - INFO - Successfully retrieved data for SI.POV.DDAY
2024-08-05 17:00:59,835 - INFO - Successfully retrieved data for SH.DYN.MORT
2024-08-05 17:01:00,144 - INFO - Successfully retrieved data for SH.STA.MMRT
2024-08-05 17:01:00,450 - INFO - Successfully retrieved data for SH.HIV.INCD.ZS
2024-08-05 17:01:00,710 - INFO - Successfully retrieved data for SH.IMM.MEAS
2024-08-05 17:01:00,970 - INFO - Successfully retrieved data for SE.PRM.ENRR
2024-08-05 17:01:01,219 - INFO - Successfully retrieved

In [10]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objs as go
import pandas as pd
import numpy as np

def _to_dashboard_frame(frame, value_column, countries):
    """Reshape one fetched indicator frame into the layout the callbacks use.

    Args:
        frame: Either a fetched frame indexed by
            (country_name, country_code, year) with one value column, or a
            frame that is already in dashboard layout / empty.
        value_column: Name of the value column in the fetched frame.
        countries: Country codes that must appear as columns, in this order.

    Returns:
        A frame with one row per year and one column per entry in
        ``countries`` (all-NaN for a country without data). Years for which no
        country has a value are dropped.
    """
    if not isinstance(frame.index, pd.MultiIndex):
        # Already reshaped (this cell was re-run) or an empty placeholder:
        # only make sure the expected country columns exist.
        return frame.reindex(columns=countries)

    wide = frame.reset_index().pivot(index='year', columns='country_code', values=value_column)
    return wide.reindex(columns=countries).dropna(how='all').sort_index()


# Build the dashboard's data from the indicators fetched earlier in the
# notebook. This used to be replaced by unseeded random numbers, which made the
# dashboard plot noise under real World Bank indicator names.
data = {
    indicator: _to_dashboard_frame(data[indicator], indicator, african_countries)
    for indicator_list in indicators_standard.values()
    for indicator in indicator_list
}

# Create the Dash application object that the layout and callbacks attach to.
app = dash.Dash(__name__)

# Define custom CSS by overriding the HTML page template.
# The template loads the Roboto web font and defines the CSS classes referenced
# by the layout's className values: container, dropdown-container and
# graph-container.
# NOTE(review): the {%...%} tokens (metas, title, favicon, css, app_entry,
# config, scripts, renderer) look like Dash's template placeholders and should
# be kept as they are; confirm against the Dash documentation before editing.
app.index_string = '''
<!DOCTYPE html>
<html>
    <head>
        {%metas%}
        <title>{%title%}</title>
        {%favicon%}
        {%css%}
        <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap" rel="stylesheet">
        <style>
            body {
                font-family: 'Roboto', sans-serif;
                background-color: #f0f0f0;
                margin: 0;
                padding: 20px;
            }
            .container {
                background-color: white;
                border-radius: 10px;
                padding: 20px;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            }
            h1 {
                color: #2c3e50;
                font-weight: 700;
            }
            .dropdown-container {
                background-color: #ecf0f1;
                border-radius: 5px;
                padding: 15px;
                margin-bottom: 20px;
            }
            .graph-container {
                background-color: white;
                border-radius: 5px;
                padding: 15px;
                margin-bottom: 20px;
                box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
            }
        </style>
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>
'''



# Define the layout, built bottom-up from named pieces and assembled at the end.

# Page heading
_page_title = html.H1("World Bank African Development Dashboard", style={'textAlign': 'center'})

# Category selector (first of three columns); its value drives the indicator list
_category_selector = html.Div(
    [
        dcc.Dropdown(
            id='category-dropdown',
            options=[{'label': name, 'value': name} for name in indicators_standard.keys()],
            value='Economic Growth',
            style={'width': '100%'},
        )
    ],
    style={'width': '32%', 'display': 'inline-block', 'marginRight': '2%'},
)

# Indicator selector (second column); options and value are filled in by a callback
_indicator_selector = html.Div(
    [
        dcc.Dropdown(
            id='indicator-dropdown',
            style={'width': '100%'},
        )
    ],
    style={'width': '32%', 'display': 'inline-block', 'marginRight': '2%'},
)

# Multi-select country selector (third column), pre-selecting the first five countries
_country_selector = html.Div(
    [
        dcc.Dropdown(
            id='country-dropdown',
            options=[{'label': code, 'value': code} for code in african_countries],
            value=african_countries[:5],
            multi=True,
            style={'width': '100%'},
        )
    ],
    style={'width': '32%', 'display': 'inline-block'},
)

_selector_row = html.Div(
    [_category_selector, _indicator_selector, _country_selector],
    className='dropdown-container',
)

# Full-width main graph
_main_graph_row = html.Div([dcc.Graph(id='main-graph')], className='graph-container')

# Bar chart and scatter plot side by side
_bar_chart_cell = html.Div(
    [dcc.Graph(id='bar-chart')],
    style={'width': '49%', 'display': 'inline-block', 'marginRight': '2%'},
)
_scatter_cell = html.Div(
    [dcc.Graph(id='scatter-plot')],
    style={'width': '49%', 'display': 'inline-block'},
)
_secondary_graph_row = html.Div([_bar_chart_cell, _scatter_cell], className='graph-container')

app.layout = html.Div([
    html.Div(
        [_page_title, _selector_row, _main_graph_row, _secondary_graph_row],
        className='container',
    )
])

# Callback to update indicator dropdown based on category selection
@app.callback(
    Output('indicator-dropdown', 'options'),
    Output('indicator-dropdown', 'value'),
    Input('category-dropdown', 'value')
)
def update_indicator_dropdown(selected_category):
    """Populate the indicator dropdown for the selected category.

    Args:
        selected_category: Value of the category dropdown, or ``None`` when the
            user has cleared it.

    Returns:
        A tuple ``(options, value)``: the dropdown options for the category's
        indicators and the first indicator as the selected value. For a
        missing, unknown or empty category this is ``([], None)``.
    """
    # .get avoids a KeyError when the dropdown is cleared (value is None).
    category_indicators = indicators_standard.get(selected_category, [])
    options = [{'label': indicator, 'value': indicator} for indicator in category_indicators]
    # Guard the [0] lookup so an empty category cannot raise IndexError.
    selected_value = options[0]['value'] if options else None
    return options, selected_value

# Callback to update main graph
@app.callback(
    Output('main-graph', 'figure'),
    Input('indicator-dropdown', 'value'),
    Input('country-dropdown', 'value')
)
def update_main_graph(selected_indicator, selected_countries):
    """Draw the selected indicator over time, one line per selected country.

    Args:
        selected_indicator: Key into ``data``; may be ``None`` while no
            indicator is selected.
        selected_countries: List of country codes from the multi-select
            dropdown; empty or ``None`` when the selection is cleared.

    Returns:
        A styled line figure, or a styled empty figure with a hint as title
        when there is nothing to plot.
    """
    # Nothing selected (or an unknown indicator): show a placeholder instead of
    # raising inside the callback.
    if selected_indicator not in data or not selected_countries:
        placeholder = go.Figure()
        placeholder.update_layout(title_text='Select an indicator and at least one country')
        return update_graph_layout(placeholder)

    df = data[selected_indicator]
    fig = px.line(df[selected_countries], x=df.index, y=selected_countries,
                  title=f'{selected_indicator} Over Time')
    fig.update_layout(yaxis_title=selected_indicator)
    # Apply the shared dashboard styling, which was defined but never used.
    return update_graph_layout(fig)

# Callback to update bar chart
@app.callback(
    Output('bar-chart', 'figure'),
    Input('indicator-dropdown', 'value'),
    Input('country-dropdown', 'value')
)
def update_bar_chart(selected_indicator, selected_countries):
    """Draw one bar per selected country for the last row (year) of the indicator.

    Args:
        selected_indicator: Key into ``data``; may be ``None`` while no
            indicator is selected.
        selected_countries: List of country codes from the multi-select
            dropdown; empty or ``None`` when the selection is cleared.

    Returns:
        A styled bar figure, or a styled empty figure with a hint as title when
        there is nothing to plot.
    """
    df = data.get(selected_indicator)
    # Guard against a cleared selection and against frames without rows, where
    # df.index[-1] would raise IndexError.
    if df is None or df.empty or not selected_countries:
        placeholder = go.Figure()
        placeholder.update_layout(title_text='No data for the current selection')
        return update_graph_layout(placeholder)

    latest_year = df.index[-1]
    fig = px.bar(x=selected_countries, y=df.loc[latest_year, selected_countries],
                 title=f'{selected_indicator} - Latest Year')
    fig.update_layout(xaxis_title='Country', yaxis_title=selected_indicator)
    # Apply the shared dashboard styling, which was defined but never used.
    return update_graph_layout(fig)

# Callback to update scatter plot
@app.callback(
    Output('scatter-plot', 'figure'),
    Input('category-dropdown', 'value'),
    Input('country-dropdown', 'value')
)
def update_scatter_plot(selected_category, selected_countries):
    """Plot the category's first two indicators against each other.

    Each selected country is one labelled point, using the latest year present
    in both indicator frames.

    Args:
        selected_category: Key into ``indicators_standard``; ``None`` when the
            category dropdown is cleared.
        selected_countries: List of country codes from the multi-select
            dropdown; empty or ``None`` when the selection is cleared.

    Returns:
        A styled scatter figure, or a styled empty figure with an explanatory
        title when the comparison cannot be drawn.
    """
    def _placeholder(message):
        # Empty figure carrying only a title, styled like the real charts.
        empty_fig = go.Figure()
        empty_fig.update_layout(title_text=message)
        return update_graph_layout(empty_fig)

    category_indicators = indicators_standard.get(selected_category, [])
    # Unpacking two values from a one-indicator category (for example
    # 'Financial Inclusion') used to raise ValueError.
    if len(category_indicators) < 2:
        return _placeholder('This category needs at least two indicators for a comparison')
    if not selected_countries:
        return _placeholder('Select at least one country')

    indicator1, indicator2 = category_indicators[:2]
    df1 = data[indicator1]
    df2 = data[indicator2]

    # Use a year that exists in both frames; taking the last year of df1 alone
    # raises KeyError when df2 does not cover it.
    common_years = df1.index.intersection(df2.index)
    if len(common_years) == 0:
        return _placeholder(f'No common year for {indicator1} and {indicator2}')
    latest_year = common_years.max()

    fig = px.scatter(x=df1.loc[latest_year, selected_countries],
                     y=df2.loc[latest_year, selected_countries],
                     text=selected_countries)
    fig.update_traces(textposition='top center')
    fig.update_layout(title=f'{indicator1} vs {indicator2} - Latest Year',
                      xaxis_title=indicator1, yaxis_title=indicator2)
    # Apply the shared dashboard styling, which was defined but never used.
    return update_graph_layout(fig)

# Update the graph layouts
def update_graph_layout(fig):
    """Apply the dashboard's common styling to a figure and hand it back.

    Sets the font family, transparent plot/paper backgrounds, title and legend
    font settings, and axis title/tick font sizes through a single
    ``fig.update_layout`` call.

    Args:
        fig: Any object exposing ``update_layout(**kwargs)``.

    Returns:
        The same ``fig`` object that was passed in.
    """
    transparent = 'rgba(0,0,0,0)'
    theme = {
        'font': {'family': "Roboto"},
        'plot_bgcolor': transparent,
        'paper_bgcolor': transparent,
        'title_font': {'size': 20, 'color': '#2c3e50'},
        'legend_title_font': {'size': 14},
        'legend_font': {'size': 12},
        'xaxis': {'title_font': {'size': 14}, 'tickfont': {'size': 12}},
        'yaxis': {'title_font': {'size': 14}, 'tickfont': {'size': 12}},
    }
    fig.update_layout(**theme)
    return fig

# Start the Dash development server when this cell/script is executed directly.
if __name__ == '__main__':
    # app.run is the current entry point; app.run_server is its deprecated
    # predecessor and is no longer available in recent Dash releases.
    app.run(debug=True)