In [3]:
# Start writing code here...
!pip install wbgapi

You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.[0m


In [19]:
import csv
import numpy as np
import pandas as pd
import wbgapi as wb
import matplotlib.pyplot as plt

%matplotlib inline

In [35]:
def get_all_indicator_codes():
    '''
    (Helper function) Collect the code/id of every indicator in the database.

    Returns a list holding the 'id' entry of each row yielded by
    wb.series.list for database 2.
    '''
    wdi_db = 2  # 2 specifies WDI
    codes = []
    for series_row in wb.series.list(db=wdi_db):
        codes.append(series_row['id'])
    return codes

def get_iso_code(country):
    '''
    (Helper function) Resolve a country name to its iso code.

    country (string) -- country to get iso code for

    Returns whatever wb.economy.coder resolves the name to. When the lookup
    yields None, an error message is printed and None is returned.
    '''
    resolved = wb.economy.coder(country)

    # Happy path first: hand the resolved code straight back.
    if resolved is not None:
        return resolved

    print("ERROR: get_iso_code could not resolve country name.")
    return None

def get_specific_indicator_code(keyword):
    '''
    (Helper function) Look up the indicators related to a keyword.

    keyword (string) -- keyword to get codes for

    Returns the result of wb.series.info queried with the keyword. When that
    result is None, an error message is printed and None is returned.
    '''
    matches = wb.series.info(q=keyword)

    # Happy path first: hand the lookup result straight back.
    if matches is not None:
        return matches

    print("ERROR: indicator_codes could not resolve keyword argument.")
    return None
        
def get_indicators_for_country(country, min_year=None, max_year=None):
    '''
    Get the indicator rows for a specific country.

    The rows are returned exactly as produced by wb.refetch (an iterable that
    is meant to be consumed once, e.g. by filter_indicators_by_coverage).
    If the country name cannot be resolved, an empty list is returned instead.

    country (string) -- country specified to get indicators for
    min_year (4-digit int) -- start year of data coverage consideration (only
                              used if max_year also available)
    max_year (4-digit int) -- end year of data coverage consideration (only
                              used if min_year also available)
    '''
    # Resolve the country first so that an unknown name does not cost a full
    # listing of every indicator code.
    country_iso_code = get_iso_code(country)

    if country_iso_code is None:
        return [] # no error print needed as one will be printed in get_iso_code

    # refetch takes the series ids as one ';'-joined string
    joined_indicator_codes = ';'.join(get_all_indicator_codes())

    refetch_args = {
        'source': 2, # 2 specifies WDI
        'series': joined_indicator_codes,
        'economy': country_iso_code,
    }

    # The year window is only applied when both bounds are supplied.
    if min_year is not None and max_year is not None:
        # NOTE(review): range() stops before max_year, so the end year itself
        # is not part of the window -- confirm whether it should be inclusive.
        refetch_args['time'] = range(min_year, max_year, 1)

    return wb.refetch('sources/{source}/series/{series}/country/{economy}',
                      ['series', 'economy'], **refetch_args)

def filter_indicators_by_coverage(ind_generator, threshold=0.0):
    '''
    Get list of indicators filtered by data coverage. Indicators that
    have data coverage equal to or above 'threshold' will be included.

    Coverage of an indicator is the share of its rows whose 'value' is not
    None. Rows are consumed one at a time and only two counters are kept per
    indicator, so the full data set is never held in memory.

    ind_generator (iterable) -- rows of indicator data; each row must provide
                                row['variable'][0]['id'] (the indicator code)
                                and row['value'] (None when data is missing)
    threshold (float) -- minimum data coverage amount (fraction, e.g. 0.90)

    Returns a list of indicator codes, in the order they were first seen.
    An empty input yields an empty list.
    '''
    # indicator code -> [rows with a value, total rows]
    tallies = {}

    for row in ind_generator:
        ind_code = row['variable'][0]['id']
        tally = tallies.setdefault(ind_code, [0, 0])

        if row['value'] is not None:
            tally[0] += 1
        tally[1] += 1

    # Every indicator is evaluated only after the whole stream has been read,
    # so the final indicator in the stream is judged like all the others.
    # 'total' is always >= 1 here because a tally only exists once a row
    # for that indicator has been counted.
    return [ind_code
            for ind_code, (present, total) in tallies.items()
            if present / total >= threshold]

def export_array(arr, filename):
    '''
    Exports array of values into a CSV file, one value per row.

    arr (array) -- array of values to export
    filename (string) -- name of file to export to, including ".csv"

    If arr is empty an error message is printed and no file is written.
    '''
    len_arr = len(arr)

    if len_arr == 0:
        print("ERROR: export_array cannot convert empty data.")
        return

    # reshape into a single column so each value becomes its own CSV row
    reshaped_arr = np.array(arr).reshape((len_arr, 1))

    # The context manager closes the file even if writing fails.
    # newline='' lets the csv module control the line endings itself.
    with open(filename, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(reshaped_arr)

    
# Driver: fetch Colombia's indicator rows, keep the indicators whose coverage
# meets the threshold, report how many remain and export their codes to CSV.
if __name__ == "__main__":
    '''
    The Main Function of this file, where execution starts.
    '''
    # Indicator rows for Colombia, requested with the year bounds 1980 and 2010.
    fetched_ind = get_indicators_for_country('Colombia', 1980, 2010)
    # Keep only indicators with coverage of at least 0.90.
    filtered_ind = filter_indicators_by_coverage(fetched_ind, 0.90)
    # Number of indicators that passed the coverage filter.
    print(len(filtered_ind))
    # One indicator code per row in the output file.
    export_array(filtered_ind, 'NEW_col_data_90_threshold.csv')

300


In [36]:
# Economy info for 'COL' (Colombia); stored for later use, not displayed by this cell.
colombia_data = wb.economy.info(['COL'])

# Last expression of the cell, so the matching indicators are rendered as its output.
get_specific_indicator_code('life expectancy')
# Equivalent direct call: wb.series.info(q='life expectancy')

id,value
SP.DYN.LE00.FE.IN,"Life expectancy at birth, female (years)"
SP.DYN.LE00.IN,"Life expectancy at birth, total (years)"
SP.DYN.LE00.MA.IN,"Life expectancy at birth, male (years)"
,3 elements


In [21]:
# Search for 'fossil fuels'; the output lists each matching indicator's ID and
# name together with the field and text excerpt in which the phrase occurs.
wb.search('fossil fuels')

ID,Name,Field,Value
EG.ELC.COAL.KH,Electricity production from coal sources (kWh),Developmentrelevance,...from fossil fuel combustion and cement manufacturing. In combustion different fossil fuels release different amounts of carbon dioxide for the same level of energy use:...
EG.ELC.COAL.ZS,Electricity production from coal sources (% of total),Developmentrelevance,...from fossil fuel combustion and cement manufacturing. In combustion different fossil fuels release different amounts of carbon dioxide for the same level of energy use:...
EG.ELC.HYRO.KH,Electricity production from hydroelectric sources (kWh),Developmentrelevance,...from fossil fuel combustion and cement manufacturing. In combustion different fossil fuels release different amounts of carbon dioxide for the same level of energy use:...
EG.ELC.HYRO.ZS,Electricity production from hydroelectric sources (% of total),Developmentrelevance,...from fossil fuel combustion and cement manufacturing. In combustion different fossil fuels release different amounts of carbon dioxide for the same level of energy use:...
EG.ELC.NGAS.KH,Electricity production from natural gas sources (kWh),Developmentrelevance,...2) lower carbon relative to other fossil fuels;...
EG.ELC.NGAS.ZS,Electricity production from natural gas sources (% of total),Developmentrelevance,...2) lower carbon relative to other fossil fuels;...
EG.ELC.NUCL.KH,Electricity production from nuclear sources (kWh),Developmentrelevance,"...grew rapidly in the 1970s and 1980s as countries sought to reduce dependence on fossil fuels, especially after the oil crises of the 1970s. There was a renewed interest in..."
EG.ELC.NUCL.ZS,Electricity production from nuclear sources (% of total),Developmentrelevance,"...grew rapidly in the 1970s and 1980s as countries sought to reduce dependence on fossil fuels, especially after the oil crises of the 1970s. There was a renewed interest in..."
EG.ELC.PETR.KH,Electricity production from oil sources (kWh),Developmentrelevance,...from fossil fuel combustion and cement manufacturing. In combustion different fossil fuels release different amounts of carbon dioxide for the same level of energy use:...
EG.ELC.PETR.ZS,Electricity production from oil sources (% of total),Developmentrelevance,...from fossil fuel combustion and cement manufacturing. In combustion different fossil fuels release different amounts of carbon dioxide for the same level of energy use:...


In [22]:
# Exploratory: print the wbgapi package help text (package contents and classes).
help(wb)

Help on package wbgapi:

NAME
    wbgapi

DESCRIPTION
    wbgapi provides a comprehensive interface to the World Bank's data and
    metadata API with built-in pandas integration

PACKAGE CONTENTS
    __version__
    data
    economy
    economy_coder
    economy_metadata
    income
    lending
    region
    series
    series_metadata
    source
    time
    topic
    utils

CLASSES
    builtins.Exception(builtins.BaseException)
        APIError
            APIResponseError
        URLError
    builtins.dict(builtins.object)
        Coder
    builtins.object
        Featureset
        Metadata
        MetadataCollection
    
    class APIError(builtins.Exception)
     |  APIError(url, msg, code=None)
     |  
     |  Common base class for all non-exit exceptions.
     |  
     |  Method resolution order:
     |      APIError
     |      builtins.Exception
     |      builtins.BaseException
     |      builtins.object
     |  
     |  Methods defined here:
     |  
     |  __init__(sel

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=72b1e015-02d7-48b2-beb6-dcae4caaa79e' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>