In [7]:
import requests
from collections import defaultdict
import pandas as pd
import numpy as np

## Population APIs
Total Population: SP.POP.TOTL

In [21]:
# get the World Bank Population data by iterating through all pages
total_population_data = []
for page in range(1,18):
    payload = {'format': 'json', 'per_page': '1000', 'date':'1960:2022', 'page':page}
    population_json = requests.get('http://api.worldbank.org/v2/countries/indicators/SP.POP.TOTL', params=payload)
    total_population_data+=population_json.json()[1]


In [22]:
total_population_df = pd.DataFrame(total_population_data)

In [23]:
#remove unnecessary columns
total_population_df.drop(columns=['indicator','obs_status','decimal','countryiso3code', 'unit'], inplace=True, axis=1)

total_population_df["date"] = pd.to_datetime(total_population_df["date"])


In [24]:
#turn country feature into just country name
for i, country in enumerate(total_population_df['country']):
    total_population_df.loc[i,'country'] = country['value']

In [26]:
countries_population_df = total_population_df[3038:]

In [27]:
#countries with the top 10 populations
largest_10_population = countries_population_df[countries_population_df['date']=='2021'].nlargest(n=10,columns='value')

largest_10_list = [country for country in largest_10_population['country']]

largest_10_pop_df = countries_population_df[countries_population_df['country'].isin(largest_10_list)]

In [28]:
%store largest_10_pop_df
%store countries_population_df

Stored 'largest_10_pop_df' (DataFrame)
Stored 'countries_population_df' (DataFrame)


## Land Development APIs
1. Forest: AG.LND.FRST.ZS
2. Permanent cropland: AG.LND.CROP.ZS
3. Agricultural Land: AG.LND.AGRI.ZS
4. Arable Land: AG.LND.ARBL.ZS
5. Land Cereal production : AG.LND.CREL.HA


In [52]:
Land_API = ['AG.LND.FRST.ZS','AG.LND.CROP.ZS','AG.LND.AGRI.ZS','AG.LND.ARBL.ZS','AG.LND.CREL.HA']

urls = []
for indicator in Land_API:
    url = 'http://api.worldbank.org/v2/countries/indicators/' + indicator 
    urls.append(url)
dataframe_list = []

for url in urls:
    data = []
    try:  
        for page in range(1,18):
            payload = {'format': 'json', 'per_page': '1000', 'date':'1960:2022', 'page':page}     
            r = requests.get(url, params=payload)
            data+=r.json()[1]
        
        dataframe_list.append(pd.DataFrame(data))

    except:
        print('could not load data', url)

In [31]:
def removeFormatColumns(df):
    """
    _summary_

    Args:
        df (DataFrame): _description_

    Returns:
        DataFrame: _description_
    """
    df.drop(columns=['indicator','obs_status','decimal','countryiso3code', 'unit'], inplace=True, axis=1)

    return df

def formatColumns(df):
    """
    _summary_

    Args:
        df (DataFrame): _description_
    Returns:
        df (DateFrame):
    """
    
    df["date"] = pd.to_datetime(df["date"])

    #turn country feature into just country name
    for i, country in enumerate(df['country']):
        df.loc[i,'country'] = country['value']

    return df

def justCountries(df):
    """
    _summary_
    Args:
        df (DataFrame): _description_
    Returns:
        DataFrame: _description_
    """
    return df

In [54]:
for i, df in enumerate(dataframe_list):
  dataframe_list[i] = removeFormatColumns(df)
  dataframe_list[i] = formatColumns(df)
  

Unnamed: 0,country,date,value
0,Africa Eastern and Southern,2021-01-01,
1,Africa Eastern and Southern,2020-01-01,
2,Africa Eastern and Southern,2019-01-01,
3,Africa Eastern and Southern,2018-01-01,0.774625
4,Africa Eastern and Southern,2017-01-01,0.764390
...,...,...,...
16487,Zimbabwe,1964-01-01,0.258498
16488,Zimbabwe,1963-01-01,0.258498
16489,Zimbabwe,1962-01-01,0.258498
16490,Zimbabwe,1961-01-01,0.258498


In [33]:
df = dataframe_list[1]

for i, country in enumerate(df['country']):
    df.loc[i,'country'] = country['value']

df

TypeError: string indices must be integers

## Living Condition APIs
1. Rural population: SP.RUR.TOTL.ZS
2. Urban Population:SP.URB.TOTL.IN.ZS
3. Male Employment:SL.AGR.EMPL.MA.ZS
4. Female Employement: SL.AGR.EMPL.FE.ZS



## Agricultural Practice APIs
1. Fertilizer Use: AG.CON.FERT.ZS
2. Cereal Production: AG.YLD.CREL.KG
3. % of GDP value: NV.AGR.TOTL.ZS

## World Issue APIs
1. Under 5 mortality: SH.DYN.MORT
2. Greenhouse Gases: EN.ATM.GHGT.KT.CE
3. CO2 emissions: EN.ATM.CO2E.KT
4. Poverty : SI.POV.DDAY
