# World Bank - Population Summarizing
Reduced '.csv' files from WB_data Notebook

In [1]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path
import requests
import matplotlib.pyplot as plt
from scipy.stats import linregress
import numpy as np
import time


# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

In [2]:
# WB data set
# Forward slashes keep the path portable across operating systems and avoid the
# invalid "\W" escape sequence that the backslash form produced.
WB_africadata_2000 = Path("Resources/WB_africadata_2000.csv")

In [3]:
# Read data file with the Pandas library
# encoding?, i.e encoding="ISO-8859-1"
WB_africadata_2000_df = pd.read_csv(WB_africadata_2000)

In [4]:
WB_africadata_2000_df

Unnamed: 0,series_id,country_code,country_name,year,value
0,SH.HIV.INCD.TL,KEN,Kenya,2017,3.400000e+04
1,SH.HIV.INCD.TL,LBR,Liberia,2017,1.600000e+03
2,SH.HIV.INCD.TL,LSO,Lesotho,2017,1.000000e+04
3,SH.HIV.INCD.TL,MDG,Madagascar,2017,7.200000e+03
4,SH.HIV.INCD.TL,MOZ,Mozambique,2017,1.300000e+05
...,...,...,...,...,...
652157,VA.NO.SRC,WSM,Samoa,2006,3.000000e+00
652158,FM.LBL.BMNY.GD.ZS,TCD,Chad,2010,1.146678e+01
652159,NE.CON.PRVT.CN.AD,SDN,Sudan,2000,2.199464e+10
652160,NE.CON.PRVT.CN.AD,SDN,Sudan,2003,3.190503e+10


# World Bank Data Source Break-down
### The 'WB_africadata_2000.csv' file created in the WB_data notebook is used to summarize the Foreign Aid data and the other indicators.

### To help with the analysis, the data was put into bins of roughly 5 years from 2000 to 2020 (2000 - 2005, 2006 - 2010, 2011 - 2015, 2016 - 2020), plus a last bin for 2021 to 2023.

In [5]:
WB_africadata_2000_df[["year"]].min()

year    2000
dtype: int64

In [6]:
WB_africadata_2000_df[["year"]].max()

year    2023
dtype: int64

In [7]:
# Work on an independent copy: a plain assignment would only alias the raw frame,
# so adding columns to the working frame would silently modify the loaded data too.
WB_africadata_2000_df2 = WB_africadata_2000_df.copy()

In [8]:
# Create bins in which to place rows based upon the observation year.
# pd.cut treats the edges as right-inclusive intervals by default, i.e.
# (0, 2005], (2005, 2010], (2010, 2015], (2015, 2020], (2020, 2023].
bins = [0, 2005, 2010, 2015, 2020, 2023]

# Create labels for these bins (one label per interval, in the same order)
group_labels = ["2000 - 2005", "2006 - 2010", "2011 - 2015", "2016 - 2020", "2021 - 2023"]

In [9]:
# Slice the data and place it into bins
pd.cut(WB_africadata_2000_df2["year"], bins, labels=group_labels)

0         2016 - 2020
1         2016 - 2020
2         2016 - 2020
3         2016 - 2020
4         2016 - 2020
             ...     
652157    2006 - 2010
652158    2006 - 2010
652159    2000 - 2005
652160    2000 - 2005
652161    2011 - 2015
Name: year, Length: 652162, dtype: category
Categories (5, object): ['2000 - 2005' < '2006 - 2010' < '2011 - 2015' < '2016 - 2020' < '2021 - 2023']

In [10]:
WB_africadata_2000_df2["Year Group"] = pd.cut(WB_africadata_2000_df2["year"], bins, labels=group_labels)

In [11]:
WB_africadata_2000_df2

Unnamed: 0,series_id,country_code,country_name,year,value,Year Group
0,SH.HIV.INCD.TL,KEN,Kenya,2017,3.400000e+04,2016 - 2020
1,SH.HIV.INCD.TL,LBR,Liberia,2017,1.600000e+03,2016 - 2020
2,SH.HIV.INCD.TL,LSO,Lesotho,2017,1.000000e+04,2016 - 2020
3,SH.HIV.INCD.TL,MDG,Madagascar,2017,7.200000e+03,2016 - 2020
4,SH.HIV.INCD.TL,MOZ,Mozambique,2017,1.300000e+05,2016 - 2020
...,...,...,...,...,...,...
652157,VA.NO.SRC,WSM,Samoa,2006,3.000000e+00,2006 - 2010
652158,FM.LBL.BMNY.GD.ZS,TCD,Chad,2010,1.146678e+01,2006 - 2010
652159,NE.CON.PRVT.CN.AD,SDN,Sudan,2000,2.199464e+10,2000 - 2005
652160,NE.CON.PRVT.CN.AD,SDN,Sudan,2003,3.190503e+10,2000 - 2005


In [12]:
# Number of rows available for each country
country_counts = WB_africadata_2000_df2['country_name'].value_counts()

print("Counts of entries for each country name:", country_counts, sep="\n")

Counts of entries for each country name:
Ghana                       23636
Egypt, Arab Rep.            23602
Senegal                     23478
Tanzania                    23326
Madagascar                  23133
Tunisia                     23069
Burkina Faso                22825
Togo                        22793
Kenya                       22709
Uganda                      22644
Mozambique                  22352
Rwanda                      22281
Malawi                      22053
Botswana                    22013
Lesotho                     21918
Nigeria                     21857
Ethiopia                    21735
Zambia                      21589
Angola                      21552
Sierra Leone                20680
Sudan                       20533
Zimbabwe                    20436
Mauritania                  20087
Gabon                       19102
Liberia                     19072
Chad                        18744
Samoa                       18738
Central African Republic    18315
Djibout

# Slicing of the Data
### In order to analyze the data and to understand the impact of Foreign Aid on poverty rate, literacy and mortality, the data was sliced by "series_id". The definition of the different "series_id" values (indicators) is described in a second file downloaded from the Nasdaq API and loaded as 'WB_metadata_df'.

### To expedite the search and filtering of the relevant indicators, a function was created to search the text strings: "key_word". This function is case sensitive, and its use is combined with a review of the search output to identify the indicators that best match our work objective.

In [13]:
# WB id series definition data set
WB_metadata = Path("Resources/WB_METADATA_f7ce7fba293ccc6eb39cdf15fb097982.csv")

In [14]:
# Read data file with the Pandas library
WB_metadata_df = pd.read_csv(WB_metadata)

In [15]:
WB_metadata_df

Unnamed: 0,series_id,name,description
0,DC.DAC.DEUL.CD,"Net bilateral aid flows from DAC donors, Germa...",Net bilateral aid flows from DAC donors are th...
1,RQ.STD.ERR,Regulatory Quality: Standard Error,Regulatory Quality captures perceptions of the...
2,EG.USE.PCAP.KG.OE,Energy use (kg of oil equivalent per capita),Energy use refers to use of primary energy bef...
3,EN.POP.EL5M.UR.ZS,Urban population living in areas where elevati...,Urban population below 5m is the percentage of...
4,per_lm_alllm.cov_q1_tot,Coverage of unemployment benefits and ALMP in ...,Coverage of unemployment benefits and active l...
...,...,...,...
1479,SL.TLF.BASC.FE.ZS,"Labor force with basic education, female (% of...",The ratio of the labor force with basic educat...
1480,SL.GDP.PCAP.EM.KD,GDP per person employed (constant 2021 PPP $),GDP per person employed is gross domestic prod...
1481,SL.TLF.INTM.MA.ZS,"Labor force with intermediate education, male ...",The ratio of the labor force with intermediate...
1482,SL.UEM.NEET.FE.ZS,"Share of youth not in education, employment or...","Share of youth not in education, employment or..."


In [16]:
# This function will search within a text string for a given 'key word'. It is case sensitive

def key_word(df, search_col, word_txt, result_col):
    """Return the ``result_col`` values of rows whose ``search_col`` text contains a word.

    Each text is split on whitespace and ``word_txt`` must be exactly equal to one
    of the resulting tokens. The match is therefore case sensitive, and punctuation
    attached to a word (e.g. ``"population,"``) prevents a match.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the text column and the result column.
    search_col : str
        Name of the column whose text is searched.
    word_txt : str
        The keyword to look for (whole-word, case-sensitive).
    result_col : str
        Name of the column whose values are collected for matching rows.

    Returns
    -------
    list
        Values of ``result_col`` for the matching rows, in row order. When nothing
        matches, "Keyword Not found" is printed and an empty list is returned, so
        callers can still use ``len(...)`` or ``Series.isin(...)`` on the result.
    """
    matches = []
    # Walk both columns in parallel instead of looking rows up by a 0..n-1 label,
    # so frames with a filtered or otherwise non-default index work as well.
    for text, result in zip(df[search_col], df[result_col]):
        # Missing entries (NaN/None) are not strings and cannot contain the keyword.
        if isinstance(text, str) and word_txt in text.split():
            matches.append(result)

    if not matches:
        print("Keyword Not found")
    return matches

In [17]:
population_search = key_word(WB_metadata_df, 'name', 'population', 'series_id')

In [18]:
len(population_search)

96

In [19]:
population_search

['EN.POP.EL5M.UR.ZS',
 'FX.OWN.TOTL.60.ZS',
 'SL.TLF.ADVN.FE.ZS',
 'FX.OWN.TOTL.OL.ZS',
 'SP.URB.TOTL',
 'SL.TLF.ADVN.MA.ZS',
 'SL.TLF.INTM.ZS',
 'EN.POP.EL5M.RU.ZS',
 'SH.UHC.NOP2.ZS',
 'SH.UHC.OOPC.25.ZS',
 'SL.EMP.1524.SP.MA.NE.ZS',
 'SH.UHC.NOP1.ZS',
 'SL.TLF.CACT.NE.ZS',
 'SL.TLF.CACT.FE.NE.ZS',
 'SL.TLF.BASC.ZS',
 'SI.SPR.PC40.ZG',
 'SP.RUR.TOTL.ZS',
 'SP.URB.TOTL.IN.ZS',
 'EN.ATM.PM25.MC.ZS',
 'SL.TLF.CACT.ZS',
 'EN.ATM.PM25.MC.T2.ZS',
 'SL.TLF.ADVN.ZS',
 'EN.ATM.PM25.MC.T3.ZS',
 'SP.RUR.TOTL',
 'SE.SEC.CUAT.LO.ZS',
 'SP.URB.GROW',
 'FX.OWN.TOTL.MA.ZS',
 'SH.CON.1524.MA.ZS',
 'SL.EMP.1524.SP.FE.ZS',
 'SL.EMP.TOTL.SP.MA.ZS',
 'SL.TLF.CACT.MA.ZS',
 'SL.EMP.1524.SP.MA.ZS',
 'SE.SEC.CUAT.PO.FE.ZS',
 'SE.TER.CUAT.ST.MA.ZS',
 'SE.SEC.CUAT.UP.ZS',
 'SE.TER.CUAT.MS.ZS',
 'SN.ITK.MSFI.ZS',
 'SM.POP.REFG.OR',
 'SL.EMP.TOTL.SP.FE.NE.ZS',
 'SE.TER.CUAT.MS.FE.ZS',
 'SE.SEC.CUAT.LO.FE.ZS',
 'SI.SPR.PCAP.ZG',
 'SP.RUR.TOTL.ZG',
 'SE.TER.CUAT.ST.ZS',
 'SE.TER.CUAT.MS.MA.ZS',
 'SE.SEC.CUAT.UP.FE

In [25]:
# Metadata rows (id, name, description) for the series ids returned by the keyword search
population_search2 = WB_metadata_df.loc[WB_metadata_df['series_id'].isin(population_search)]

In [26]:
population_search2

Unnamed: 0,series_id,name,description
3,EN.POP.EL5M.UR.ZS,Urban population living in areas where elevati...,Urban population below 5m is the percentage of...
8,FX.OWN.TOTL.60.ZS,Account ownership at a financial institution o...,Account denotes the percentage of respondents ...
26,SL.TLF.ADVN.FE.ZS,"Labor force with advanced education, female (%...",The ratio of the labor force with advanced edu...
41,FX.OWN.TOTL.OL.ZS,Account ownership at a financial institution o...,Account denotes the percentage of respondents ...
42,SP.URB.TOTL,Urban population,Urban population refers to people living in ur...
...,...,...,...
1476,SL.EMP.1524.SP.NE.ZS,"Employment to population ratio, ages 15-24, to...",Employment to population ratio is the proporti...
1477,SL.EMP.1524.SP.ZS,"Employment to population ratio, ages 15-24, to...",Employment to population ratio is the proporti...
1478,SL.TLF.BASC.MA.ZS,"Labor force with basic education, male (% of m...",The ratio of the labor force with basic educat...
1479,SL.TLF.BASC.FE.ZS,"Labor force with basic education, female (% of...",The ratio of the labor force with basic educat...


In [47]:
#SP.URB.TOTL:Urban population, SP.URB.TOTL.IN.ZS:Urban population (% of total population), SP.URB.GROW:Urban population growth (annual %)

#SP.RUR.TOTL:Rural population, SP.RUR.TOTL.ZS:Rural population (% of total population), SP.RUR.TOTL.ZG:Rural population growth (annual %),

#population_search_choiceURBT = ['SP.URB.TOTL']
#population_search_choiceRURT = ['SP.RUR.TOTL']
#population_search_choiceURBP = ['SP.URB.TOTL.IN.ZS']
#population_search_choiceRURP = ['SP.RUR.TOTL.ZS']
#population_search_choiceURBG = ['SP.URB.GROW']
#population_search_choiceRURG = ['SP.RUR.TOTL.ZG']

In [50]:
# Rows of the 'SP.URB.TOTL' (urban population) series only
WB_pop_URBT_df = WB_africadata_2000_df2[WB_africadata_2000_df2['series_id'] == 'SP.URB.TOTL']
WB_pop_URBT_df

Unnamed: 0,series_id,country_code,country_name,year,value,Year Group
335909,SP.URB.TOTL,AGO,Angola,2000,8211294.0,2000 - 2005
335910,SP.URB.TOTL,BFA,Burkina Faso,2000,2120383.0,2000 - 2005
335911,SP.URB.TOTL,BWA,Botswana,2000,919084.0,2000 - 2005
335912,SP.URB.TOTL,CAF,Central African Republic,2000,1414914.0,2000 - 2005
335913,SP.URB.TOTL,DJI,Djibouti,2000,567893.0,2000 - 2005
...,...,...,...,...,...,...
336655,SP.URB.TOTL,TZA,Tanzania,2023,25227921.0,2021 - 2023
336656,SP.URB.TOTL,UGA,Uganda,2023,13005977.0,2021 - 2023
336657,SP.URB.TOTL,WSM,Samoa,2023,39512.0,2021 - 2023
336658,SP.URB.TOTL,ZMB,Zambia,2023,9530988.0,2021 - 2023


In [51]:
WB_pop_RURT_df = WB_africadata_2000_df2.loc[WB_africadata_2000_df2['series_id'] == 'SP.RUR.TOTL']
WB_pop_RURT_df

Unnamed: 0,series_id,country_code,country_name,year,value,Year Group
391064,SP.RUR.TOTL,TGO,Togo,2018,4691053.0,2016 - 2020
391079,SP.RUR.TOTL,LSO,Lesotho,2019,1589485.0,2016 - 2020
391147,SP.RUR.TOTL,LBR,Liberia,2022,2488813.0,2021 - 2023
508799,SP.RUR.TOTL,AGO,Angola,2000,8182768.0,2000 - 2005
508800,SP.RUR.TOTL,BFA,Burkina Faso,2000,9762505.0,2000 - 2005
...,...,...,...,...,...,...
509535,SP.RUR.TOTL,TZA,Tanzania,2023,42210185.0,2021 - 2023
509536,SP.RUR.TOTL,UGA,Uganda,2023,35576357.0,2021 - 2023
509537,SP.RUR.TOTL,WSM,Samoa,2023,186169.0,2021 - 2023
509538,SP.RUR.TOTL,ZMB,Zambia,2023,11038749.0,2021 - 2023


In [52]:
WB_pop_URBP_df = WB_africadata_2000_df2.loc[WB_africadata_2000_df2['series_id'] == 'SP.URB.TOTL.IN.ZS']
WB_pop_URBP_df

Unnamed: 0,series_id,country_code,country_name,year,value,Year Group
145759,SP.URB.TOTL.IN.ZS,GAB,Gabon,2017,88.976,2016 - 2020
466045,SP.URB.TOTL.IN.ZS,GAB,Gabon,2009,84.955,2006 - 2010
466046,SP.URB.TOTL.IN.ZS,AGO,Angola,2000,50.087,2000 - 2005
466047,SP.URB.TOTL.IN.ZS,BFA,Burkina Faso,2000,17.844,2000 - 2005
466048,SP.URB.TOTL.IN.ZS,BWA,Botswana,2000,53.219,2000 - 2005
...,...,...,...,...,...,...
466783,SP.URB.TOTL.IN.ZS,TZA,Tanzania,2023,37.409,2021 - 2023
466784,SP.URB.TOTL.IN.ZS,UGA,Uganda,2023,26.771,2021 - 2023
466785,SP.URB.TOTL.IN.ZS,WSM,Samoa,2023,17.508,2021 - 2023
466786,SP.URB.TOTL.IN.ZS,ZMB,Zambia,2023,46.335,2021 - 2023


In [53]:
WB_pop_URBP_df = WB_pop_URBP_df.rename(columns = {'value': 'Population (%)'})
WB_pop_URBP_df

Unnamed: 0,series_id,country_code,country_name,year,Population (%),Year Group
145759,SP.URB.TOTL.IN.ZS,GAB,Gabon,2017,88.976,2016 - 2020
466045,SP.URB.TOTL.IN.ZS,GAB,Gabon,2009,84.955,2006 - 2010
466046,SP.URB.TOTL.IN.ZS,AGO,Angola,2000,50.087,2000 - 2005
466047,SP.URB.TOTL.IN.ZS,BFA,Burkina Faso,2000,17.844,2000 - 2005
466048,SP.URB.TOTL.IN.ZS,BWA,Botswana,2000,53.219,2000 - 2005
...,...,...,...,...,...,...
466783,SP.URB.TOTL.IN.ZS,TZA,Tanzania,2023,37.409,2021 - 2023
466784,SP.URB.TOTL.IN.ZS,UGA,Uganda,2023,26.771,2021 - 2023
466785,SP.URB.TOTL.IN.ZS,WSM,Samoa,2023,17.508,2021 - 2023
466786,SP.URB.TOTL.IN.ZS,ZMB,Zambia,2023,46.335,2021 - 2023


In [54]:
# Split up our data into groups based upon 'country_name' and 'year'
WB_pop_URBP_groups = WB_pop_URBP_df.groupby(['country_name','year'])
# .head() on a GroupBy returns the first rows of every group, not of the whole frame
WB_pop_URBP_groups.head()

Unnamed: 0,series_id,country_code,country_name,year,Population (%),Year Group
145759,SP.URB.TOTL.IN.ZS,GAB,Gabon,2017,88.976,2016 - 2020
466045,SP.URB.TOTL.IN.ZS,GAB,Gabon,2009,84.955,2006 - 2010
466046,SP.URB.TOTL.IN.ZS,AGO,Angola,2000,50.087,2000 - 2005
466047,SP.URB.TOTL.IN.ZS,BFA,Burkina Faso,2000,17.844,2000 - 2005
466048,SP.URB.TOTL.IN.ZS,BWA,Botswana,2000,53.219,2000 - 2005
...,...,...,...,...,...,...
466783,SP.URB.TOTL.IN.ZS,TZA,Tanzania,2023,37.409,2021 - 2023
466784,SP.URB.TOTL.IN.ZS,UGA,Uganda,2023,26.771,2021 - 2023
466785,SP.URB.TOTL.IN.ZS,WSM,Samoa,2023,17.508,2021 - 2023
466786,SP.URB.TOTL.IN.ZS,ZMB,Zambia,2023,46.335,2021 - 2023


In [None]:
# Split up our data into groups based upon 'country_name' and 'Year Group'
WB_pop_URBP_groups2 = WB_pop_URBP_df.groupby(['Year Group','country_name'])
WB_pop_URBP_groups2.head()

In [None]:
WB_pop_RURP_df = WB_africadata_2000_df2.loc[WB_africadata_2000_df2['series_id'] == 'SP.RUR.TOTL.ZS']
WB_pop_RURP_df

In [None]:
WB_pop_URBG_df = WB_africadata_2000_df2.loc[WB_africadata_2000_df2['series_id'] == 'SP.URB.GROW']
WB_pop_URBG_df

In [None]:
# Rows of the 'SP.RUR.TOTL.ZG' series: rural population growth (annual %).
# The closing quote of the series id was missing, which made the cell a SyntaxError.
WB_pop_RURG_df = WB_africadata_2000_df2.loc[WB_africadata_2000_df2['series_id'] == 'SP.RUR.TOTL.ZG']
WB_pop_RURG_df

In [None]:
# NOTE(review): WB_pop1_df is never created in the visible notebook, so this rename
# raises NameError on a fresh kernel — confirm which population frame was meant.
WB_pop1_df = WB_pop1_df.rename(columns = {'value': 'Population'})
WB_pop1_df

In [46]:
# NOTE(review): population_search_choice2 is not defined in the visible notebook (the
# candidate id lists earlier on are commented out), so this cell raises NameError on a
# fresh kernel — confirm which series ids were intended.
WB_pop2_df = WB_africadata_2000_df2[WB_africadata_2000_df2["series_id"].isin(population_search_choice2)]
WB_pop2_df

Unnamed: 0,series_id,country_code,country_name,year,value,Year Group
140795,SP.RUR.TOTL.ZG,KEN,Kenya,2001,2.594721,2000 - 2005
140796,SP.RUR.TOTL.ZG,KEN,Kenya,2003,2.516856,2000 - 2005
140797,SP.RUR.TOTL.ZG,AGO,Angola,2004,0.923422,2000 - 2005
140799,SP.RUR.TOTL.ZG,GAB,Gabon,2010,-0.495245,2006 - 2010
140800,SP.RUR.TOTL.ZG,GHA,Ghana,2010,1.095754,2006 - 2010
...,...,...,...,...,...,...
606268,SP.RUR.TOTL.ZG,TZA,Tanzania,2023,1.764632,2021 - 2023
606269,SP.RUR.TOTL.ZG,UGA,Uganda,2023,1.949347,2021 - 2023
606270,SP.RUR.TOTL.ZG,WSM,Samoa,2023,1.603820,2021 - 2023
606271,SP.RUR.TOTL.ZG,ZMB,Zambia,2023,1.656604,2021 - 2023


In [32]:
# WB_pop_df is not created by any earlier cell, so a fresh kernel would raise
# NameError here. Rebuild it from the six urban/rural population indicators listed
# earlier in the notebook; the saved outputs (144 rows per country = 6 series x 24
# years) are consistent with this selection — TODO confirm against the original cell.
population_series_ids = [
    'SP.URB.TOTL',        # Urban population
    'SP.RUR.TOTL',        # Rural population
    'SP.URB.TOTL.IN.ZS',  # Urban population (% of total population)
    'SP.RUR.TOTL.ZS',     # Rural population (% of total population)
    'SP.URB.GROW',        # Urban population growth (annual %)
    'SP.RUR.TOTL.ZG',     # Rural population growth (annual %)
]
WB_pop_df = WB_africadata_2000_df2[WB_africadata_2000_df2["series_id"].isin(population_series_ids)]
WB_pop_df[["year"]].max()

year    2023
dtype: int64

In [33]:
pop_counts = WB_pop_df['country_name'].value_counts()

print("Counts of entries for each country name:")
print(pop_counts)

Counts of entries for each country name:
Kenya                       144
Somalia                     144
Uganda                      144
Madagascar                  144
Lesotho                     144
Tanzania                    144
Tunisia                     144
Samoa                       144
Togo                        144
Chad                        144
Sao Tome and Principe       144
Central African Republic    144
Botswana                    144
Djibouti                    144
Nigeria                     144
Sierra Leone                144
Angola                      144
Senegal                     144
Sudan                       144
Egypt, Arab Rep.            144
Zambia                      144
Burkina Faso                144
Ethiopia                    144
Zimbabwe                    144
Rwanda                      144
Malawi                      144
Mauritania                  144
Mozambique                  144
Ghana                       144
Gabon                       144

In [36]:
# Split up our data into groups based upon 'series_id' and 'country_name'
WB_pop_groups = WB_pop_df.groupby(['series_id','country_name'])
# .head() on a GroupBy returns the first rows of every group, not of the whole frame
WB_pop_groups.head()

Unnamed: 0,series_id,country_code,country_name,year,value,Year Group
140795,SP.RUR.TOTL.ZG,KEN,Kenya,2001,2.594721,2000 - 2005
140796,SP.RUR.TOTL.ZG,KEN,Kenya,2003,2.516856,2000 - 2005
140797,SP.RUR.TOTL.ZG,AGO,Angola,2004,0.923422,2000 - 2005
140799,SP.RUR.TOTL.ZG,GAB,Gabon,2010,-0.495245,2006 - 2010
140800,SP.RUR.TOTL.ZG,GHA,Ghana,2010,1.095754,2006 - 2010
...,...,...,...,...,...,...
548544,SP.URB.GROW,TUN,Tunisia,2004,1.524486,2000 - 2005
548545,SP.URB.GROW,TZA,Tanzania,2004,5.272588,2000 - 2005
548546,SP.URB.GROW,UGA,Uganda,2004,5.697538,2000 - 2005
548547,SP.URB.GROW,WSM,Samoa,2004,-0.780000,2000 - 2005


In [39]:
# Create a new df that holds the sum of population variables (series id) for each country
# NOTE(review): the sum runs over every year of each series; for the percentage and
# growth-rate series (e.g. SP.URB.TOTL.IN.ZS) a multi-year sum goes far beyond 100,
# so a mean may be what is wanted — confirm.
sum_pop = WB_pop_groups[['value']].sum()
sum_pop

Unnamed: 0_level_0,Unnamed: 1_level_0,value
series_id,country_name,Unnamed: 2_level_1
SP.RUR.TOTL,Angola,2.330939e+08
SP.RUR.TOTL,Botswana,1.873822e+07
SP.RUR.TOTL,Burkina Faso,3.033249e+08
SP.RUR.TOTL,Central African Republic,6.753092e+07
SP.RUR.TOTL,Chad,2.378302e+08
...,...,...
SP.URB.TOTL.IN.ZS,Togo,9.228160e+02
SP.URB.TOTL.IN.ZS,Tunisia,1.609563e+03
SP.URB.TOTL.IN.ZS,Uganda,4.893460e+02
SP.URB.TOTL.IN.ZS,Zambia,9.657170e+02


In [40]:
WB_pop_groups2 = WB_pop_df.groupby(['series_id','Year Group'])
WB_pop_groups2.head()
#total_pop = WB_pop_df.groupby("Year Group")["value"].sum()
#total_pop

Unnamed: 0,series_id,country_code,country_name,year,value,Year Group
140795,SP.RUR.TOTL.ZG,KEN,Kenya,2001,2.594721,2000 - 2005
140796,SP.RUR.TOTL.ZG,KEN,Kenya,2003,2.516856,2000 - 2005
140797,SP.RUR.TOTL.ZG,AGO,Angola,2004,0.923422,2000 - 2005
140799,SP.RUR.TOTL.ZG,GAB,Gabon,2010,-0.495245,2006 - 2010
140800,SP.RUR.TOTL.ZG,GHA,Ghana,2010,1.095754,2006 - 2010
...,...,...,...,...,...,...
549051,SP.URB.GROW,AGO,Angola,2021,4.111786,2021 - 2023
549052,SP.URB.GROW,BFA,Burkina Faso,2021,4.697431,2021 - 2023
549053,SP.URB.GROW,BWA,Botswana,2021,2.595763,2021 - 2023
549054,SP.URB.GROW,CAF,Central African Republic,2021,3.174372,2021 - 2023


In [42]:
sum_pop2 = WB_pop_groups2[['value']].sum()
sum_pop2

Unnamed: 0_level_0,Unnamed: 1_level_0,value
series_id,Year Group,Unnamed: 2_level_1
SP.RUR.TOTL,2000 - 2005,2455159000.0
SP.RUR.TOTL,2006 - 2010,2285418000.0
SP.RUR.TOTL,2011 - 2015,2515604000.0
SP.RUR.TOTL,2016 - 2020,2760983000.0
SP.RUR.TOTL,2021 - 2023,1768766000.0
SP.RUR.TOTL.ZG,2000 - 2005,316.0092
SP.RUR.TOTL.ZG,2006 - 2010,254.8041
SP.RUR.TOTL.ZG,2011 - 2015,235.2552
SP.RUR.TOTL.ZG,2016 - 2020,221.9342
SP.RUR.TOTL.ZG,2021 - 2023,114.1241


Year Group
2000 - 2005    3.601416e+09
2006 - 2010    3.470071e+09
2011 - 2015    3.963396e+09
2016 - 2020    4.522543e+09
2021 - 2023    2.995515e+09
Name: value, dtype: float64

In [None]:
# NOTE(review): sum_country_up is not defined in the visible notebook, so this cell
# raises NameError on a fresh kernel; the aid-pivot cells look carried over from a
# Foreign Aid analysis — confirm whether they belong in this notebook.
sum_country_up2 = sum_country_up.reset_index()

In [None]:
africa_aid_pivot = sum_country_up2.pivot(index='country_name', columns='Year Group', values='Foreign Aid (USD)')
africa_aid_pivot

In [None]:
africa_aid_pivot2 = pd.concat([africa_aid_pivot, country_aid], axis=1)
africa_aid_pivot2

In [None]:
africa_aid_pivot2 = africa_aid_pivot2.rename(columns = {'2000 - 2005': 'Aid 2000 - 2005', '2006 - 2010': 'Aid 2006 - 2010','2011 - 2015': 'Aid 2011 - 2015', '2016 - 2020': 'Aid 2016 - 2020', '2021 - 2023': 'Aid 2021 - 2022', 'Foreign Aid (USD)': 'Total Foreign Aid (USD)'})
africa_aid_pivot2

In [None]:
africa_aid_pivot2['Total Foreign Aid (USD)'].sum()

In [None]:
africa_aid_pivot3 = africa_aid_pivot2.reset_index()
africa_aid_pivot3

In [None]:
africa_aid_pivot3['Total Foreign Aid (USD)'].sum()

In [None]:
country_aid_pivot = sum_country_up2.pivot(index='Year Group', columns='country_name', values='Foreign Aid (USD)')
country_aid_pivot

In [None]:
# NOTE(review): aid_search is not defined in the visible notebook, so this cell raises
# NameError on a fresh kernel — presumably it was a key_word(...) result; confirm.
WB_aid_df2 = WB_africadata_2000_df[WB_africadata_2000_df["series_id"].isin(aid_search)]
WB_aid_df2

In [None]:
WB_aid_country_groups2 = WB_aid_df2.groupby(['country_name', 'year'])
WB_aid_country_groups2.head()

In [None]:
sum_country_all = WB_aid_country_groups2[['value']].sum()
sum_country_all

In [None]:
sum_country_all2 = sum_country_all.reset_index()

In [None]:
country_aid_pivot2 = sum_country_all2.pivot(index='year', columns='country_name', values='value')
country_aid_pivot2

In [None]:
country_aid_pivot2["Angola"] = country_aid_pivot2["Angola"].map("${:,.2f}".format)

In [None]:
country_aid_pivot2

In [None]:
poverty_search = key_word(WB_metadata_df, 'name', 'poverty', 'series_id')

In [None]:
len(poverty_search)

In [None]:
poverty_search

In [None]:
poverty_search2 = WB_metadata_df[WB_metadata_df['series_id'].isin(poverty_search)]

In [None]:
poverty_search2

In [None]:
WB_poverty_df = WB_africadata_2000_df2.loc[WB_africadata_2000_df2['series_id'] == "SI.POV.MDIM.XQ"]
WB_poverty_df


In [None]:
WB_poverty_country_groups = WB_poverty_df.groupby(['Year Group','country_name'])
WB_poverty_country_groups.head()

In [None]:
pov_counts = WB_poverty_df['country_name'].value_counts()

print("Counts of entries for each country name:")
print(pov_counts)

In [None]:
Poverty_search = key_word(WB_metadata_df, 'name', 'Poverty', 'series_id')

In [None]:
Poverty_search

In [None]:
Poverty_search2= WB_metadata_df[WB_metadata_df['series_id'].isin(Poverty_search)]

In [None]:
Poverty_search2

In [None]:
WB_Poverty_df = WB_africadata_2000_df2.loc[WB_africadata_2000_df2['series_id'] == "SI.POV.UMIC"]
WB_Poverty_df

In [None]:
WB_Poverty_df = WB_Poverty_df.rename(columns = {'value': 'Poverty Ratio < $6.85/day (% Pop.)'})
WB_Poverty_df

In [None]:
Pov_counts = WB_Poverty_df['country_name'].value_counts()

print("Counts of entries for each country name:")
print(Pov_counts)

In [None]:
# Overall mean of the poverty ratio column across all rows (no grouping is applied here)
avg_Poverty = WB_Poverty_df[['Poverty Ratio < $6.85/day (% Pop.)']].mean()
avg_Poverty

In [None]:
WB_Pov_country_groups = WB_Poverty_df.groupby(['Year Group','country_name'])
WB_Pov_country_groups.head()

In [None]:
avg_Poverty = WB_Pov_country_groups[['Poverty Ratio < $6.85/day (% Pop.)']].mean()
avg_Poverty

In [None]:
avg_Poverty.dropna(how='all')

In [None]:
total_Pov = WB_Poverty_df.groupby("Year Group")["Poverty Ratio < $6.85/day (% Pop.)"].mean()
total_Pov

In [None]:
# Create a pie chart of the mean poverty headcount ratio for each Year Group.
# Series.plot returns the Axes it drew on; the y-label must be set on that same
# object (the previous code called an undefined name, country_pie).
Poverty_pie = total_Pov.plot(kind="pie", y='Poverty Ratio < $6.85/day (% Pop.)', title=("Poverty headcount ratio at $6.85 a day"), autopct="%1.1f%%")
Poverty_pie.set_ylabel("Poverty Ratio < $6.85/day (% Pop.)")

# Equal aspect ratio so the pie is drawn as a circle
plt.axis("equal")
plt.show()

In [None]:
#WB_aid_df2 = WB_africadata_2000_df[WB_africadata_2000_df["series_id"].isin(aid_search)]
#WB_aid_df2
WB_Poverty_df2 = WB_africadata_2000_df.loc[WB_africadata_2000_df['series_id'] == "SI.POV.UMIC"]
WB_Poverty_df2

In [None]:
WB_Pov_country_groups2 = WB_Poverty_df2.groupby(['country_name', 'year'])
WB_Pov_country_groups2.head()

In [None]:
sum_Pov_all = WB_Pov_country_groups2[['value']].sum()
sum_Pov_all

In [None]:
sum_Pov_all2 = sum_Pov_all.reset_index()

In [None]:
country_Pov_pivot2 = sum_Pov_all2.pivot(index='year', columns='country_name', values='value')
country_Pov_pivot2

In [None]:
country_Pov_pivot3 = country_Pov_pivot2.reset_index()

In [None]:
country_Pov_pivot3.dropna(how='all')

In [None]:
country_Pov_pivot2["Angola"] = country_Pov_pivot2["Angola"].map("{:.1f}%".format)
country_Pov_pivot2

In [None]:
#WB_aid_df2 = WB_africadata_2000_df[WB_africadata_2000_df["series_id"].isin(aid_search)]
#WB_aid_df2
WB_Poverty_df2 = WB_africadata_2000_df.loc[WB_africadata_2000_df['series_id'] == "SI.POV.UMIC"]
WB_Poverty_df2

WB_Pov_country_groups2 = WB_Poverty_df2.groupby(['country_name', 'year'])
WB_Pov_country_groups2.head()

sum_Pov_all = WB_Pov_country_groups2[['value']].sum()
sum_Pov_all

sum_Pov_all2 = sum_Pov_all.reset_index()

country_Pov_pivot2 = sum_Pov_all2.pivot(index='year', columns='country_name', values='value')
country_Pov_pivot2

country_Pov_pivot2["Angola"] = country_Pov_pivot2["Angola"].map("{:.1f}%".format)
country_Pov_pivot2

In [None]:
Mortality_search = key_word(WB_metadata_df, 'name', 'Mortality', 'series_id')

In [None]:
Mortality_search

In [None]:
Mortality_search2= WB_metadata_df[WB_metadata_df['series_id'].isin(Mortality_search)]

In [None]:
Mortality_search2

In [None]:
#WB_Mortality_df = WB_africadata_2000_df2.loc[WB_africadata_2000_df2['series_id'] == "SH.STA.WASH.P5"]
#WB_Mortality_df

In [None]:
# These indicators might provide more data for the Mortality analysis: each rate is expressed per 1,000 live births (female, male, combined)
#SH.DYN.MORT.FE - mortality under age 5, females
#SH.DYN.MORT.MA - mortality under age 5, males
#SH.DYN.MORT - mortality under age 5, combined

In [None]:
# Under-5 mortality indicators: female, male and combined
WB_Mortality_df = WB_africadata_2000_df2[
    WB_africadata_2000_df2['series_id'].isin(["SH.DYN.MORT.FE", "SH.DYN.MORT.MA", "SH.DYN.MORT"])
]
WB_Mortality_df

In [None]:
Mortality_counts = WB_Mortality_df['country_name'].value_counts()

print("Counts of entries for each country name:")
print(Mortality_counts)

In [None]:
# Give the generic 'value' column a descriptive name for the mortality analysis.
# NOTE(review): the World Bank publishes the SH.DYN.MORT* series per 1,000 live births,
# so the "per 100Th" wording in this label looks wrong — confirm, and if it is renamed,
# update every later cell that selects this column by name.
WB_Mortality_df = WB_Mortality_df.rename(columns = {'value': 'Mortality Rate < 5 yr (per 100Th)'})
WB_Mortality_df

In [None]:
WB_Mort_country_groups = WB_Mortality_df.groupby(['Year Group','country_name'])
WB_Mort_country_groups.head()

In [None]:
avg_Mortality_yr = WB_Mort_country_groups[['Mortality Rate < 5 yr (per 100Th)']].mean()
avg_Mortality_yr

In [None]:
WB_Mort_year_groups = WB_Mortality_df.groupby(['country_name','Year Group'])
WB_Mort_year_groups.head()

In [None]:
avg_Mortality_country = WB_Mort_year_groups[['Mortality Rate < 5 yr (per 100Th)']].mean()
avg_Mortality_country

In [None]:
total_Mort = WB_Mortality_df.groupby("Year Group")["Mortality Rate < 5 yr (per 100Th)"].mean()
total_Mort

In [None]:
education_search = key_word(WB_metadata_df, 'name', 'Educational', 'series_id')

In [None]:
education_search

In [None]:
len(education_search)

In [None]:
education_search2= WB_metadata_df[WB_metadata_df['series_id'].isin(education_search)]

In [None]:
education_search2

In [None]:
WB_education_df = WB_africadata_2000_df2.loc[WB_africadata_2000_df2['series_id'].isin(["SE.SEC.CUAT.LO.ZS", "SE.SEC.CUAT.UP.ZS", "SE.TER.CUAT.MS.ZS" ])]
WB_education_df

In [None]:
WB_education_df = WB_education_df.rename(columns = {'value': 'Educational Attainment (% Pop.> 25yrs)'})
WB_education_df

In [None]:
education_counts = WB_education_df['country_name'].value_counts()

print("Counts of entries for each country name:")
print(education_counts)

In [None]:
WB_education_country_groups = WB_education_df.groupby(['Year Group','country_name'])
WB_education_country_groups.head()

In [None]:
avg_education = WB_education_country_groups[['Educational Attainment (% Pop.> 25yrs)']].mean()
avg_education

In [None]:
avg_education['Educational Attainment (% Pop.> 25yrs)']

In [None]:
result = WB_education_df[WB_education_df['country_name'].str.contains('Senegal')]
result

In [None]:
corruption_search = key_word(WB_metadata_df, 'name', 'corruption', 'series_id')

In [None]:
len(corruption_search)

In [None]:
gdp_search = key_word(WB_metadata_df, 'name', 'GDP', 'series_id')

In [None]:
len(gdp_search)

In [None]:
# Keep a second handle on the per-Year-Group poverty means computed earlier.
# The variable is named total_Pov; "totalPov" does not exist and raised NameError.
sumPOV = total_Pov

In [None]:
# example for mapping
# NOTE(review): file_df is not defined anywhere in the visible notebook, so this cell
# raises NameError if executed — it looks like a reference snippet for the
# Series.map(format) pattern rather than part of the analysis; confirm.
# Currency columns: thousands separator, two decimals, "$" prefix
file_df["INCOME"] = file_df["INCOME"].map("${:,.2f}".format)
file_df["COSTS"] = file_df["COSTS"].map("${:,.2f}".format)
# Fraction columns: scaled by 100 and shown with one decimal and a "%" suffix
file_df["PERCENT30"] = (file_df["PERCENT30"]*100).map("{:.1f}%".format)
file_df["PERCENT3050"] = (file_df["PERCENT3050"]*100).map("{:.1f}%".format)
file_df["PERCENT50"] = (file_df["PERCENT50"]*100).map("{:.1f}%".format)
file_df["PERCENT_NODATA"] = (file_df["PERCENT_NODATA"]*100).map("{:.1f}%".format)
file_df["PERCENT_NOBURDEN"] = (file_df["PERCENT_NOBURDEN"]*100).map("{:.1f}%".format)
# Count column: thousands separator only
file_df["TOTAL"] = file_df["TOTAL"].map("{:,}".format)
file_df.head()