# World Bank Data
Nasdaq API dowload - .csv files

In [1]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path
import requests
import matplotlib.pyplot as plt
from scipy.stats import linregress
import numpy as np
import time


# Impor the Geoapify API key
from api_keys import geoapify_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

In [2]:
# WB data set
WB_totaldata = Path("Resources/WB_DATA_d950d0cd269a601150c0afd03b234ee2.csv")

In [3]:
# Read data file with the Pandas library
# encoding?, i.e encoding="ISO-8859-1"
WB_totaldata_df = pd.read_csv(WB_totaldata)

In [4]:
WB_totaldata_df

Unnamed: 0,series_id,country_code,country_name,year,value
0,SH.HIV.INCD.TL,ITA,Italy,2017,3700.000000
1,SH.HIV.INCD.TL,JAM,Jamaica,2017,1500.000000
2,SH.HIV.INCD.TL,JOR,Jordan,2017,100.000000
3,SH.HIV.INCD.TL,KEN,Kenya,2017,34000.000000
4,SH.HIV.INCD.TL,KGZ,Kyrgyz Republic,2017,730.000000
...,...,...,...,...,...
9087997,IT.NET.USER.ZS,THA,Thailand,2022,87.977281
9087998,IT.NET.USER.ZS,TUR,Turkiye,2022,83.437166
9087999,IT.NET.USER.ZS,UMC,Upper middle income,2022,76.384692
9088000,IT.NET.USER.ZS,URY,Uruguay,2022,89.873043


# World Bank Data Source Break-down
###The file name WB_totaldata_df is the dataframe of the '.csv' original file from the Nasdaq API. It includes the Foreign Aid data for developing countries from 1960 to 2022.

###The 'WB_totaldata_df' dataframe was reduced to include only the countries of the African continent and saved to a '.csv' for further use : 'WB_africadata_df' and 'WB_africadata.csv'.

###The 'WB_africadata_df' dataframe was further reduced to include only the data from the years 2000 through 2022 and saved to a '.csv' file for further use : 'WB_africadata_2000_df' and 'WB_africadata_2000.csv'

###To help with the anlysis of the data, the data was put in bins of 5 years from 2000 to 2020, and a last bin for 2021 to 2022.

In [5]:
african_countries = ["Angola", "Botswana", "Burkina Faso", "Central African Republic", "Chad", "Djibouti", "Egypt, Arab Rep.", "Ethiopia", "Gabon", "Ghana", "Guinea" "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Madagascar", "Malawi", "Mauritania", "Mozambique", "Nigeria", "Rwanda", "Samoa", "Sao Tome and Principe", "Senegal", "Sierra Leone", "Somalia", "Sudan", "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe"]

In [6]:
WB_africadata_df = WB_totaldata_df[WB_totaldata_df["country_name"].isin(african_countries)]

In [7]:
WB_africadata_df

Unnamed: 0,series_id,country_code,country_name,year,value
3,SH.HIV.INCD.TL,KEN,Kenya,2017,34000.000000
7,SH.HIV.INCD.TL,LBR,Liberia,2017,1600.000000
9,SH.HIV.INCD.TL,LSO,Lesotho,2017,10000.000000
15,SH.HIV.INCD.TL,MDG,Madagascar,2017,7200.000000
24,SH.HIV.INCD.TL,MOZ,Mozambique,2017,130000.000000
...,...,...,...,...,...
9087966,SP.POP.DPND.YG,ZMB,Zambia,1976,104.738757
9087967,SP.POP.DPND.YG,TUN,Tunisia,1982,75.528362
9087968,SP.POP.DPND.YG,UGA,Uganda,1996,104.408663
9087969,SP.POP.DPND.YG,SEN,Senegal,1998,85.386254


In [8]:
# Save the DataFrame as a CSV
# Note: To avoid any issues later, use encoding="utf-8"
WB_africadata_df.to_csv("Resources\WB_africadata.csv", encoding="utf-8", index=False)

In [9]:
WB_africadata_2000_df = WB_africadata_df.loc[WB_africadata_df['year'] >= 2000]
WB_africadata_2000_df

Unnamed: 0,series_id,country_code,country_name,year,value
3,SH.HIV.INCD.TL,KEN,Kenya,2017,3.400000e+04
7,SH.HIV.INCD.TL,LBR,Liberia,2017,1.600000e+03
9,SH.HIV.INCD.TL,LSO,Lesotho,2017,1.000000e+04
15,SH.HIV.INCD.TL,MDG,Madagascar,2017,7.200000e+03
24,SH.HIV.INCD.TL,MOZ,Mozambique,2017,1.300000e+05
...,...,...,...,...,...
9087901,VA.NO.SRC,WSM,Samoa,2006,3.000000e+00
9087948,FM.LBL.BMNY.GD.ZS,TCD,Chad,2010,1.146678e+01
9087956,NE.CON.PRVT.CN.AD,SDN,Sudan,2000,2.199464e+10
9087958,NE.CON.PRVT.CN.AD,SDN,Sudan,2003,3.190503e+10


In [10]:
# Save the DataFrame as a CSV
# Note: To avoid any issues later, use encoding="utf-8"
WB_africadata_2000_df.to_csv("Resources\WB_africadata_2000.csv", encoding="utf-8", index=False)

In [11]:
WB_africadata_2000_df[["year"]].min()

year    2000
dtype: int64

In [12]:
WB_africadata_2000_df[["year"]].max()

year    2023
dtype: int64

In [13]:
WB_africadata_2000_df2 = WB_africadata_2000_df

In [14]:
# Create bins in which to place values based upon IMDB vote count
bins = [0, 2005, 2010, 2015, 2020, 2023]

# Create labels for these bins
group_labels = ["2000 - 2005", "2006 - 2010", "2011 - 2015", "2016 - 2020", "2021 - 2023"]

In [15]:
# Slice the data and place it into bins
pd.cut(WB_africadata_2000_df2["year"], bins, labels=group_labels)

3          2016 - 2020
7          2016 - 2020
9          2016 - 2020
15         2016 - 2020
24         2016 - 2020
              ...     
9087901    2006 - 2010
9087948    2006 - 2010
9087956    2000 - 2005
9087958    2000 - 2005
9087972    2011 - 2015
Name: year, Length: 652162, dtype: category
Categories (5, object): ['2000 - 2005' < '2006 - 2010' < '2011 - 2015' < '2016 - 2020' < '2021 - 2023']

In [16]:
WB_africadata_2000_df2["Year Group"] = pd.cut(WB_africadata_2000_df2["year"], bins, labels=group_labels)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  WB_africadata_2000_df2["Year Group"] = pd.cut(WB_africadata_2000_df2["year"], bins, labels=group_labels)


In [17]:
WB_africadata_2000_df2

Unnamed: 0,series_id,country_code,country_name,year,value,Year Group
3,SH.HIV.INCD.TL,KEN,Kenya,2017,3.400000e+04,2016 - 2020
7,SH.HIV.INCD.TL,LBR,Liberia,2017,1.600000e+03,2016 - 2020
9,SH.HIV.INCD.TL,LSO,Lesotho,2017,1.000000e+04,2016 - 2020
15,SH.HIV.INCD.TL,MDG,Madagascar,2017,7.200000e+03,2016 - 2020
24,SH.HIV.INCD.TL,MOZ,Mozambique,2017,1.300000e+05,2016 - 2020
...,...,...,...,...,...,...
9087901,VA.NO.SRC,WSM,Samoa,2006,3.000000e+00,2006 - 2010
9087948,FM.LBL.BMNY.GD.ZS,TCD,Chad,2010,1.146678e+01,2006 - 2010
9087956,NE.CON.PRVT.CN.AD,SDN,Sudan,2000,2.199464e+10,2000 - 2005
9087958,NE.CON.PRVT.CN.AD,SDN,Sudan,2003,3.190503e+10,2000 - 2005


In [18]:
country_counts = WB_africadata_2000_df2['country_name'].value_counts()

print("Counts of entries for each country name:")
print(country_counts)

Counts of entries for each country name:
Ghana                       23636
Egypt, Arab Rep.            23602
Senegal                     23478
Tanzania                    23326
Madagascar                  23133
Tunisia                     23069
Burkina Faso                22825
Togo                        22793
Kenya                       22709
Uganda                      22644
Mozambique                  22352
Rwanda                      22281
Malawi                      22053
Botswana                    22013
Lesotho                     21918
Nigeria                     21857
Ethiopia                    21735
Zambia                      21589
Angola                      21552
Sierra Leone                20680
Sudan                       20533
Zimbabwe                    20436
Mauritania                  20087
Gabon                       19102
Liberia                     19072
Chad                        18744
Samoa                       18738
Central African Republic    18315
Djibout

In [19]:
# Save the DataFrame as a CSV
# Note: To avoid any issues later, use encoding="utf-8"
WB_africadata_2000_df2.to_csv("Resources\WB_africadata_2000_bins.csv", encoding="utf-8", index=False)

# Slicing of the Data
###In order to analyze the data and to understand the impact of the Foreign Aid with regards to poverty rate, literacy and mortality, the data was sliced by the "series_id". The deifinition of the diferent "series_id" values (indicators) is described in a second file downloaded from the Nasdaq API and saved as 'WB_metadata_df'.

###To facilitate/expedite the search and filtering of the relevant a function was created to search the text strings : "key_word". This function is case sensitive and it's used is combined with the review of the search output selection to identify the indicators that best match our work objective.

In [20]:
# WB id series definition data set
WB_metadata = Path("Resources/WB_METADATA_f7ce7fba293ccc6eb39cdf15fb097982.csv")

In [21]:
# Read data file with the Pandas library
WB_metadata_df = pd.read_csv(WB_metadata)

In [22]:
WB_metadata_df

Unnamed: 0,series_id,name,description
0,DC.DAC.DEUL.CD,"Net bilateral aid flows from DAC donors, Germa...",Net bilateral aid flows from DAC donors are th...
1,RQ.STD.ERR,Regulatory Quality: Standard Error,Regulatory Quality captures perceptions of the...
2,EG.USE.PCAP.KG.OE,Energy use (kg of oil equivalent per capita),Energy use refers to use of primary energy bef...
3,EN.POP.EL5M.UR.ZS,Urban population living in areas where elevati...,Urban population below 5m is the percentage of...
4,per_lm_alllm.cov_q1_tot,Coverage of unemployment benefits and ALMP in ...,Coverage of unemployment benefits and active l...
...,...,...,...
1479,SL.TLF.BASC.FE.ZS,"Labor force with basic education, female (% of...",The ratio of the labor force with basic educat...
1480,SL.GDP.PCAP.EM.KD,GDP per person employed (constant 2021 PPP $),GDP per person employed is gross domestic prod...
1481,SL.TLF.INTM.MA.ZS,"Labor force with intermediate education, male ...",The ratio of the labor force with intermediate...
1482,SL.UEM.NEET.FE.ZS,"Share of youth not in education, employment or...","Share of youth not in education, employment or..."


In [23]:
# Save the DataFrame as a CSV
# Note: To avoid any issues later, use encoding="utf-8"
WB_metadata_df.to_csv("Resources\WB_metadata.csv", encoding="utf-8", index=False)

In [24]:
def key_word(df, search_col, word_txt, result_col):
    i=0   
    result_ls = []
    for item in df[search_col]:
        text_ls = [x.strip() for x in df[search_col][i].split()]
        if word_txt in text_ls:
            result_ls.append(df[result_col][i])
        i+=1
    if i >= df[search_col].count():
        if len(result_ls) > 0:
            return result_ls
        else:
            return print("Keyword Not found")