# Africa Economies Data Analysis
Understanding African economies buy visualising GDP, GDP per capita, population, and growth trends in order to shed light on which economies are best for businesses to take a bet on purely from the status of the economy. The data for this analysis has been sourced from [the World Bank](data.worldbank.org). Additional details on the data source and the light cleaning done prior can be found on the `README.md` file.

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## Data Exporation

### Importing Sourced Data

In [6]:
## Import population data
population = pd.read_csv("data/population.csv")
population = population.drop(['Unnamed: 65'], axis=1)
population.head()

Unnamed: 0,Country,Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Aruba,ABW,54608.0,55811.0,56682.0,57475.0,58178.0,58782.0,59291.0,59522.0,...,102880.0,103594.0,104257.0,104874.0,105439.0,105962.0,106442.0,106585.0,106537.0,106445.0
1,Africa Eastern and Southern,AFE,130692579.0,134169237.0,137835590.0,141630546.0,145605995.0,149742351.0,153955516.0,158313235.0,...,567892149.0,583651101.0,600008424.0,616377605.0,632746570.0,649757148.0,667242986.0,685112979.0,702977106.0,720859132.0
2,Afghanistan,AFG,8622466.0,8790140.0,8969047.0,9157465.0,9355514.0,9565147.0,9783147.0,10010030.0,...,31541209.0,32716210.0,33753499.0,34636207.0,35643418.0,36686784.0,37769499.0,38972230.0,40099462.0,41128771.0
3,Africa Western and Central,AFW,97256290.0,99314028.0,101445032.0,103667517.0,105959979.0,108336203.0,110798486.0,113319950.0,...,387204553.0,397855507.0,408690375.0,419778384.0,431138704.0,442646825.0,454306063.0,466189102.0,478185907.0,490330870.0
4,Angola,AGO,5357195.0,5441333.0,5521400.0,5599827.0,5673199.0,5736582.0,5787044.0,5827503.0,...,26147002.0,27128337.0,28127721.0,29154746.0,30208628.0,31273533.0,32353588.0,33428486.0,34503774.0,35588987.0


In [10]:
## Import GDP data
gdp = pd.read_csv('data/gdp.csv')
gdp = gdp.drop(['Unnamed: 65'], axis=1)
gdp.head()

Unnamed: 0,Country,Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Aruba,ABW,,,,,,,,,...,2727850000.0,2790850000.0,2962907000.0,2983635000.0,3092429000.0,3276184000.0,3395799000.0,2558906000.0,3103184000.0,3544708000.0
1,Africa Eastern and Southern,AFE,18478100000.0,19366310000.0,20506470000.0,22242730000.0,24294330000.0,26619560000.0,28732790000.0,31592960000.0,...,986343000000.0,1006990000000.0,932513000000.0,890051000000.0,1028390000000.0,1012520000000.0,1006190000000.0,928880000000.0,1086530000000.0,1185140000000.0
2,Afghanistan,AFG,537777800.0,548888900.0,546666700.0,751111200.0,800000000.0,1006667000.0,1400000000.0,1673333000.0,...,20146420000.0,20497130000.0,19134220000.0,18116570000.0,18753460000.0,18053220000.0,18799440000.0,19955930000.0,14266500000.0,
3,Africa Western and Central,AFW,10411650000.0,11135920000.0,11951710000.0,12685810000.0,13849000000.0,14874760000.0,15845580000.0,14428490000.0,...,834097000000.0,894505000000.0,769263000000.0,692115000000.0,685630000000.0,768158000000.0,823406000000.0,786962000000.0,844928000000.0,875394000000.0
4,Angola,AGO,,,,,,,,,...,132339000000.0,135967000000.0,90496420000.0,52761620000.0,73690160000.0,79450690000.0,70897960000.0,48501560000.0,66505130000.0,106783000000.0


In [11]:
## Import Country metadata
meta = pd.read_csv('data/metadata.csv')
meta.head()

Unnamed: 0,Code,UNRegion,IncomeGroup,Region,Continent
0,ABW,Latin America & Caribbean,High income,,
1,AFE,,,,
2,AFG,South Asia,Low income,,
3,AFW,,,,
4,AGO,Sub-Saharan Africa,Lower middle income,Central Africa,Africa


### Aggregating the Relevant Data

In [57]:
## Create a base dataframe filtered to African countries only
data = meta[meta['Continent'] == 'Africa']
data = data.drop(['UNRegion','Continent'], axis=1)
print(data.shape)
data.head()

(54, 3)


Unnamed: 0,Code,IncomeGroup,Region
4,AGO,Lower middle income,Central Africa
16,BDI,Low income,Eastern Africa
18,BEN,Lower middle income,West Africa
19,BFA,Low income,West Africa
33,BWA,Upper middle income,Southern Africa


In [27]:
## Create a column list filter for our target years (Past ten years)
start, end = 2013, 2022
year_cols = list()
for i in range(start,end+1):
    year_cols.append(str(i))

print(year_cols)

['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022']


In [34]:
## Get mean GDP and average GDP growth rate our target years
gdp['mean_past_10yrs'] = gdp[year_cols].mean(axis=1) #Mean GDP for the past ten years
gdp['annual_growth_rate'] = (((gdp['2022']/gdp['2013'])**0.1)-1)*100 #Compounded annual gdp growth rate
gdp.describe()

Unnamed: 0,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2015,2016,2017,2018,2019,2020,2021,2022,mean_past_10yrs,annual_growth_rate
count,126.0,127.0,128.0,128.0,128.0,140.0,143.0,146.0,151.0,151.0,...,258.0,258.0,258.0,258.0,258.0,257.0,255.0,242.0,261.0,242.0
mean,72953400000.0,74860950000.0,78663250000.0,85180030000.0,94043100000.0,94294720000.0,104470700000.0,108743900000.0,113692200000.0,125538400000.0,...,2404723000000.0,2437760000000.0,2613242000000.0,2776038000000.0,2817910000000.0,2741965000000.0,3169612000000.0,3477949000000.0,2693374000000.0,2.508984
std,218674500000.0,228279800000.0,243862600000.0,262817400000.0,287730200000.0,300463000000.0,328223800000.0,347479100000.0,369622900000.0,406642600000.0,...,8172392000000.0,8309606000000.0,8845330000000.0,9414854000000.0,9553275000000.0,9325109000000.0,10675620000000.0,11317930000000.0,9173017000000.0,2.612216
min,12012020.0,11592020.0,9122751.0,10840100.0,12712470.0,13593930.0,14469080.0,15835110.0,14600000.0,15850000.0,...,36811940.0,41629060.0,45276600.0,48015260.0,54123200.0,51746590.0,60196410.0,59065980.0,47424190.0,-7.341342
25%,515168300.0,511142400.0,518597700.0,515086400.0,541579600.0,519933700.0,589421700.0,519031000.0,560538000.0,598861100.0,...,8752203000.0,8620984000.0,9319389000.0,10028040000.0,10355160000.0,9754600000.0,10562930000.0,13799430000.0,9739317000.0,0.909612
50%,2661058000.0,2417629000.0,2319980000.0,2680117000.0,2853831000.0,2913602000.0,3039859000.0,3145743000.0,3330372000.0,3787077000.0,...,49667670000.0,49843250000.0,54000090000.0,56097190000.0,60090990000.0,53734530000.0,64960730000.0,75067950000.0,57658590000.0,2.331409
75%,23980130000.0,23651800000.0,22892220000.0,24282850000.0,26252110000.0,25090670000.0,27934160000.0,28993890000.0,32094820000.0,36361040000.0,...,502084800000.0,505757000000.0,536924500000.0,552416000000.0,541949200000.0,547054000000.0,660530500000.0,899187000000.0,535160600000.0,3.825599
max,1381140000000.0,1446360000000.0,1546370000000.0,1670670000000.0,1832620000000.0,1994520000000.0,2161640000000.0,2308600000000.0,2491800000000.0,2745220000000.0,...,75283300000000.0,76519000000000.0,81484100000000.0,86542700000000.0,87777400000000.0,85272700000000.0,97153200000000.0,100880000000000.0,84846430000000.0,13.447604


In [35]:
## Get mean population and average population growth rate our target years
population['mean_past_10yrs'] = population[year_cols].mean(axis=1) #Mean population for the past ten years
population['annual_growth_rate'] = (((population['2022']/population['2013'])**0.1)-1)*100 #Compounded annual population growth rate
population.describe()

Unnamed: 0,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2015,2016,2017,2018,2019,2020,2021,2022,mean_past_10yrs,annual_growth_rate
count,264.0,264.0,264.0,264.0,264.0,264.0,264.0,264.0,264.0,264.0,...,265.0,265.0,265.0,265.0,265.0,265.0,265.0,265.0,265.0,265.0
mean,117271200.0,118880700.0,121051100.0,123733300.0,126437800.0,129181300.0,132040400.0,134898000.0,137835800.0,140878900.0,...,300481500.0,304326300.0,308177000.0,311964100.0,315670100.0,319259500.0,322522100.0,325473300.0,309726800.0,1.051933
std,369543900.0,374089700.0,380806100.0,389503900.0,398243900.0,407115300.0,416450400.0,425742400.0,435321800.0,445292700.0,...,941252200.0,952423800.0,963604100.0,974488400.0,985040500.0,995122500.0,1004225000.0,1012163000.0,967633300.0,1.050134
min,2646.0,2888.0,3171.0,3481.0,3811.0,4161.0,4531.0,4930.0,5354.0,5646.0,...,10877.0,10852.0,10828.0,10865.0,10956.0,11069.0,11204.0,11312.0,10978.0,-2.091314
25%,513221.2,523134.5,533759.5,544928.8,556663.0,565115.0,569147.0,577387.2,583270.0,587594.2,...,1788196.0,1777557.0,1791003.0,1797085.0,1788878.0,1790133.0,1786038.0,1761985.0,1791176.0,0.29087
50%,3757486.0,3887144.0,4023896.0,4139356.0,4224612.0,4277636.0,4331825.0,4385700.0,4450934.0,4530800.0,...,10358080.0,10325450.0,10300300.0,10395330.0,10447670.0,10606230.0,10505770.0,10486940.0,10348090.0,1.031831
75%,26706060.0,27486940.0,28302890.0,29147080.0,30016840.0,30848920.0,31630100.0,32092470.0,32499270.0,32771490.0,...,60730580.0,60627500.0,60536710.0,60421760.0,59872580.0,61704520.0,63588330.0,65497750.0,60058120.0,1.792657
max,3031474000.0,3072422000.0,3126850000.0,3193429000.0,3260442000.0,3328209000.0,3398480000.0,3468371000.0,3540164000.0,3614573000.0,...,7403850000.0,7490415000.0,7576442000.0,7660371000.0,7741775000.0,7820206000.0,7888306000.0,7950947000.0,7607865000.0,3.90434


In [58]:
## Aggregate all needed African data into one dataframe
data = pd.merge(data, gdp[['Code', 'Country','mean_past_10yrs','annual_growth_rate']], how='inner', on=['Code'])
data = pd.merge(data, population[['Code','mean_past_10yrs','annual_growth_rate']], how='inner', on=['Code'])

data.rename(columns = {'mean_past_10yrs_x':'MeanGDP','annual_growth_rate_x':'GDPGrowth','mean_past_10yrs_y':'MeanPopulation','annual_growth_rate_y':'PopulationGrowth'}, inplace = True)
data.head()

Unnamed: 0,Code,IncomeGroup,Region,Country,MeanGDP,GDPGrowth,MeanPopulation,PopulationGrowth
0,AGO,Lower middle income,Central Africa,Angola,85739250000.0,-2.122825,30791480.2,3.131034
1,BDI,Low income,Eastern Africa,Burundi,2763737000.0,3.136635,11445988.4,2.418655
2,BEN,Lower middle income,West Africa,Benin,14113750000.0,3.346069,11793723.0,2.621162
3,BFA,Low income,West Africa,Burkina Faso,15427040000.0,3.420895,20127705.8,2.544257
4,BWA,Upper middle income,Southern Africa,Botswana,16227110000.0,3.61451,2425331.3,1.722833


### Data Summary

In [59]:
## Add data on GDP per capita
data['GDPperCapita'] = data['MeanGDP']/data['MeanPopulation']
print(data.shape)

(54, 9)


In [60]:
## Check for missing data and column data types
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Code              54 non-null     object 
 1   IncomeGroup       54 non-null     object 
 2   Region            54 non-null     object 
 3   Country           54 non-null     object 
 4   MeanGDP           53 non-null     float64
 5   GDPGrowth         52 non-null     float64
 6   MeanPopulation    54 non-null     float64
 7   PopulationGrowth  54 non-null     float64
 8   GDPperCapita      53 non-null     float64
dtypes: float64(5), object(4)
memory usage: 3.9+ KB


In [61]:
## Explore the data for the observations with missing values
data[data['GDPGrowth'].isnull()==True]

Unnamed: 0,Code,IncomeGroup,Region,Country,MeanGDP,GDPGrowth,MeanPopulation,PopulationGrowth,GDPperCapita
15,ERI,Low income,Eastern Africa,Eritrea,,,3452642.2,1.118073,
42,SSD,Low income,Eastern Africa,South Sudan,14795490000.0,,10834860.3,-0.175032,1365.545452


In [69]:
## Let us query the original GDP data for South Sudan to understand these missing entries better
gdp[year_cols][gdp.Code == 'SSD']

Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
216,18426470000.0,13962210000.0,11997800000.0,,,,,,,


In [70]:
## Let us query the original GDP data for Eritrea to understand these missing entries better
gdp[year_cols][gdp.Code == 'ERI']

Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
69,,,,,,,,,,


From exploring the missing values and the original dataset, we can deduce:
- There is no GDP data on Eritrea on the original dataset for all our target years. We'll drop this observation for our analysis.
- South Sudan GDP data is only available for 2013 to 2015. Assumption: Consistent war in the country makes it difficult to collect data on GDP
- For South Sudan, we'll also drop these entries as extrapolating the data yet won't be accurate since war would affect the trend established in the 1st 3 years

In [71]:
## African economies summary statistics
data = data.dropna() #Will drop the countries with missing values
data.describe()

Unnamed: 0,MeanGDP,GDPGrowth,MeanPopulation,PopulationGrowth,GDPperCapita
count,52.0,52.0,52.0,52.0,52.0
mean,48767090000.0,2.458059,24327690.0,2.132633,2638.075177
std,93346650000.0,3.244742,34753720.0,0.725302,3002.760897
min,376944300.0,-5.836174,97723.2,0.028527,241.459017
25%,6270590000.0,0.408307,2422476.0,1.712894,762.648557
50%,14153890000.0,2.844298,13668790.0,2.273091,1516.580287
75%,47892920000.0,4.676385,29406340.0,2.635191,3324.332261
max,460964200000.0,10.281174,196222600.0,3.458938,14719.563821


In [72]:
## Explore summary statistics for all the 5 African regions
data.groupby(by='Region').describe()

Unnamed: 0_level_0,MeanGDP,MeanGDP,MeanGDP,MeanGDP,MeanGDP,MeanGDP,MeanGDP,MeanGDP,GDPGrowth,GDPGrowth,...,PopulationGrowth,PopulationGrowth,GDPperCapita,GDPperCapita,GDPperCapita,GDPperCapita,GDPperCapita,GDPperCapita,GDPperCapita,GDPperCapita
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Central Africa,9.0,25322470000.0,27103370000.0,376944300.0,11569650000.0,14194020000.0,38129390000.0,85739250000.0,9.0,1.435578,...,3.019424,3.131034,9.0,3082.109452,3292.35116,408.739088,751.220122,1794.872836,2784.51224,9446.693795
Eastern Africa,9.0,32165550000.0,35770520000.0,1438443000.0,2813783000.0,9707492000.0,57658590000.0,87329230000.0,9.0,5.375964,...,2.913966,3.192091,9.0,2595.840893,4604.931219,241.459017,773.322177,843.601389,1766.932058,14719.563821
North Africa,7.0,115844400000.0,112208200000.0,7581382000.0,51409560000.0,63809180000.0,149815200000.0,337070700000.0,7.0,0.136228,...,2.062124,2.67728,7.0,3874.546986,2469.55565,1543.427531,2540.931336,3435.157678,4037.291733,8986.797552
Southern Africa,12.0,42877520000.0,106163800000.0,1146958000.0,8724110000.0,13037890000.0,17918310000.0,379210100000.0,12.0,1.481778,...,2.336768,2.76783,12.0,3290.239357,3185.019024,497.48818,934.804157,1517.041424,5482.948424,10183.135453
West Africa,15.0,46203680000.0,116288000000.0,1332652000.0,3802477000.0,12208450000.0,19144470000.0,460964200000.0,15.0,3.185349,...,2.496444,3.458938,15.0,1298.24366,877.570057,544.470004,697.690673,865.760281,1775.811888,3555.280915
