In [2]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
from config import (ServerName, UserName, Password, port, DataBase)

### Store CSV into DataFrame

In [3]:
csv_file = "../Resources/economic.csv"
economic_df = pd.read_csv(csv_file)
economic_df.head()

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,2000 [YR2000],2005 [YR2005],2010 [YR2010],2015 [YR2015],2016 [YR2016]
0,Afghanistan,AFG,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,..,11.1752702416587,8.43329048161792,1.31004040372002,2.36671191565063
1,Afghanistan,AFG,GDP per capita (constant 2005 US$),NY.GDP.PCAP.KD,..,389.416357791097,553.300289383064,620.05652498092,617.889972323552
2,Afghanistan,AFG,"Inflation, consumer prices (annual %)",FP.CPI.TOTL.ZG,..,12.68626872,0.892536936,-1.533846583,2.169452176
3,Albania,ALB,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,6.66662049881369,5.72081994678555,3.71005779297435,2.2187520539882,3.35215921082479
4,Albania,ALB,GDP per capita (constant 2005 US$),NY.GDP.PCAP.KD,2337.9423863713,3189.39511117692,4094.35883191918,4524.64443153606,4683.80021257139


## Choose only countries in the List

In [4]:
# Reading the csv file with list of countries
country_list_df = pd.read_csv('../Resources/countries.csv') 
country_list = country_list_df["Countries"].values
# Showing the raw dataframe to be cleanned
# Select only countries in the list
economic_df = economic_df[economic_df["Country Code"].isin(country_list)]
economic_df.head()

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,2000 [YR2000],2005 [YR2005],2010 [YR2010],2015 [YR2015],2016 [YR2016]
0,Afghanistan,AFG,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,..,11.1752702416587,8.43329048161792,1.31004040372002,2.36671191565063
1,Afghanistan,AFG,GDP per capita (constant 2005 US$),NY.GDP.PCAP.KD,..,389.416357791097,553.300289383064,620.05652498092,617.889972323552
2,Afghanistan,AFG,"Inflation, consumer prices (annual %)",FP.CPI.TOTL.ZG,..,12.68626872,0.892536936,-1.533846583,2.169452176
3,Albania,ALB,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,6.66662049881369,5.72081994678555,3.71005779297435,2.2187520539882,3.35215921082479
4,Albania,ALB,GDP per capita (constant 2005 US$),NY.GDP.PCAP.KD,2337.9423863713,3189.39511117692,4094.35883191918,4524.64443153606,4683.80021257139


### Create new data with select columns

In [5]:
new_economic_df = economic_df[['Country Name', 'Series Name', '2000 [YR2000]', '2005 [YR2005]', '2010 [YR2010]', '2015 [YR2015]', '2016 [YR2016]']].copy()
new_economic_df.head()

Unnamed: 0,Country Name,Series Name,2000 [YR2000],2005 [YR2005],2010 [YR2010],2015 [YR2015],2016 [YR2016]
0,Afghanistan,GDP growth (annual %),..,11.1752702416587,8.43329048161792,1.31004040372002,2.36671191565063
1,Afghanistan,GDP per capita (constant 2005 US$),..,389.416357791097,553.300289383064,620.05652498092,617.889972323552
2,Afghanistan,"Inflation, consumer prices (annual %)",..,12.68626872,0.892536936,-1.533846583,2.169452176
3,Albania,GDP growth (annual %),6.66662049881369,5.72081994678555,3.71005779297435,2.2187520539882,3.35215921082479
4,Albania,GDP per capita (constant 2005 US$),2337.9423863713,3189.39511117692,4094.35883191918,4524.64443153606,4683.80021257139


### Replace Countries with equivalents

In [6]:
csv_file = "../Resources/countries_problems_equivalence.csv"
count_equiv = pd.read_csv(csv_file)
count_equiv = count_equiv.dropna()
count_equiv = count_equiv[count_equiv["Country"] != count_equiv["Equivalence"]]
count_equiv = count_equiv.set_index('Country')
dict = count_equiv.to_dict()
dict = dict["Equivalence"]
# dict = {"Afghanistan": "cesar"}
new_economic_df = new_economic_df.replace({"Country": dict})
new_economic_df

Unnamed: 0,Country Name,Series Name,2000 [YR2000],2005 [YR2005],2010 [YR2010],2015 [YR2015],2016 [YR2016]
0,Afghanistan,GDP growth (annual %),..,11.1752702416587,8.43329048161792,1.31004040372002,2.36671191565063
1,Afghanistan,GDP per capita (constant 2005 US$),..,389.416357791097,553.300289383064,620.05652498092,617.889972323552
2,Afghanistan,"Inflation, consumer prices (annual %)",..,12.68626872,0.892536936,-1.533846583,2.169452176
3,Albania,GDP growth (annual %),6.66662049881369,5.72081994678555,3.71005779297435,2.2187520539882,3.35215921082479
4,Albania,GDP per capita (constant 2005 US$),2337.9423863713,3189.39511117692,4094.35883191918,4524.64443153606,4683.80021257139
...,...,...,...,...,...,...,...
721,Zambia,GDP per capita (constant 2005 US$),938.351156683685,1107.72817478721,1463.21357310715,1618.45766533614,1629.59030492169
722,Zambia,"Inflation, consumer prices (annual %)",26.03041179,18.3244397,8.501761334,10.11059289,17.86996005
723,Zimbabwe,GDP growth (annual %),-3.05918962553469,-5.7110838330468,12.5810269716888,1.68959459207842,0.615713754356918
724,Zimbabwe,GDP per capita (constant 2005 US$),1261.1630883256,811.562954376746,719.9795162923,933.50326674854,917.563719684227


### Rename Columns

In [7]:
new_economic_df = new_economic_df.rename(columns={"Country Name" : "Country", "2000 [YR2000]": 2000, "2005 [YR2005]": 2005, "2010 [YR2010]": 2010, 
                                                  "2015 [YR2015]": 2015, "2016 [YR2016]": 2016})
new_economic_df 

Unnamed: 0,Country,Series Name,2000,2005,2010,2015,2016
0,Afghanistan,GDP growth (annual %),..,11.1752702416587,8.43329048161792,1.31004040372002,2.36671191565063
1,Afghanistan,GDP per capita (constant 2005 US$),..,389.416357791097,553.300289383064,620.05652498092,617.889972323552
2,Afghanistan,"Inflation, consumer prices (annual %)",..,12.68626872,0.892536936,-1.533846583,2.169452176
3,Albania,GDP growth (annual %),6.66662049881369,5.72081994678555,3.71005779297435,2.2187520539882,3.35215921082479
4,Albania,GDP per capita (constant 2005 US$),2337.9423863713,3189.39511117692,4094.35883191918,4524.64443153606,4683.80021257139
...,...,...,...,...,...,...,...
721,Zambia,GDP per capita (constant 2005 US$),938.351156683685,1107.72817478721,1463.21357310715,1618.45766533614,1629.59030492169
722,Zambia,"Inflation, consumer prices (annual %)",26.03041179,18.3244397,8.501761334,10.11059289,17.86996005
723,Zimbabwe,GDP growth (annual %),-3.05918962553469,-5.7110838330468,12.5810269716888,1.68959459207842,0.615713754356918
724,Zimbabwe,GDP per capita (constant 2005 US$),1261.1630883256,811.562954376746,719.9795162923,933.50326674854,917.563719684227


## Melt Years

In [8]:
new_economic_df = pd.melt(new_economic_df, id_vars=['Country', "Series Name"], value_vars=[2000, 2005, 2010, 2015,2016])
new_economic_df

Unnamed: 0,Country,Series Name,variable,value
0,Afghanistan,GDP growth (annual %),2000,..
1,Afghanistan,GDP per capita (constant 2005 US$),2000,..
2,Afghanistan,"Inflation, consumer prices (annual %)",2000,..
3,Albania,GDP growth (annual %),2000,6.66662049881369
4,Albania,GDP per capita (constant 2005 US$),2000,2337.9423863713
...,...,...,...,...
3250,Zambia,GDP per capita (constant 2005 US$),2016,1629.59030492169
3251,Zambia,"Inflation, consumer prices (annual %)",2016,17.86996005
3252,Zimbabwe,GDP growth (annual %),2016,0.615713754356918
3253,Zimbabwe,GDP per capita (constant 2005 US$),2016,917.563719684227


## Rename Year Column

In [9]:
new_economic_df = new_economic_df.rename(columns={"Country Name": "Country", "variable": "Year"})
new_economic_df

Unnamed: 0,Country,Series Name,Year,value
0,Afghanistan,GDP growth (annual %),2000,..
1,Afghanistan,GDP per capita (constant 2005 US$),2000,..
2,Afghanistan,"Inflation, consumer prices (annual %)",2000,..
3,Albania,GDP growth (annual %),2000,6.66662049881369
4,Albania,GDP per capita (constant 2005 US$),2000,2337.9423863713
...,...,...,...,...
3250,Zambia,GDP per capita (constant 2005 US$),2016,1629.59030492169
3251,Zambia,"Inflation, consumer prices (annual %)",2016,17.86996005
3252,Zimbabwe,GDP growth (annual %),2016,0.615713754356918
3253,Zimbabwe,GDP per capita (constant 2005 US$),2016,917.563719684227


### Replace ".." with Zero (0)

In [10]:
new_economic_df = new_economic_df.replace("..", 0)
new_economic_df

Unnamed: 0,Country,Series Name,Year,value
0,Afghanistan,GDP growth (annual %),2000,0
1,Afghanistan,GDP per capita (constant 2005 US$),2000,0
2,Afghanistan,"Inflation, consumer prices (annual %)",2000,0
3,Albania,GDP growth (annual %),2000,6.66662049881369
4,Albania,GDP per capita (constant 2005 US$),2000,2337.9423863713
...,...,...,...,...
3250,Zambia,GDP per capita (constant 2005 US$),2016,1629.59030492169
3251,Zambia,"Inflation, consumer prices (annual %)",2016,17.86996005
3252,Zimbabwe,GDP growth (annual %),2016,0.615713754356918
3253,Zimbabwe,GDP per capita (constant 2005 US$),2016,917.563719684227


### Reorganize Rows and Columns with Pivot Tables

In [11]:
new_economic_df = pd.pivot_table(new_economic_df, values='value', index=['Country', 'Year'],
                     columns=['Series Name'], aggfunc=np.sum)
new_economic_df

Unnamed: 0_level_0,Series Name,GDP growth (annual %),GDP per capita (constant 2005 US$),"Inflation, consumer prices (annual %)"
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,2000,0,0,0
Afghanistan,2005,11.1752702416587,389.416357791097,12.68626872
Afghanistan,2010,8.43329048161792,553.300289383064,0.892536936
Afghanistan,2015,1.31004040372002,620.05652498092,-1.533846583
Afghanistan,2016,2.36671191565063,617.889972323552,2.169452176
...,...,...,...,...
Zimbabwe,2000,-3.05918962553469,1261.1630883256,55.86645207
Zimbabwe,2005,-5.7110838330468,811.562954376746,302.1169963
Zimbabwe,2010,12.5810269716888,719.9795162923,3.034478884
Zimbabwe,2015,1.68959459207842,933.50326674854,-2.398709959


### Rename Remaining Columns

In [12]:
new_economic_df = new_economic_df.rename(columns={"GDP growth (annual %)": "GDP growth", 
                                                  "GDP per capita (constant 2005 US$)": "GDP per capita",
                                                  "Inflation, consumer prices (annual %)":"Inflation"})
new_economic_df

Unnamed: 0_level_0,Series Name,GDP growth,GDP per capita,Inflation
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,2000,0,0,0
Afghanistan,2005,11.1752702416587,389.416357791097,12.68626872
Afghanistan,2010,8.43329048161792,553.300289383064,0.892536936
Afghanistan,2015,1.31004040372002,620.05652498092,-1.533846583
Afghanistan,2016,2.36671191565063,617.889972323552,2.169452176
...,...,...,...,...
Zimbabwe,2000,-3.05918962553469,1261.1630883256,55.86645207
Zimbabwe,2005,-5.7110838330468,811.562954376746,302.1169963
Zimbabwe,2010,12.5810269716888,719.9795162923,3.034478884
Zimbabwe,2015,1.68959459207842,933.50326674854,-2.398709959


### Create new data with select columns

In [13]:
new_economic_df = new_economic_df[['GDP growth', 'Inflation']].copy()
new_economic_df

Unnamed: 0_level_0,Series Name,GDP growth,Inflation
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,2000,0,0
Afghanistan,2005,11.1752702416587,12.68626872
Afghanistan,2010,8.43329048161792,0.892536936
Afghanistan,2015,1.31004040372002,-1.533846583
Afghanistan,2016,2.36671191565063,2.169452176
...,...,...,...
Zimbabwe,2000,-3.05918962553469,55.86645207
Zimbabwe,2005,-5.7110838330468,302.1169963
Zimbabwe,2010,12.5810269716888,3.034478884
Zimbabwe,2015,1.68959459207842,-2.398709959


## Database Connection

In [14]:
rds_connection_string = f'{UserName}:{Password}@{ServerName}:{port}/{DataBase}'
engine = create_engine(f'postgresql://{rds_connection_string}')

### Consult tables in the Database

In [15]:
engine.table_names()

['Hum_Dev_Ind', 'Economic', 'Suicide']

## Save dataframes in database and query it

In [16]:
df = new_economic_df
table_name = 'Economic'

df.to_sql(name= table_name, con=engine, if_exists='replace', index=True)
query = pd.read_sql_query(f'select * from "{table_name}"', con=engine).head()
query

Unnamed: 0,Country,Year,GDP growth,Inflation
0,Afghanistan,2000,0.0,0.0
1,Afghanistan,2005,11.1752702416587,12.68626872
2,Afghanistan,2010,8.43329048161792,0.892536936
3,Afghanistan,2015,1.31004040372002,-1.533846583
4,Afghanistan,2016,2.36671191565063,2.169452176


In [17]:
# Result for the main notebook
print('All the economic data was uploaded to the database:')
print(query)

All the economic data was uploaded to the database:
       Country  Year        GDP growth     Inflation
0  Afghanistan  2000                 0             0
1  Afghanistan  2005  11.1752702416587   12.68626872
2  Afghanistan  2010  8.43329048161792   0.892536936
3  Afghanistan  2015  1.31004040372002  -1.533846583
4  Afghanistan  2016  2.36671191565063   2.169452176
