In [1]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
from config import (ServerName, UserName, Password, port, DataBase)

### Store CSV into DataFrame

In [2]:
csv_file = "../Resources/human_dev_ind.csv"
human_dev_ind_df = pd.read_csv(csv_file, header=1, encoding='latin-1')
human_dev_ind_df.head()

Unnamed: 0,HDI Rank,Country,1990,Unnamed: 3,1991,Unnamed: 5,1992,Unnamed: 7,1993,Unnamed: 9,...,2013,Unnamed: 49,2014,Unnamed: 51,2015,Unnamed: 53,2016,Unnamed: 55,2017,Unnamed: 57
0,168.0,Afghanistan,..,..,..,..,..,..,..,..,...,0.487,..,0.491,..,0.493,..,0.494,..,0.498,..
1,68.0,Albania,0.645,..,0.626,..,0.610,..,0.613,..,...,0.771,..,0.773,..,0.776,..,0.782,..,0.785,..
2,85.0,Algeria,0.577,..,0.581,..,0.587,..,0.591,..,...,0.745,..,0.747,..,0.749,..,0.753,..,0.754,..
3,35.0,Andorra,..,..,..,..,..,..,..,..,...,0.85,..,0.853,..,0.854,..,0.856,..,0.858,..
4,147.0,Angola,..,..,..,..,..,..,..,..,...,0.554,..,0.564,..,0.572,..,0.577,..,0.581,..


### Create new data with select columns

In [3]:
new_human_dev_ind_df = human_dev_ind_df[['Country', '2000', '2005', '2010', '2015', '2016']].copy()
new_human_dev_ind_df.head()

Unnamed: 0,Country,2000,2005,2010,2015,2016
0,Afghanistan,..,0.408,0.463,0.493,0.494
1,Albania,0.669,0.704,0.741,0.776,0.782
2,Algeria,0.644,0.692,0.729,0.749,0.753
3,Andorra,0.759,0.819,0.828,0.854,0.856
4,Angola,0.387,0.455,0.52,0.572,0.577


### Rename Columns

In [4]:
new_human_dev_ind_df = new_human_dev_ind_df.rename(columns={'2000': 2000, '2005': 2005, '2010' : 2010,
                                                            '2015': 2015, '2016': 2016})
new_human_dev_ind_df

Unnamed: 0,Country,2000,2005,2010,2015,2016
0,Afghanistan,..,0.408,0.463,0.493,0.494
1,Albania,0.669,0.704,0.741,0.776,0.782
2,Algeria,0.644,0.692,0.729,0.749,0.753
3,Andorra,0.759,0.819,0.828,0.854,0.856
4,Angola,0.387,0.455,0.520,0.572,0.577
...,...,...,...,...,...,...
185,Viet Nam,0.579,0.616,0.654,0.684,0.689
186,Yemen,0.443,0.474,0.498,0.483,0.462
187,Zambia,0.432,0.480,0.544,0.583,0.586
188,Zimbabwe,0.440,0.430,0.467,0.529,0.532


## Melt Years

In [5]:
new_human_dev_ind_df = pd.melt(new_human_dev_ind_df, id_vars=['Country'], value_vars=[2000, 2005, 2010, 2015, 2016])
new_human_dev_ind_df

Unnamed: 0,Country,variable,value
0,Afghanistan,2000,..
1,Albania,2000,0.669
2,Algeria,2000,0.644
3,Andorra,2000,0.759
4,Angola,2000,0.387
...,...,...,...
945,Viet Nam,2016,0.689
946,Yemen,2016,0.462
947,Zambia,2016,0.586
948,Zimbabwe,2016,0.532


### Rename Columns

In [6]:
new_human_dev_ind_df = new_human_dev_ind_df.rename(columns={"variable": "Year", "value": "human_dev_ind"})
new_human_dev_ind_df

Unnamed: 0,Country,Year,human_dev_ind
0,Afghanistan,2000,..
1,Albania,2000,0.669
2,Algeria,2000,0.644
3,Andorra,2000,0.759
4,Angola,2000,0.387
...,...,...,...
945,Viet Nam,2016,0.689
946,Yemen,2016,0.462
947,Zambia,2016,0.586
948,Zimbabwe,2016,0.532


### Drop na

In [7]:
new_human_dev_ind_df = new_human_dev_ind_df.dropna()
new_human_dev_ind_df

Unnamed: 0,Country,Year,human_dev_ind
0,Afghanistan,2000,..
1,Albania,2000,0.669
2,Algeria,2000,0.644
3,Andorra,2000,0.759
4,Angola,2000,0.387
...,...,...,...
944,Venezuela (Bolivarian Republic of),2016,0.766
945,Viet Nam,2016,0.689
946,Yemen,2016,0.462
947,Zambia,2016,0.586


## Order by Country Year

In [8]:
new_human_dev_ind_df = new_human_dev_ind_df.sort_values(by=['Country', 'Year'])
new_human_dev_ind_df

Unnamed: 0,Country,Year,human_dev_ind
0,Afghanistan,2000,..
190,Afghanistan,2005,0.408
380,Afghanistan,2010,0.463
570,Afghanistan,2015,0.493
760,Afghanistan,2016,0.494
...,...,...,...
188,Zimbabwe,2000,0.440
378,Zimbabwe,2005,0.430
568,Zimbabwe,2010,0.467
758,Zimbabwe,2015,0.529


### Eliminate first `Empty` caracter of Country 

In [9]:
new_human_dev_ind_df['Country'] = new_human_dev_ind_df['Country'].str[1:]
new_human_dev_ind_df

Unnamed: 0,Country,Year,human_dev_ind
0,Afghanistan,2000,..
190,Afghanistan,2005,0.408
380,Afghanistan,2010,0.463
570,Afghanistan,2015,0.493
760,Afghanistan,2016,0.494
...,...,...,...
188,Zimbabwe,2000,0.440
378,Zimbabwe,2005,0.430
568,Zimbabwe,2010,0.467
758,Zimbabwe,2015,0.529


### Replace Countries with Problems¶

In [10]:
csv_file = "../Resources/countries_problems_equivalence.csv"
count_equiv = pd.read_csv(csv_file)
count_equiv = count_equiv.dropna()
count_equiv = count_equiv[count_equiv["Country"] != count_equiv["Equivalence"]]
count_equiv = count_equiv.set_index('Country')
dict = count_equiv.to_dict()
dict = dict["Equivalence"]
# dict = {"Afghanistan": "cesar"}
new_human_dev_ind_df = new_human_dev_ind_df.replace({"Country": dict})
new_human_dev_ind_df

Unnamed: 0,Country,Year,human_dev_ind
0,Afghanistan,2000,..
190,Afghanistan,2005,0.408
380,Afghanistan,2010,0.463
570,Afghanistan,2015,0.493
760,Afghanistan,2016,0.494
...,...,...,...
188,Zimbabwe,2000,0.440
378,Zimbabwe,2005,0.430
568,Zimbabwe,2010,0.467
758,Zimbabwe,2015,0.529


### Drop Index

In [11]:
new_human_dev_ind_df.reset_index(drop=True)
new_human_dev_ind_df

Unnamed: 0,Country,Year,human_dev_ind
0,Afghanistan,2000,..
190,Afghanistan,2005,0.408
380,Afghanistan,2010,0.463
570,Afghanistan,2015,0.493
760,Afghanistan,2016,0.494
...,...,...,...
188,Zimbabwe,2000,0.440
378,Zimbabwe,2005,0.430
568,Zimbabwe,2010,0.467
758,Zimbabwe,2015,0.529


## Database Connection

In [12]:
rds_connection_string = f'{UserName}:{Password}@{ServerName}:{port}/{DataBase}'
engine = create_engine(f'postgresql://{rds_connection_string}')

### Consult tables in the Database

In [13]:
engine.table_names()

['Hum_Dev_Ind', 'Economic', 'Suicide']

## Save dataframes in database and query it

In [14]:
df = new_human_dev_ind_df
table_name = 'Hum_Dev_Ind'

df.to_sql(name= table_name, con=engine, if_exists='replace', index=False)
query = pd.read_sql_query(f'select * from "{table_name}"', con=engine).head()
query

Unnamed: 0,Country,Year,human_dev_ind
0,Afghanistan,2000,..
1,Afghanistan,2005,0.408
2,Afghanistan,2010,0.463
3,Afghanistan,2015,0.493
4,Afghanistan,2016,0.494


In [15]:
# Result for the main notebook
print('All the human-development-index data was uploaded to the database:')
print(query)

All the human-development-index data was uploaded to the database:
       Country  Year human_dev_ind
0  Afghanistan  2000            ..
1  Afghanistan  2005         0.408
2  Afghanistan  2010         0.463
3  Afghanistan  2015         0.493
4  Afghanistan  2016         0.494
