In [1]:
import pandas as pd
from sqlalchemy import create_engine

### Store CSV into DataFrame

In [2]:
csv_file = "../Resources/economic.csv"
economic_df = pd.read_csv(csv_file)
economic_df.head()

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,2000 [YR2000],2005 [YR2005],2010 [YR2010],2015 [YR2015],2016 [YR2016]
0,Afghanistan,AFG,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,..,11.1752702416587,8.43329048161792,1.31004040372002,2.36671191565063
1,Afghanistan,AFG,GDP per capita (constant 2005 US$),NY.GDP.PCAP.KD,..,389.416357791097,553.300289383064,620.05652498092,617.889972323552
2,Afghanistan,AFG,"Inflation, consumer prices (annual %)",FP.CPI.TOTL.ZG,..,12.68626872,0.892536936,-1.533846583,2.169452176
3,Albania,ALB,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,6.66662049881369,5.72081994678555,3.71005779297435,2.2187520539882,3.35215921082479
4,Albania,ALB,GDP per capita (constant 2005 US$),NY.GDP.PCAP.KD,2337.9423863713,3189.39511117692,4094.35883191918,4524.64443153606,4683.80021257139


### Create new data with select columns

In [3]:
new_economic_df = economic_df[['Country Name', 'Series Name', '2000 [YR2000]', '2005 [YR2005]', '2010 [YR2010]', '2015 [YR2015]', '2016 [YR2016]']].copy()
new_economic_df.head()

Unnamed: 0,Country Name,Series Name,2000 [YR2000],2005 [YR2005],2010 [YR2010],2015 [YR2015],2016 [YR2016]
0,Afghanistan,GDP growth (annual %),..,11.1752702416587,8.43329048161792,1.31004040372002,2.36671191565063
1,Afghanistan,GDP per capita (constant 2005 US$),..,389.416357791097,553.300289383064,620.05652498092,617.889972323552
2,Afghanistan,"Inflation, consumer prices (annual %)",..,12.68626872,0.892536936,-1.533846583,2.169452176
3,Albania,GDP growth (annual %),6.66662049881369,5.72081994678555,3.71005779297435,2.2187520539882,3.35215921082479
4,Albania,GDP per capita (constant 2005 US$),2337.9423863713,3189.39511117692,4094.35883191918,4524.64443153606,4683.80021257139


In [4]:
new_economic_df = new_economic_df.rename(columns={"2000 [YR2000]": 2000, "2005 [YR2005]": 2005, "2010 [YR2010]": 2010, 
                                                  "2015 [YR2015]": 2015, "2016 [YR2016]": 2016})
new_economic_df 

Unnamed: 0,Country Name,Series Name,2000,2005,2010,2015,2016
0,Afghanistan,GDP growth (annual %),..,11.1752702416587,8.43329048161792,1.31004040372002,2.36671191565063
1,Afghanistan,GDP per capita (constant 2005 US$),..,389.416357791097,553.300289383064,620.05652498092,617.889972323552
2,Afghanistan,"Inflation, consumer prices (annual %)",..,12.68626872,0.892536936,-1.533846583,2.169452176
3,Albania,GDP growth (annual %),6.66662049881369,5.72081994678555,3.71005779297435,2.2187520539882,3.35215921082479
4,Albania,GDP per capita (constant 2005 US$),2337.9423863713,3189.39511117692,4094.35883191918,4524.64443153606,4683.80021257139
...,...,...,...,...,...,...,...
726,,,,,,,
727,,,,,,,
728,,,,,,,
729,Data from database: Jobs,,,,,,


In [5]:
new_economic_df = pd.melt(new_economic_df, id_vars=['Country Name', "Series Name"], value_vars=[2000, 2005, 2010, 2015,2016])
new_economic_df

Unnamed: 0,Country Name,Series Name,variable,value
0,Afghanistan,GDP growth (annual %),2000,..
1,Afghanistan,GDP per capita (constant 2005 US$),2000,..
2,Afghanistan,"Inflation, consumer prices (annual %)",2000,..
3,Albania,GDP growth (annual %),2000,6.66662049881369
4,Albania,GDP per capita (constant 2005 US$),2000,2337.9423863713
...,...,...,...,...
3650,,,2016,
3651,,,2016,
3652,,,2016,
3653,Data from database: Jobs,,2016,


In [6]:
new_economic_df.dtypes

Country Name    object
Series Name     object
variable        object
value           object
dtype: object

In [7]:
new_economic_df = new_economic_df.dropna()
new_economic_df

Unnamed: 0,Country Name,Series Name,variable,value
0,Afghanistan,GDP growth (annual %),2000,..
1,Afghanistan,GDP per capita (constant 2005 US$),2000,..
2,Afghanistan,"Inflation, consumer prices (annual %)",2000,..
3,Albania,GDP growth (annual %),2000,6.66662049881369
4,Albania,GDP per capita (constant 2005 US$),2000,2337.9423863713
...,...,...,...,...
3645,Zambia,GDP per capita (constant 2005 US$),2016,1629.59030492169
3646,Zambia,"Inflation, consumer prices (annual %)",2016,17.86996005
3647,Zimbabwe,GDP growth (annual %),2016,0.615713754356918
3648,Zimbabwe,GDP per capita (constant 2005 US$),2016,917.563719684227


In [12]:
new_economic_df = new_economic_df.replace("..", 0)
new_economic_df

Unnamed: 0,Country Name,Series Name,variable,value
0,Afghanistan,GDP growth (annual %),2000,0
1,Afghanistan,GDP per capita (constant 2005 US$),2000,0
2,Afghanistan,"Inflation, consumer prices (annual %)",2000,0
3,Albania,GDP growth (annual %),2000,6.66662049881369
4,Albania,GDP per capita (constant 2005 US$),2000,2337.9423863713
...,...,...,...,...
3645,Zambia,GDP per capita (constant 2005 US$),2016,1629.59030492169
3646,Zambia,"Inflation, consumer prices (annual %)",2016,17.86996005
3647,Zimbabwe,GDP growth (annual %),2016,0.615713754356918
3648,Zimbabwe,GDP per capita (constant 2005 US$),2016,917.563719684227


In [15]:
new_economic_df['value'].astype(float)
new_economic_df.dtypes

Country Name    object
Series Name     object
variable         int64
value           object
dtype: object

In [8]:
new_economic_df = new_economic_df.dropna()


Unnamed: 0,Country Name,Series Name,variable,value
0,Afghanistan,GDP growth (annual %),2000,..
1,Afghanistan,GDP per capita (constant 2005 US$),2000,..
2,Afghanistan,"Inflation, consumer prices (annual %)",2000,..
3,Albania,GDP growth (annual %),2000,6.66662049881369
4,Albania,GDP per capita (constant 2005 US$),2000,2337.9423863713
...,...,...,...,...
3645,Zambia,GDP per capita (constant 2005 US$),2016,1629.59030492169
3646,Zambia,"Inflation, consumer prices (annual %)",2016,17.86996005
3647,Zimbabwe,GDP growth (annual %),2016,0.615713754356918
3648,Zimbabwe,GDP per capita (constant 2005 US$),2016,917.563719684227


In [14]:
pd.pivot_table(new_economic_df, values=['Series Name'], index=['Country Name', 'variable'], columns=['value'])


DataError: No numeric types to aggregate

In [34]:
pd.pivot_table(new_economic_df, values=['Series Name'], index=['Country Name', 'variable'], columns=['value'])


DataError: No numeric types to aggregate

In [33]:
new_economic_df['value'].astype(str).astype(float)

ValueError: could not convert string to float: '..'

In [14]:
pd.pivot_table(new_economic_df, values=['Series Name'], index=['Country Name', 'variable'], columns=['value'])


DataError: No numeric types to aggregate

DataError: No numeric types to aggregate

### Store JSON data into a DataFrame

In [6]:
json_file = "../Resources/customer_location.json"
customer_location_df = pd.read_json(json_file)
customer_location_df.head()

Unnamed: 0,id,address,longitude,latitude,us_state
0,1,043 Mockingbird Place,-86.5186,39.1682,Indiana
1,2,4 Prentice Point,-85.0707,41.0938,Indiana
2,3,46 Derek Junction,-96.7776,32.7673,Texas
3,4,11966 Old Shore Place,-94.3567,39.035,Missouri
4,5,5 Evergreen Circle,-73.9772,40.7808,New York


### Clean DataFrame

In [7]:
new_customer_location_df = customer_location_df[["id", "address", "us_state"]].copy()
new_customer_location_df.head()

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York


### Connect to local database

In [8]:
rds_connection_string = "postgres:postgres@localhost:5432/client_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [9]:
engine.table_names()

['customer_name', 'customer_location']

### Use pandas to load csv converted DataFrame into database

In [10]:
new_customer_data_df.to_sql(name='customer_name', con=engine, if_exists='append', index=False)

### Use pandas to load json converted DataFrame into database

In [11]:
new_customer_location_df.to_sql(name='customer_location', con=engine, if_exists='append', index=False)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [12]:
pd.read_sql_query('select * from customer_name', con=engine).head()

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Confirm data has been added by querying the customer_location table

In [13]:
pd.read_sql_query('select * from customer_location', con=engine).head()

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York
