In [2]:
import pandas as pd
from sqlalchemy import create_engine, inspect

In [None]:
# collect user and password from config.py file
from config import user, passw

### Store CSV into DataFrame

In [3]:
# Read the customer CSV (path is relative to this notebook) into a DataFrame
# and show it as the cell output.
csv_file = "../Resources/customer_data.csv"
customer_data_df = pd.read_csv(filepath_or_buffer=csv_file)
customer_data_df

Unnamed: 0,id,first_name,last_name,email,gender,car
0,1,Benetta,Cancott,bcancott0@studiopress.com,Female,Scion
1,2,Lilyan,Cherry,lcherry1@deliciousdays.com,Female,Chrysler
2,3,Ezekiel,Benasik,ebenasik2@wikia.com,Male,Mercedes-Benz
3,4,Kennedy,Atlay,katlay3@so-net.ne.jp,Male,Buick
4,5,Sanford,Salmen,ssalmen4@reuters.com,Male,Lincoln
...,...,...,...,...,...,...
995,996,Clare,Freshwater,cfreshwaterrn@fema.gov,Female,Nissan
996,997,Viole,Letrange,vletrangero@hao123.com,Female,Chevrolet
997,998,Tim,Colvill,tcolvillrp@storify.com,Female,GMC
998,999,Benny,Wafer,bwaferrq@tinypic.com,Female,Citroën


### Create a new DataFrame with selected columns

In [4]:
# Keep only the id and name columns; .copy() makes the result independent
# of customer_data_df.
new_customer_data_df = customer_data_df.loc[
    :, ['id', 'first_name', 'last_name']
].copy()
new_customer_data_df.head(n=5)

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Store JSON data into a DataFrame

In [5]:
# Read the customer location JSON (path is relative to this notebook) into a
# DataFrame and show it as the cell output.
json_file = "../Resources/customer_location.json"
customer_location_df = pd.read_json(path_or_buf=json_file)
customer_location_df

Unnamed: 0,id,address,longitude,latitude,us_state
0,1,043 Mockingbird Place,-86.5186,39.1682,Indiana
1,2,4 Prentice Point,-85.0707,41.0938,Indiana
2,3,46 Derek Junction,-96.7776,32.7673,Texas
3,4,11966 Old Shore Place,-94.3567,39.0350,Missouri
4,5,5 Evergreen Circle,-73.9772,40.7808,New York
...,...,...,...,...,...
995,996,6546 Waxwing Circle,-86.9292,33.5446,Alabama
996,997,604 Parkside Street,-111.6627,40.7286,Utah
997,998,44 Northwestern Court,-74.2452,40.7918,New Jersey
998,999,33 Bunting Point,-119.8225,39.5387,Nevada


### Clean DataFrame

In [6]:
# Keep only the id, address and state columns (longitude/latitude are dropped);
# .copy() makes the result independent of customer_location_df.
new_customer_location_df = customer_location_df.loc[
    :, ["id", "address", "us_state"]
].copy()
new_customer_location_df.head(n=5)

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York


### Connect to local database

In [7]:
# Build the connection URL from the credentials imported from config.py.
# The dialect name must be "postgresql": the legacy "postgres://" alias is
# deprecated in SQLAlchemy 1.3 and rejected by SQLAlchemy 1.4 and later.
connection_string = f'postgresql://{user}:{passw}@localhost:5432/Homework_SQL_db'
engine = create_engine(connection_string)

# Open a connection on the engine. The cells shown in this notebook pass
# `engine` to pandas directly; `conn` is kept for any code that relies on it.
conn = engine.connect()

### Check for tables

In [8]:
# List the tables visible through the engine. Engine.table_names() is
# deprecated in SQLAlchemy 1.4 and removed in 2.0; the Inspector API returns
# the same list of table names and is available in older releases too.
inspect(engine).get_table_names()

OperationalError: (psycopg2.OperationalError) FATAL:  password authentication failed for user "<insert user name>"

(Background on this error at: http://sqlalche.me/e/e3q8)

### Use pandas to load the CSV-derived DataFrame into the database

In [8]:
# Append the name rows to the customer_name table; the DataFrame index is
# not written as a column.
new_customer_data_df.to_sql(
    name='customer_name',
    con=engine,
    index=False,
    if_exists='append',
)

### Use pandas to load the JSON-derived DataFrame into the database

In [9]:
# Append the location rows to the customer_location table; the DataFrame index
# is not written as a column.
new_customer_location_df.to_sql(
    name='customer_location',
    con=engine,
    index=False,
    if_exists='append',
)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [10]:
# Read the customer_name table back from the database and preview the first
# five rows to confirm the load.
pd.read_sql_query(
    sql='select * from customer_name',
    con=engine,
).head(n=5)

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Confirm data has been added by querying the customer_location table

In [11]:
# Read the customer_location table back from the database and preview the
# first five rows to confirm the load.
pd.read_sql_query(
    sql='select * from customer_location',
    con=engine,
).head(n=5)

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York
