## Note: this notebook was used for development only. A separate script file has been created for the actual implementation.

In [1]:
# dependencies
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

### Store CSV into DataFrame

In [2]:
# Path to the cleaned sales data; it is relative, so it is resolved against
# the directory the notebook kernel is running in.
csv_file = "data/data_clean.csv"
# Read the whole CSV into a DataFrame, letting pandas infer the column dtypes.
df = pd.read_csv(csv_file)
# Preview the first rows to confirm the file was parsed as expected.
df.head()

Unnamed: 0,id,Address,County,State,Sale Date,Sale Year,Sale Month,Sale Price,lat,long,block
0,1,2976 WHISPERWOOD TRL,Fulton County,Georgia,1/12/17,2017,Jan,200000,33.671191,-84.485159,131210113031
1,2,3040 STONE RD,Fulton County,Georgia,1/13/17,2017,Jan,220000,33.667964,-84.487987,131210113031
2,3,2995 REDWINE RD,Fulton County,Georgia,1/27/17,2017,Jan,182000,33.660411,-84.486634,131210113031
3,4,2974 ROCKWOOD RD,Fulton County,Georgia,3/1/17,2017,Mar,149900,33.674489,-84.482433,131210113031
4,5,3105 GOLDEN DR,Fulton County,Georgia,5/5/17,2017,May,175000,33.672837,-84.489831,131210113031


In [3]:
# Check the dtypes pandas inferred for each column before loading to the
# database. Note that 'Sale Date' comes through as object (plain strings),
# not as a datetime.
df.dtypes

id              int64
Address        object
County         object
State          object
Sale Date      object
Sale Year       int64
Sale Month     object
Sale Price      int64
lat           float64
long          float64
block           int64
dtype: object

In [4]:
# Rename columns to remove capital letters and spaces (snake_case), presumably
# so they can be referenced as plain identifiers once the frame is written to
# the database. Columns that are already lowercase (id, lat, long, block) are
# not listed and are left as they are.
cols = {'Address': 'address',
        'County': 'county',
        'State': 'state',
        'Sale Date': 'sale_date',
        'Sale Year': 'sale_year',
        'Sale Month': 'sale_month',
        'Sale Price': 'sale_price'}
# Rebind the name instead of using inplace=True: rename() returns a new frame,
# so the object displayed by earlier cells is not mutated behind their back,
# and re-running this cell is a harmless no-op.
df = df.rename(columns=cols)

# preview
df.head()

Unnamed: 0,id,address,county,state,sale_date,sale_year,sale_month,sale_price,lat,long,block
0,1,2976 WHISPERWOOD TRL,Fulton County,Georgia,1/12/17,2017,Jan,200000,33.671191,-84.485159,131210113031
1,2,3040 STONE RD,Fulton County,Georgia,1/13/17,2017,Jan,220000,33.667964,-84.487987,131210113031
2,3,2995 REDWINE RD,Fulton County,Georgia,1/27/17,2017,Jan,182000,33.660411,-84.486634,131210113031
3,4,2974 ROCKWOOD RD,Fulton County,Georgia,3/1/17,2017,Mar,149900,33.674489,-84.482433,131210113031
4,5,3105 GOLDEN DR,Fulton County,Georgia,5/5/17,2017,May,175000,33.672837,-84.489831,131210113031


### Connect to local database

In [5]:
# Connection settings for the local Postgres database.
# The username and password can be overridden with the PGUSER / PGPASSWORD
# environment variables so real credentials never need to be saved in the
# notebook; the fallbacks are the original local-development values.
protocol = 'postgresql'
username = os.environ.get('PGUSER', 'postgres')
password = os.environ.get('PGPASSWORD', 'admin')
host = 'localhost'
database_name = 'fultoncounty_db'
# URL-escape the credentials: characters such as '@', ':' or '/' in a password
# would otherwise be parsed as part of the URL structure and break the
# connection string. For the default values the resulting string is unchanged.
rds_connection_string = (
    f'{protocol}://{quote_plus(username)}:{quote_plus(password)}'
    f'@{host}/{database_name}'
)
# create_engine() builds the Engine from this URL; it is used by the later
# cells to list tables, write the DataFrame and query it back.
engine = create_engine(rds_connection_string)

### Check for tables

In [6]:
# List the tables that currently exist in the database.
# Engine.table_names() is deprecated (it emits a warning) and was removed in
# SQLAlchemy 2.0; the inspector API is the supported replacement and returns
# the same list of table names.
inspect(engine).get_table_names()

  """Entry point for launching an IPython kernel.


['home_sales']

### Use pandas to load merged DataFrame into database

In [7]:
# Write the DataFrame to the 'home_sales' table through the engine.
# if_exists='replace' drops any existing 'home_sales' table and recreates it,
# so re-running this cell overwrites the table rather than appending to it.
# index=False keeps the DataFrame's row index out of the table.
df.to_sql(name='home_sales', con=engine, if_exists='replace', index=False)

### Confirm data has been added by querying the table
* NOTE: can also check using pgAdmin

In [8]:
# Read a few rows back to confirm the load worked.
# The limit is applied in SQL so only five rows are transferred, instead of
# pulling the entire table into memory and then discarding all but the head.
pd.read_sql_query('select * from home_sales limit 5', con=engine)

Unnamed: 0,id,address,county,state,sale_date,sale_year,sale_month,sale_price,lat,long,block
0,1,2976 WHISPERWOOD TRL,Fulton County,Georgia,1/12/17,2017,Jan,200000,33.671191,-84.485159,131210113031
1,2,3040 STONE RD,Fulton County,Georgia,1/13/17,2017,Jan,220000,33.667964,-84.487987,131210113031
2,3,2995 REDWINE RD,Fulton County,Georgia,1/27/17,2017,Jan,182000,33.660411,-84.486634,131210113031
3,4,2974 ROCKWOOD RD,Fulton County,Georgia,3/1/17,2017,Mar,149900,33.674489,-84.482433,131210113031
4,5,3105 GOLDEN DR,Fulton County,Georgia,5/5/17,2017,May,175000,33.672837,-84.489831,131210113031
