In [1]:
import os

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

In [2]:
# Load the raw national parks dataset from CSV and preview the first rows
parks_csv_path = "../Resources/national_parks.csv"
national_parks_df = pd.read_csv(parks_csv_path)
national_parks_df.head()

Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [3]:
# Inspect the column names of the freshly loaded dataframe before renaming them
national_parks_df.columns

Index(['Park Code', 'Park Name', 'State', 'Acres', 'Latitude', 'Longitude'], dtype='object')

In [4]:
# Inspect the dtype pandas assigned to each column
national_parks_df.dtypes

Park Code     object
Park Name     object
State         object
Acres          int64
Latitude     float64
Longitude    float64
dtype: object

In [5]:
# Rename the CSV headers to the snake_case column names used by the
# national_parks table in national_parks_db
table_column_names = {
    "Park Code": "park_code",
    "Park Name": "park_name",
    "State": "state",
    "Acres": "acres",
    "Latitude": "latitude",
    "Longitude": "longitude",
}
national_parks_df = national_parks_df.rename(columns=table_column_names)
national_parks_df.columns

Index(['park_code', 'park_name', 'state', 'acres', 'latitude', 'longitude'], dtype='object')

In [6]:
# Verify no duplicates: fail loudly if any row is an exact repeat of another,
# rather than relying on the reader to compare the displayed shape by eye
deduplicated_shape = national_parks_df.drop_duplicates().shape
assert deduplicated_shape == national_parks_df.shape, "national_parks_df contains duplicate rows"
deduplicated_shape

(63, 6)

In [7]:
# Label the index "id" so that it is written out as the id column when the
# dataframe is loaded into the table with index=True
national_parks_df.index = national_parks_df.index.rename("id")
national_parks_df.head()

Unnamed: 0_level_0,park_code,park_name,state,acres,latitude,longitude
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [8]:
# Connect to the Postgres database that holds the national_parks table.
# The "user:password@host:port/database" part of the URL is taken from the
# NATIONAL_PARKS_DB_CONN environment variable so real credentials do not have
# to be stored in the notebook; when the variable is unset, the original
# local-development connection string is used.
db_conn = os.environ.get(
    "NATIONAL_PARKS_DB_CONN",
    "postgres:postgres@localhost:5432/national_parks_db",
)
engine = create_engine(f"postgresql://{db_conn}")

In [9]:
# List the tables reachable through the engine to confirm national_parks exists.
# An inspector is used here in place of the engine.table_names() call.
inspector = inspect(engine)
inspector.get_table_names()

['bear_attacks',
 'parks_visitation',
 'national_parks',
 'park_trails',
 'trail_activities',
 'trail_features',
 'species']

In [10]:
# Load the dataframe into the postgres table.
# if_exists='append' adds rows every time it runs, so re-executing this cell
# (or the whole notebook) would insert the same parks again or collide on the
# id values. Only load when the table is still empty.
existing_row_count = pd.read_sql_query(
    "select count(*) from national_parks", con=engine
).iloc[0, 0]
if existing_row_count == 0:
    national_parks_df.to_sql(name="national_parks", con=engine, if_exists='append', index=True)
else:
    print(f"national_parks already holds {existing_row_count} rows; skipping load")

In [11]:
# Read the table back from Postgres and preview it to confirm the load worked
loaded_parks_df = pd.read_sql_query("select * from national_parks", con=engine)
loaded_parks_df.head()

Unnamed: 0,id,park_code,park_name,state,acres,latitude,longitude
0,0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [12]:
# Show the dataframe's (rows, columns) shape; its row count is the number to
# compare against the row count of the national_parks table
national_parks_df.shape

(63, 6)

In [13]:
# Count the rows currently stored in the national_parks table
row_count_query = "select count(*) from national_parks"
pd.read_sql_query(row_count_query, con=engine)

Unnamed: 0,count
0,63
