In [None]:
# Load libraries
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

In [None]:
# Read the raw visitation CSV into a DataFrame and preview the first rows.
# The path is relative to the notebook's working directory.
visitors_file = "../Resources/All National Parks Visitation 1904-2016.csv"
visitors_df = pd.read_csv(filepath_or_buffer=visitors_file)
visitors_df.head()

In [None]:
# Grab only National Parks from the DataFrame
visitors_df = visitors_df.loc[visitors_df["Unit Type"] == "National Park"]

# Filter out rows that do not contain a numeric year.
# Coercing with pd.to_numeric drops the "Total" summary rows and, unlike a
# comparison against the literal "Total", also drops blank or otherwise
# non-numeric values that could not be stored as a year.
numeric_year = pd.to_numeric(visitors_df["YearRaw"], errors="coerce")
visitors_df = visitors_df.loc[numeric_year.notna()]

# Columns kept for the database table
visitors_cols = ["Region", "State", "Unit Name", "Visitors", "YearRaw"]

# Build the transformed frame in one chain (each step returns a new frame,
# so visitors_df is left untouched and re-running the cell is safe):
#   1. select the columns of interest
#   2. rename the headers to the snake_case names used by the table
#   3. discard the original CSV row labels
#   4. store the year as an integer instead of the string read from the CSV
visitors_transformed = (
    visitors_df[visitors_cols]
    .rename(columns={"Region": "region",
                     "State": "state",
                     "Unit Name": "park_name",
                     "Visitors": "visitors",
                     "YearRaw": "year"})
    .reset_index(drop=True)
    .assign(year=lambda frame: pd.to_numeric(frame["year"]).astype("int64"))
)

# Use a 1-based sequential ID as the index; it is named "id" so that it is
# written out under that column name when the index is exported.
visitors_transformed.index = pd.RangeIndex(
    start=1, stop=len(visitors_transformed) + 1, name="id"
)

visitors_transformed.head()

In [None]:
# Create connection string to database.
# Each setting is read from the standard PostgreSQL environment variables so
# real credentials do not have to be committed with the notebook; the
# fallbacks reproduce the local development defaults.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ.get("PGPASSWORD", "postgres")
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_name = os.environ.get("PGDATABASE", "national_parks_db")

# User and password are URL-escaped so characters such as "@" or "/" in
# either value cannot corrupt the connection URL.
connection_string = (
    f"{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(f'postgresql://{connection_string}')

In [None]:
# Write the transformed rows into the parks_visitation table.
# index=True exports the DataFrame index as a column alongside the data.
# NOTE: if_exists='append' adds to an existing table rather than replacing
# it, so running this cell again will attempt to insert the same rows again.
visitors_transformed.to_sql(
    name='parks_visitation',
    con=engine,
    if_exists='append',
    index=True,
)