In [160]:
# Import dependencies
import os
from urllib.parse import quote_plus

import pandas as pd
from census import Census
from config import api_key
from sqlalchemy import create_engine, inspect

In [161]:
# Load the raw beers data, using the first CSV column as the DataFrame index
beers_df = pd.read_csv('./Resources/beers.csv', index_col=[0])

# Keep only the columns needed for the beers table
beer_columns = ['id', 'name', 'style', 'brewery_id', 'abv']
beers_cleaned_df = beers_df[beer_columns]

# Preview the trimmed frame
beers_cleaned_df.head()

Unnamed: 0,id,name,style,brewery_id,abv
0,1436,Pub Beer,American Pale Lager,408,0.05
1,2265,Devil's Cup,American Pale Ale (APA),177,0.066
2,2264,Rise of the Phoenix,American IPA,177,0.071
3,2263,Sinister,American Double / Imperial IPA,177,0.09
4,2262,Sex and Candy,American IPA,177,0.075


In [120]:
# Write the trimmed beers table to disk; index=False keeps the row index out of the file
beers_table_path = './CleanedCSVs/beers_table.csv'
beers_cleaned_df.to_csv(beers_table_path, index=False)

In [165]:
# Import breweries.csv as dataframe
breweries_df = pd.read_csv('./Resources/breweries.csv')

# Strip surrounding whitespace from the state and name columns
breweries_df['state'] = breweries_df['state'].str.strip()
breweries_df['name'] = breweries_df['name'].str.strip()

# Build the cleaned table as one non-mutating chain. The original called
# reset_index(inplace=True) on a column subset of breweries_df, which mutates
# a derived frame in place and can raise SettingWithCopyWarning.
# reset_index() turns the row index into an 'index' column, which is then
# renamed to brewery_id.
# NOTE(review): presumably beers.brewery_id refers to this row position -
# confirm against the source data.
breweries_cleaned_df = (
    breweries_df[['name', 'city', 'state']]
    .reset_index()
    .rename(columns={'index': 'brewery_id'})
)

# Display first 5 rows
breweries_cleaned_df.head()

Unnamed: 0,brewery_id,name,city,state
0,0,NorthGate Brewing,Minneapolis,MN
1,1,Against the Grain Brewery,Louisville,KY
2,2,Jack's Abby Craft Lagers,Framingham,MA
3,3,Mike Hess Brewing Company,San Diego,CA
4,4,Fort Point Beer Company,San Francisco,CA


In [166]:
# Preview the raw breweries frame (as loaded, after whitespace stripping) for
# comparison with the cleaned table
breweries_df.head()

Unnamed: 0.1,Unnamed: 0,name,city,state
0,0,NorthGate Brewing,Minneapolis,MN
1,1,Against the Grain Brewery,Louisville,KY
2,2,Jack's Abby Craft Lagers,Framingham,MA
3,3,Mike Hess Brewing Company,San Diego,CA
4,4,Fort Point Beer Company,San Francisco,CA


In [131]:
# Write the cleaned breweries table to disk; index=False keeps the row index out of the file
breweries_table_path = './CleanedCSVs/breweries_table.csv'
breweries_cleaned_df.to_csv(breweries_table_path, index=False)

#### Create Census Table by State

In [135]:
# Census API client for the 2021 data year
c = Census(api_key, year=2021)

# Query the ACS 1-year endpoint for every state-level geography
# (the request is 'state:*', so rows are per state, not per county)
acs_fields = ("NAME", "B01003_001E", "B19013_001E", "B19301_001E", "B01002_001E")
census_data = c.acs1.get(acs_fields, {'for': 'state:*'})

# Readable column names for the requested fields
census_column_names = {
    "NAME": "state",
    "B01003_001E": "population",
    "B19013_001E": "med_household_income",
    "B19301_001E": "per_capita_income",
    "B01002_001E": "median_age",
}

# Build the frame: discard the API's own 'state' code column first so the
# NAME column can take over the 'state' label, then store population as int
census_df = (
    pd.DataFrame(census_data)
    .drop(columns=['state'])
    .rename(columns=census_column_names)
    .astype({'population': int})
)

# Display first 5 rows
census_df.head()

Unnamed: 0,state,population,med_household_income,per_capita_income,median_age
0,Alabama,5039877,53913.0,30608.0,39.8
1,Puerto Rico,3263584,22237.0,14468.0,44.1
2,Arizona,7276316,69056.0,36295.0,38.6
3,Arkansas,3025891,52528.0,29252.0,38.5
4,California,39237836,84907.0,42396.0,37.6


In [136]:
# Lookup table: full state / territory name -> two-letter abbreviation
states_abbreviation = {
    # States
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID",
    "Illinois": "IL", "Indiana": "IN", "Iowa": "IA", "Kansas": "KS",
    "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME", "Maryland": "MD",
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN", "Mississippi": "MS",
    "Missouri": "MO", "Montana": "MT", "Nebraska": "NE", "Nevada": "NV",
    "New Hampshire": "NH", "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY",
    "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH", "Oklahoma": "OK",
    "Oregon": "OR", "Pennsylvania": "PA", "Rhode Island": "RI", "South Carolina": "SC",
    "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX", "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY",
    # Federal district and territories
    "District of Columbia": "DC",
    "American Samoa": "AS", "Guam": "GU", "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR", "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

In [137]:
# Swap full state names for their abbreviations; values with no entry in the
# mapping are left as they are
census_df = census_df.replace({"state": states_abbreviation})

# Export dataframe to CSV
# NOTE(review): this export keeps the default index column, unlike the
# CleanedCSVs exports which pass index=False - confirm that is intended
census_df.to_csv('./Resources/census.csv')

# Display first 5 rows
census_df.head()

Unnamed: 0,state,population,med_household_income,per_capita_income,median_age
0,AL,5039877,53913.0,30608.0,39.8
1,PR,3263584,22237.0,14468.0,44.1
2,AZ,7276316,69056.0,36295.0,38.6
3,AR,3025891,52528.0,29252.0,38.5
4,CA,39237836,84907.0,42396.0,37.6


In [138]:
# Write the census table to disk; index=False keeps the row index out of the file
census_table_path = './CleanedCSVs/census_table.csv'
census_df.to_csv(census_table_path, index=False)

### Connect to local database

In [157]:
# Create connection to postgresql database.
# Connection settings are read from environment variables so real credentials
# do not have to be saved in the notebook; each fallback is the value that was
# previously hard-coded, so an unconfigured local setup behaves as before.
protocol = 'postgresql'
username = os.environ.get('BEER_DB_USER', 'postgres')
password = os.environ.get('BEER_DB_PASSWORD', 'postgres')
host = os.environ.get('BEER_DB_HOST', 'localhost')
port = int(os.environ.get('BEER_DB_PORT', 5432))
database_name = os.environ.get('BEER_DB_NAME', 'beer_db')

# URL-escape the credentials: characters such as '@', ':' or '/' in a
# username or password would otherwise be parsed as URL structure.
rds_connection_string = (
    f'{protocol}://{quote_plus(username)}:{quote_plus(password)}'
    f'@{host}:{port}/{database_name}'
)
engine = create_engine(rds_connection_string)

In [158]:
# Check table names using inspector
# inspect(engine) builds an inspector object for the engine created above;
# get_table_names() asks it for the tables it can see in the database.
inspector = inspect(engine)
table_names = inspector.get_table_names()

# Bare expression so the notebook displays the result
table_names

['census_data', 'breweries', 'beers']

In [159]:
# Push the cleaned DataFrames into the database tables of the same purpose.
# Every load appends to the named table and leaves the DataFrame index out.
load_options = dict(con=engine, if_exists='append', index=False)

# Census table
census_df.to_sql(name='census_data', **load_options)

# Breweries table
breweries_cleaned_df.to_sql(name='breweries', **load_options)

# Beers table
beers_cleaned_df.to_sql(name='beers', **load_options)