# Project 3 - Group 7
Barbara MacGregor, Matt Russell, Amanda Enstad and Chi Tran

## Import dependencies

In [None]:
import pandas as pd
from sqlalchemy import create_engine, inspect
from sqlalchemy.orm import session
from config import username, password

## Extract
### Read in the raw CSV files

In [None]:
# Relative paths to the two raw CSV inputs: beers first, breweries second.
beerCsv, brewCsv = (
    'Resources/Data/beers.csv',
    'Resources/Data/breweries.csv',
)

In [None]:
# Load the beers CSV and discard its 'Unnamed: 0' column in one step
# (presumably a row index left over from an earlier export - confirm).
beerDf = pd.read_csv(beerCsv).drop(columns=['Unnamed: 0'])

# Preview the first rows of the loaded frame.
beerDf.head()

In [None]:
# Load the breweries CSV with every column kept as-is.
brewDf = pd.read_csv(filepath_or_buffer=brewCsv)

# Preview the first five rows (the default for head()).
brewDf.head(n=5)

## Transform
### Rename columns

In [None]:
# Give the generic beerDf column names beer-specific ones.
#
# Column notes for beerDf:
#   abv        - alcoholic content by volume; 0 is no alcohol, 1 is pure alcohol
#   ibu        - International Bittering Units, i.e. how bitter the drink is
#   id         - unique beer id (becomes beer_id below)
#   brewery_id - unique id of the brewery that produces the beer;
#                can be used to join with the brewery info
beerRenames = {
    "id": "beer_id",
    "name": "beer_name",
    "style": "beer_style",
}
beerDf = beerDf.rename(columns=beerRenames)
beerDf.head()

In [None]:
# Give brewDf descriptive column names: the 'Unnamed: 0' column becomes
# brewery_id and 'name' becomes brewery_name.
brewRenames = {
    "Unnamed: 0": "brewery_id",
    "name": "brewery_name",
}
brewDf = brewDf.rename(columns=brewRenames)
brewDf.head()

### Visual data inspections:
Note: running the cells in this section is optional.
#### Observations:
* there are NaN entries in abv and ibu
* some beers list two styles in a single beer style entry, e.g. "American Double / Imperial IPA" (important for filtering)
* 558 unique brewery_id values in beerDf but only 551 unique brewery_id values in brewDf

In [None]:
# Column labels of beerDf as a plain Python list.
colNames = beerDf.columns.tolist()
colNames

In [None]:
# Dump the distinct values of every beerDf column, followed by how many
# there are, so NaNs and inconsistent entries can be spotted by eye.

colNames = list(beerDf.columns)
for col in colNames:
    # Compute the distinct values once and reuse them for the count.
    uniqueVals = beerDf[col].unique()
    # To list one value per line instead, print(*uniqueVals, sep='\n').
    print(
        col,
        uniqueVals,
        'Number of element of Unique Values: ',
        len(uniqueVals),
        '-----------',
        sep='\n',
    )


In [None]:
# Dump the distinct values of every brewDf (breweries) column, followed by
# how many there are, so NaNs and inconsistent entries can be spotted by eye.

colNames = list(brewDf.columns)
for col in colNames:
    # Compute the distinct values once and reuse them for the count.
    uniqueVals = brewDf[col].unique()
    # To list one value per line instead, print(*uniqueVals, sep='\n').
    print(
        col,
        uniqueVals,
        'Number of element of Unique Values: ',
        len(uniqueVals),
        '-----------',
        sep='\n',
    )

#### Data Cleaning:

In [None]:
# Open question: do we want to drop the beers whose brewery_id appears in beerDf but not in brewDf?
# Open question: are we using both tables?

# beerDf = beerDf[beerDf["brewery_id"].isin(brewDf["brewery_id"])]

## LOAD
(Proceed only if database and tables have been created in the database)
(Use: PostgreSQL)

In [None]:
# Create the SQLAlchemy engine for the "beersDb" PostgreSQL database on
# localhost:5432, using the username/password imported from the config module.
#
# The credentials are percent-encoded before being placed in the URL:
# a raw '@', ':' or '/' inside the username or password would otherwise be
# read as URL syntax and corrupt the connection string. Credentials made of
# letters and digits produce exactly the same URL as before.
from urllib.parse import quote_plus

engine = create_engine(
    f'postgresql://{quote_plus(str(username))}:{quote_plus(str(password))}'
    '@localhost:5432/beersDb'
)

In [None]:
# List the table names in the database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returned by inspect(engine) is the supported replacement and
# also returns a list of table-name strings.
inspect(engine).get_table_names()

In [None]:
# Append the brewDf rows to the 'breweries' table; the DataFrame index is
# not written as a column.
brewDf.to_sql('breweries', engine, if_exists='append', index=False)

In [None]:
# Append the beerDf rows to the 'beers' table; the DataFrame index is
# not written as a column.
beerDf.to_sql('beers', engine, if_exists='append', index=False)

In [None]:
# Read the whole 'breweries' table back to check what was loaded.
pd.read_sql_query(sql='select * from breweries', con=engine)

In [None]:
# Read the whole 'beers' table back to check what was loaded.
pd.read_sql_query(sql='select * from beers', con=engine)