# Project 3 - Group 7
Barbara MacGregor, Matt Russell, Amanda Enstad and Chi Tran

## Import dependencies

In [None]:
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
import sqlite3

## Extract
### Read in the raw CSV files

In [None]:
# Relative paths of the two raw input files.
beerCsv, brewCsv = (
    '../Resources/Data/beers.csv',
    '../Resources/Data/breweries.csv',
)

In [None]:
# Load the beer data and discard the 'Unnamed: 0' column in one step,
# then preview the first rows.
beerDf = pd.read_csv(beerCsv).drop(columns=['Unnamed: 0'])
beerDf.head()

In [None]:
# Load the brewery data and preview the first rows.
brewDf = pd.read_csv(filepath_or_buffer=brewCsv)
brewDf.head(n=5)

## Transform
### Rename columns

In [None]:
# Give the beerDf columns table-specific names and a fixed column order.
#   abv        - alcohol content by volume (0 = no alcohol, 1 = pure alcohol)
#   ibu        - International Bittering Units, i.e. how bitter the drink is
#   id         - unique beer id (renamed to beer_id)
#   brewery_id - unique id of the brewery producing the beer; usable as the
#                join key against the brewery info

colNames = [
    'beer_id',
    'beer_name',
    'beer_style',
    'brewery_id',
    'abv',
    'ibu',
    'ounces',
]
beerDf = (
    beerDf
    .rename(columns={"id": "beer_id", "name": "beer_name", "style": "beer_style"})
    .reindex(columns=colNames)
)
beerDf.head()

In [None]:
# Give the brewDf columns table-specific names: the 'Unnamed: 0' column
# becomes brewery_id and 'name' becomes brewery_name.
brewDf = brewDf.rename(
    {"Unnamed: 0": "brewery_id", "name": "brewery_name"},
    axis="columns",
)
brewDf.head()

### Visual data inspections:
Note: running the cells in this section is optional.
#### Observations:
* there are `NaN` entries in `abv` and `ibu`
* some `beer_style` values contain two style names separated by a slash, e.g. `American Double / Imperial IPA` (important for filtering)
* there are 558 unique `brewery_id` values in `beerDf` but 551 unique `brewery_id` values in `brewDf`

In [None]:
# Dump the distinct values of every beerDf column, followed by their count,
# so that missing values and inconsistent entries can be spotted by eye.

colNames = beerDf.columns.tolist()
for col in colNames:
    distinct = beerDf[col].unique()
    print(
        col,
        distinct,
        'Number of element of Unique Values: ',
        len(distinct),
        '-----------',
        sep='\n',
    )


In [None]:
# Dump the distinct values of every brewDf column, followed by their count,
# so that missing values and inconsistent entries can be spotted by eye.

colNames = brewDf.columns.tolist()
for col in colNames:
    distinct = brewDf[col].unique()
    print(
        col,
        distinct,
        'Number of element of Unique Values: ',
        len(distinct),
        '-----------',
        sep='\n',
    )

## EXPORT
Export the cleaned CSV files that are used to build the SQLite database.

In [None]:
# Write both cleaned tables to CSV in the working directory, without the
# DataFrame index.
beerDf.to_csv(path_or_buf='beers.csv', index=False)
brewDf.to_csv(path_or_buf='breweries.csv', index=False)

## Create SQLITE
Create the SQLite database file.

In [None]:
# Open (or create) the SQLite database file with the sqlite3 module and
# obtain a cursor for running statements on it.
conn = sqlite3.connect("beersDb.sqlite")
c = conn.cursor()

In [None]:
# Create the two tables that receive the cleaned CSV data.
# Column names mirror the DataFrame columns:
#   beers:     beer_id, beer_name, beer_style, brewery_id, abv, ibu, ounces
#   breweries: brewery_id, brewery_name, city, state
# Both statements fail if the table already exists in the database file.
c.execute('''CREATE TABLE beers
             ([beer_id] INTEGER PRIMARY KEY,
              [beer_name] text,
              [beer_style] text,
              [brewery_id] INTEGER,
              [abv] float,
              [ibu] float,
              [ounces] float)''')

c.execute('''CREATE TABLE breweries
             ([brewery_id] INTEGER PRIMARY KEY,
              [brewery_name] text,
              [city] text,
              [state] text)''')

# Persist the schema changes.
conn.commit()

In [None]:
# Reload both tables from the CSV files named 'beers.csv' and
# 'breweries.csv' in the working directory.
beerDf = pd.read_csv(filepath_or_buffer='beers.csv')
brewDf = pd.read_csv(filepath_or_buffer='breweries.csv')

In [None]:
# Append every beerDf row to the 'beers' table through the sqlite3
# connection; the DataFrame index is not written.
beerDf.to_sql(name='beers', con=conn, if_exists='append', index=False)

In [None]:
# Append every brewDf row to the 'breweries' table through the sqlite3
# connection; the DataFrame index is not written.
brewDf.to_sql(name='breweries', con=conn, if_exists='append', index=False)

###  Check sqlite database

In [None]:
# Open a SQLAlchemy engine on the database file so its contents can be
# checked in the following cells.
engine = sqlalchemy.create_engine("sqlite:///beersDb.sqlite")

In [None]:
# Declare a Base using `automap_base()`
Base = automap_base()
# Reflect the tables reachable through `engine` into mapped classes on Base.
# NOTE(review): the `reflect=True` flag is documented as deprecated in newer
# SQLAlchemy releases in favor of `autoload_with=engine` — confirm the
# installed version before changing this call.
Base.prepare(engine, reflect=True)

In [None]:
# Print all of the classes mapped to the Base
# (the cell output is the collection of mapped class names produced by the
# automap step).
Base.classes.keys()

In [None]:
# Read the whole 'beers' table back through the SQLAlchemy engine.
pd.read_sql_query(sql='select * from beers', con=engine)

In [None]:
# Read the whole 'breweries' table back through the SQLAlchemy engine.
pd.read_sql_query(sql='select * from breweries', con=engine)