In [146]:
# Import dependencies
import pandas as pd
from census import Census
from config import api_key

In [198]:
# Import beers.csv as dataframe
beers_df = pd.read_csv('./Resources/beers.csv', index_col=[0])

# Filter dataframe to desired columns
beers_cleaned_df = beers_df[['name','style','brewery_id','abv']]

# Sort by brewery_id
beers_cleaned_df = beers_cleaned_df.sort_values('brewery_id')

# Reset index
beers_cleaned_df = beers_cleaned_df.reset_index(drop=True)

# Rename index
beers_cleaned_df.index.name = 'beer_id'

# Display first 5 rows
beers_cleaned_df.head()

brewery_ids = beers_cleaned_df[['brewery_id', 'name']]

Unnamed: 0_level_0,name,style,brewery_id,abv
beer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Maggie's Leap,Milk / Sweet Stout,0,0.049
1,Wall's End,English Brown Ale,0,0.048
2,Pumpion,Pumpkin Ale,0,0.060
3,Stronghold,American Porter,0,0.060
4,Parapet ESB,Extra Special / Strong Bitter (ESB),0,0.056
...,...,...,...,...
2405,Moo Thunder Stout,Milk / Sweet Stout,556,0.049
2406,Heinnieweisse Weissebier,Hefeweizen,556,0.049
2407,Snapperhead IPA,American IPA,556,0.068
2408,Porkslap Pale Ale,American Pale Ale (APA),556,0.043


In [164]:
# Import beer_reviews.csv as dataframe
beer_reviews_df = pd.read_csv('./Resources/beer_reviews.csv')

beer_reviews_cleaned_df = beer_reviews_df[]
# Display first 5 rows
beer_reviews_cleaned_df.head()

Unnamed: 0,brewery_id,brewery_name,review_time,review_overall,review_aroma,review_appearance,review_profilename,beer_style,review_palate,review_taste,beer_name,beer_abv,beer_beerid
0,10325,Vecchio Birraio,1234817823,1.5,2.0,2.5,stcules,Hefeweizen,1.5,1.5,Sausa Weizen,5.0,47986
1,10325,Vecchio Birraio,1235915097,3.0,2.5,3.0,stcules,English Strong Ale,3.0,3.0,Red Moon,6.2,48213
2,10325,Vecchio Birraio,1235916604,3.0,2.5,3.0,stcules,Foreign / Export Stout,3.0,3.0,Black Horse Black Beer,6.5,48215
3,10325,Vecchio Birraio,1234725145,3.0,3.0,3.5,stcules,German Pilsener,2.5,3.0,Sausa Pils,5.0,47969
4,1075,Caldera Brewing Company,1293735206,4.0,4.5,4.0,johnmichaelsen,American Double / Imperial IPA,4.0,4.5,Cauldron DIPA,7.7,64883


In [197]:
# Import breweries.csv as dataframe
breweries_df = pd.read_csv('./Resources/breweries.csv', index_col=[0])

# Strip whitespace from state column
breweries_df['state'] = breweries_df['state'].str.strip()

# Display first 5 rows
breweries_df.head()

Unnamed: 0,name,city,state
0,NorthGate Brewing,Minneapolis,MN
1,Against the Grain Brewery,Louisville,KY
2,Jack's Abby Craft Lagers,Framingham,MA
3,Mike Hess Brewing Company,San Diego,CA
4,Fort Point Beer Company,San Francisco,CA


### Create States Table

In [215]:
# Create Census object with Census API key from the selected year
c = Census(api_key, year=2021)

# Run Census Search to retrieve income data by county
census_data = c.acs1.get(("NAME", "B01003_001E", "B19013_001E","B19301_001E", "B01002_001E"), {'for': 'state:*'})

# Convert to DataFrame
census_df = pd.DataFrame(census_data)

# Rename columns
census_df = census_df.rename(columns={"B01003_001E": "population",
                                      "B19013_001E": "med_household_income",
                                      "B19301_001E": "per_capita_income",
                                      "B01002_001E": "median_age",
                                      "state": "state_code"
                                      # "place": "place_code"
                                    }
                                    )
census_df.to_csv('census_df.csv')

In [145]:
# Split "place" column into "city" and "state"
# census_df[['city','state']] = census_df["place"].str.split(" city, ", expand=True)

# Drop original "place" column
census_df.drop(columns=["place"], inplace=True)

# Display first 5 rows
census_df.head()

KeyError: "['place'] not found in axis"

In [82]:
# Create dictionary for converting state names and abbreviations

states_abbreviation = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

In [98]:
# Convert State names to state abbreviations
census_df.replace({"state": states_abbreviation}, inplace=True)

census_df.groupby()

Unnamed: 0,population,med_household_income,per_capita_income,median_age,state_code,place_code,city,state
0,78552.0,48531.0,29770.0,27.8,1,3076,Auburn,AL
1,196410.0,36614.0,27211.0,37.2,1,7000,Birmingham,AL
2,71283.0,45088.0,31168.0,40.5,1,21184,Dothan,AL
3,92588.0,99276.0,50046.0,38.9,1,35896,Hoover,AL
4,215482.0,70757.0,42232.0,38.0,1,37000,Huntsville,AL


In [99]:
cities = breweries_df.groupby(['city', 'state'], as_index=False).count()['city']
states = breweries_df.groupby(['city', 'state'], as_index=False).count()['state']


cities_df = pd.concat([cities, states], axis=1)

cities_df.head()

Unnamed: 0,city,state
0,Abingdon,VA
1,Abita Springs,LA
2,Ada,MI
3,Afton,VA
4,Airway Heights,WA


In [100]:
cities_df = cities_df.merge(census_df, on=["city","state"])

In [101]:
cities_df.head()

Unnamed: 0,city,state,population,med_household_income,per_capita_income,median_age,state_code,place_code
0,Albuquerque,NM,562591.0,58512.0,35738.0,38.9,35,2000
1,Ann Arbor,MI,121541.0,68864.0,46956.0,28.2,26,3000
2,Arvada,CO,123829.0,92647.0,46680.0,39.7,8,3455
3,Asheville,NC,94070.0,59758.0,38068.0,41.6,37,2140
4,Atlanta,GA,496480.0,74107.0,55051.0,34.3,13,4000


In [199]:
# Import beers.csv as dataframe
beers_df = pd.read_csv('./Resources/beers.csv', index_col=[0])

beer_reviews_df = pd.read_csv('./Resources/beer_reviews.csv')

breweries_df = pd.read_csv('./Resources/breweries.csv', index_col=[0])

In [200]:
beers_df.head()

Unnamed: 0,abv,ibu,id,name,style,brewery_id,ounces
0,0.05,,1436,Pub Beer,American Pale Lager,408,12.0
1,0.066,,2265,Devil's Cup,American Pale Ale (APA),177,12.0
2,0.071,,2264,Rise of the Phoenix,American IPA,177,12.0
3,0.09,,2263,Sinister,American Double / Imperial IPA,177,12.0
4,0.075,,2262,Sex and Candy,American IPA,177,12.0


In [201]:
beer_reviews_df.head()

Unnamed: 0,brewery_id,brewery_name,review_time,review_overall,review_aroma,review_appearance,review_profilename,beer_style,review_palate,review_taste,beer_name,beer_abv,beer_beerid
0,10325,Vecchio Birraio,1234817823,1.5,2.0,2.5,stcules,Hefeweizen,1.5,1.5,Sausa Weizen,5.0,47986
1,10325,Vecchio Birraio,1235915097,3.0,2.5,3.0,stcules,English Strong Ale,3.0,3.0,Red Moon,6.2,48213
2,10325,Vecchio Birraio,1235916604,3.0,2.5,3.0,stcules,Foreign / Export Stout,3.0,3.0,Black Horse Black Beer,6.5,48215
3,10325,Vecchio Birraio,1234725145,3.0,3.0,3.5,stcules,German Pilsener,2.5,3.0,Sausa Pils,5.0,47969
4,1075,Caldera Brewing Company,1293735206,4.0,4.5,4.0,johnmichaelsen,American Double / Imperial IPA,4.0,4.5,Cauldron DIPA,7.7,64883


In [202]:
breweries_df.head()

Unnamed: 0,name,city,state
0,NorthGate Brewing,Minneapolis,MN
1,Against the Grain Brewery,Louisville,KY
2,Jack's Abby Craft Lagers,Framingham,MA
3,Mike Hess Brewing Company,San Diego,CA
4,Fort Point Beer Company,San Francisco,CA


In [206]:
combined_df = beers_df.merge(beer_reviews_df, how='inner', left_on='name', right_on='beer_name')

combined_df = combined_df.merge(breweries_df, how='inner', left_on='brewery_name', right_on='name')

In [214]:
combined_df = combined_df.merge()

Unnamed: 0,abv,ibu,id,name_x,style,brewery_id_x,ounces,brewery_id_y,brewery_name,review_time,...,review_profilename,beer_style,review_palate,review_taste,beer_name,beer_abv,beer_beerid,name_y,city,state
0,0.079,45.0,1024,Fireside Chat,Winter Warmer,368,12.0,735,21st Amendment Brewery,1294033731,...,rootbeerman,Winter Warmer,3.5,4.0,Fireside Chat,7.9,62732,21st Amendment Brewery,San Francisco,CA
1,0.079,45.0,1024,Fireside Chat,Winter Warmer,368,12.0,735,21st Amendment Brewery,1294023948,...,zeff80,Winter Warmer,4.0,4.0,Fireside Chat,7.9,62732,21st Amendment Brewery,San Francisco,CA
2,0.079,45.0,1024,Fireside Chat,Winter Warmer,368,12.0,735,21st Amendment Brewery,1292115036,...,kingcrowing,Winter Warmer,3.0,2.0,Fireside Chat,7.9,62732,21st Amendment Brewery,San Francisco,CA
3,0.079,45.0,1024,Fireside Chat,Winter Warmer,368,12.0,735,21st Amendment Brewery,1294278300,...,ZAP,Winter Warmer,2.5,1.5,Fireside Chat,7.9,62732,21st Amendment Brewery,San Francisco,CA
4,0.079,45.0,1024,Fireside Chat,Winter Warmer,368,12.0,735,21st Amendment Brewery,1296172766,...,jbriggs7036,Winter Warmer,2.0,1.5,Fireside Chat,7.9,62732,21st Amendment Brewery,San Francisco,CA
