In [12]:
import requests
import json
import pandas as pd

from pprint import pprint
from config import beer_key

In [13]:
# API call for beermapping.com to get the data for breweries in each state
states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", 
          "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
          "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
          "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
          "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]


base_url = "http://beermapping.com/webservice/locstate/"

beer_id = []
brewery_state = []
zipcode = []
brewery_name = []
brewery_city = []
brewery_type = []

# Iterating through the states list to return all the results for each state
# For responses that are for breweries, appending the lists to get the desired data
for state in states:
    url = base_url + beer_key + "/" + state  + "&s=json"
    print(url)
    state_data = requests.get(url).json()
    for response in state_data:
        if response["status"] == "Brewery":
            beer_id.append(response["id"])
            brewery_state.append(response["state"])
            zipcode.append(response["zip"])
            brewery_name.append(response["name"])
            brewery_city.append(response["city"])
            brewery_type.append(response["status"])

        
brewery_df = pd.DataFrame({"Brew Mapping Id": beer_id,
             "State": brewery_state,
             "Zipcode": zipcode,
             "Brewery Name": brewery_name,
             "City": brewery_city,
                "Type": brewery_type})

brewery_df.head()

http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/AL&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/AK&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/AZ&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/AR&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/CA&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/CO&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/CT&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/DC&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/DE&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/FL&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2ee5235d3f2d8c61989fe39/GA&s=json
http://beermapping.com/webservice/locstate/c0323ad9e2e

Unnamed: 0,Brew Mapping Id,Brewery Name,City,State,Type,Zipcode
0,15706,Avondale Brewing Company,Birmingham,AL,Brewery,35222
1,15004,Back Forty Beer Co,Gadsden,AL,Brewery,35901
2,20646,Band of Brothers Brewing Co,Tuscaloosa,AL,Brewery,35401
3,20843,Big Beach Brewing Company,Gulf Shores,AL,Brewery,36542
4,18433,Black Warrior Brewing,Tuscaloosa,AL,Brewery,35401


In [20]:
for ix,row in brewery_df.iterrows():
    if row["State"] == "Mi":
        brewery_df.at[ix,"State"]="MI"
    elif row ["State"] == "tx":
        brewery_df.at[ix,"State"]="TX"
    else: pass

In [26]:
brewery_df.to_csv('breweries_df.csv', index=False)

In [21]:
# Creating groups by state
state_groups = brewery_df.groupby(["State"])
brew_state_count = state_groups["Brew Mapping Id"].count()
brew_state_count_df = pd.DataFrame({"brewery_count": brew_state_count})
brew_state_count_df.reset_index(inplace=True)

In [24]:
brew_state_count_df.head()

Unnamed: 0,State,brewery_count
0,AK,19
1,AL,22
2,AR,14
3,AZ,37
4,CA,315


In [25]:
brew_state_count_df.to_excel('breweries_state.xlsx', sheet_name = "count", index=False)
brew_state_count_df.to_csv('breweries_state.csv', index=False)

In [None]:
# Creating groups by zipcode
zip_groups = brewery_df.groupby(["Zipcode"])
brew_count = zip_groups["Brew Mapping Id"].count()
brew_count_df = pd.DataFrame({"brewery_count": brew_count})
brew_count_df.reset_index(inplace=True)
brew_count_df.head()

In [None]:
# Creating a new column keeping only valid 5 digit zipcodes and converting 10 digit zipcodes to 5 digit
for ix,row in brew_count_df.iterrows():
    if len(row['Zipcode'])== 5 or(len(row['Zipcode'])== 10 and '-' in row['Zipcode']):
        brew_count_df.at[ix,'new zipcode'] = row['Zipcode'][0:5]

In [None]:
# Deleting row with data that does not have a valid zipcode
clean_brew_count = brew_count_df.loc[pd.notna(brew_count_df['new zipcode']),:]
clean_brew_count.head()

In [None]:
# Regrouping data to get count by zipcode and creating a new dataframe with just zipcode and brewery count
zip_groups2 = clean_brew_count.groupby(["new zipcode"])
brew_count2 = zip_groups2["brewery_count"].sum()
brew_count_df2 = pd.DataFrame(brew_count2)
brew_count_df2.head()
# To move zipcode to a column if needed later: 
brew_count_df2.reset_index(inplace=True)


brew_count_df2.head()

In [None]:
brew_count_df2.to_excel('breweries_zip.xlsx', sheet_name = "count", index=False)
brew_count_df2.to_csv('breweries_zip.csv', index=False)