In [9]:
# Dependencies
import numpy as np
import pandas as pd
import requests
import json
from pprint import pprint
import time
from datetime import timedelta,datetime,date

# Graphing Dependencies
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Libraries to support access to the  Census API wrapper
from config import census_api_key as api_key
from config import census_api_key
from config import beer_key
from config import g_web

# Import Census Data

In [10]:
#setting an API object for the census data, with the desired year; 2016 is the latest data available at this level of detail from teh ACS5
c = census_api_key(api_key, year = 2016)

#Setting the path for a sheet that contains codes and mapping for different categories of Census data
census_terms_path = ('Census_search_terms.xlsx')

#Creating dataframes from the XLSX which contain the codes for 59 different metrics and the FIPS state codes
census_terms_df = pd.read_excel(census_terms_path, sheetname = "Sheet1")
fips_codes_df = pd.read_excel(census_terms_path, sheetname = "Sheet2", dtype = str)

TypeError: 'str' object is not callable

In [None]:
#grabbing census data from the Census API and putting into a list
census_data = []

for row in census_terms_df['code']:
    try:
        acs5_data = c.acs5.state(row, Census.ALL)
        census_data.append(acs5_data)
        print(f"grabbing {row}")
    except:
        print(f"grabbing {row} failed")

In [None]:
#taking all data from the list and adding it together into a single list
#then create a dataframe out of each and appending together

for i,data in enumerate(census_data):
    if i == 0:
        all_census_data_df = pd.DataFrame(data)
        all_census_data_df.set_index('state', inplace = True)
    else:
        all_census_data_df_2 = pd.DataFrame(data)
        all_census_data_df_2.set_index('state', inplace = True)
        all_census_data_df = all_census_data_df.merge(all_census_data_df_2, left_index = True, right_index = True)

In [None]:
#create a dictionary with variable keys as the census code and variables as the variable names
columns = dict(zip(census_terms_df['code'], census_terms_df['var_name']))

In [None]:
#rename columns to the human friendly variable names & then reset the index
all_census_data_df = all_census_data_df.rename(columns = columns)
all_census_data_df.reset_index(inplace = True)

#check out the dataset
all_census_data_df.info()

In [None]:
#merge the state codes into the list so that we have a human readable State
all_census_data_df_with_states = pd.merge(all_census_data_df, fips_codes_df, left_on = 'state', right_on = "FIPS", how = "inner")

In [None]:
#Perform some cleansing on the file to remove unwanted columns and to ensure that one of population metrics is numeric data
all_census_data_df_with_states.drop('state', axis = 1, inplace = True)
all_census_data_df_with_states['tot_pop'] = all_census_data_df_with_states['tot_pop'].apply(pd.to_numeric)

#recheck the data
all_census_data_df_with_states.info()

# Acquire breweries data

In [None]:
# API call for beermapping.com to get the data for breweries in each state
states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", 
          "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
          "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
          "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
          "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]


base_url = "http://beermapping.com/webservice/locstate/"

beer_id = []
brewery_state = []
zipcode = []
brewery_name = []
brewery_city = []
brewery_type = []

# Iterating through the states list to return all the results for each state
# For responses that are for breweries, appending the lists to get the desired data
for state in states:
    url = base_url + beer_key + "/" + state  + "&s=json"
    print(url)
    state_data = requests.get(url).json()
    for response in state_data:
        if response["status"] == "Brewery":
            beer_id.append(response["id"])
            brewery_state.append(response["state"])
            zipcode.append(response["zip"])
            brewery_name.append(response["name"])
            brewery_city.append(response["city"])
            brewery_type.append(response["status"])

        
brewery_df = pd.DataFrame({"Brew Mapping Id": beer_id,
             "State": brewery_state,
             "Zipcode": zipcode,
             "Brewery Name": brewery_name,
             "City": brewery_city,
                "Type": brewery_type})

brewery_df.head()

In [None]:
# Clean some of the state abbreviations in the dataset
for ix,row in brewery_df.iterrows():
    if row["State"] == "Mi":
        brewery_df.at[ix,"State"]="MI"
    elif row ["State"] == "tx":
        brewery_df.at[ix,"State"]="TX"
    else: pass

In [None]:
# Export data to csv to investigate the data
brewery_df.to_csv('breweries_df.csv', index=False)

In [None]:
# Creating groups by state to integrate with census data
state_groups = brewery_df.groupby(["State"])
brew_state_count = state_groups["Brew Mapping Id"].count()
brew_state_count_df = pd.DataFrame({"brewery_count": brew_state_count})
brew_state_count_df.reset_index(inplace=True)

In [None]:
brew_state_count_df.rename(columns = {"State": "Abbrev"}, inplace = True)
brew_state_count_df.head()

In [None]:
fips_codes_df.head()

In [None]:
# Merge the FIPS data into the dataframe
brew_state_count_df = pd.merge(brew_state_count_df, fips_codes_df, how = "left", on = "Abbrev")
brew_state_count_df

In [None]:
# Export to csv and to excel to check
brew_state_count_df.to_excel('breweries_state.xlsx', sheet_name = "count", index=False)
brew_state_count_df.to_csv('breweries_state.csv', index=False)

# Merge breweries data with census data

In [None]:
all_census_data_df_with_states.head()

In [None]:
#merge the breweries count into a dataframe
census_and_breweries = pd.merge(all_census_data_df_with_states, brew_state_count_df, how = 'left', on = ["FIPS", "State", "Abbrev"])
census_and_breweries.head()

In [None]:
#export to excel to check
census_and_breweries.to_excel('census_and_breweries_state.xlsx', sheet_name = 'data')

## Merge census & brewery data with Winery Data

In [None]:
# reading the bonded wine producers data into a CSV
wineries_path = ('bonded-wine-producers-by-state-2017.csv')
wineries_df = pd.read_csv(wineries_path)
wineries_df.info()

In [None]:
wineries_df.rename(columns = {'State(abbrev)': "Abbrev"}, inplace = True)
wineries_df.head()

In [None]:
#merge the wineries count into the dataframe
census_breweries_wineries = pd.merge(census_and_breweries, wineries_df, how = 'left', on = ["State", "Abbrev"])
len(census_breweries_wineries)

In [None]:
census_breweries_wineries.head()

In [None]:
#export data to an excel document to check
census_breweries_wineries.to_excel('census_breweries_wineries_state.xlsx', sheet_name = 'data')

## Merge Census & brewery data with distillery data

In [None]:
#read distilleries document into a dataframe
distillery_path = ('operating-craft-distilleries-us-2016-by-state.csv')
distillery_df = pd.read_csv(distillery_path)
distillery_df.head()

In [None]:
#merge the distilleries data into the dataframe containing census data, brewery data and winery data
census_breweries_wineries_distilleries = pd.merge(census_breweries_wineries, distillery_df, how = 'left', on = "State")
census_breweries_wineries_distilleries.head()

In [None]:
census_breweries_wineries_distilleries.replace(np.NaN, 0, inplace = True)

In [None]:
census_breweries_wineries_distilleries['tot_k_pop'] = census_breweries_wineries_distilleries['tot_pop'] / 1000

In [None]:
census_breweries_wineries_distilleries['breweries_k_pop'] = census_breweries_wineries_distilleries['brewery_count'] / census_breweries_wineries_distilleries['tot_k_pop']
census_breweries_wineries_distilleries['wineries_k_pop'] = census_breweries_wineries_distilleries['winery_count'] / census_breweries_wineries_distilleries['tot_k_pop']
census_breweries_wineries_distilleries['distilleries_k_pop'] = census_breweries_wineries_distilleries['craft_distillery_count'] / census_breweries_wineries_distilleries['tot_k_pop']
census_breweries_wineries_distilleries.head()

In [None]:
#Export dataframe to a excel
census_breweries_wineries_distilleries.to_excel('census_breweries_wineries_distilleries_state.xlsx', sheet_name = 'data')

## Adding the Google data

In [None]:
#Creating the search terms for the GMAPs API
region_id = "US"

#group search term lists
winery_l=["winery","vineyard","wine+spirits","wine+garden"]
distillery_l=["distillery","distill+spirit","distiller"]
brewery_l=["brewery","brew+pub","taphouse","beer+garden"]

#combined lists
term_search= winery_l+distillery_l+brewery_l

In [None]:
#Create empty lists
name_data=[]
address_data=[]
lon_data=[]
lat_data=[]
place_id=[]
json_urls=[]
rating=[]
state_abr=[]
place_search=[]
# set up a parameters dictionary

# base url
base_url = "https://maps.googleapis.com/maps/api/place/textsearch/json?"

for items in term_search:
    start_time=time.time()
    print("Query terms: "+items)
    for state in states:
        params = {"key": g_web,"query": items +"+"+state+"+"+region_id}
        response = requests.get(base_url, params=params)
        place_info=response.json()
        for res in place_info["results"]:
            name_data.append(res["name"])
            address_data.append(res["formatted_address"])
            lon_data.append(res["geometry"]["location"]["lng"])
            lat_data.append(res["geometry"]["location"]["lat"])
            place_id.append(res["place_id"])
            state_abr.append(state)
            place_search.append(items)
            json_urls.append(response.url)
        #time.sleep(1.5)
    print("        API DATA RETRIEVAL COMPLETE for search term: %s. (elapsed time: %s seconds)" %((items), round(time.time()-start_time,3)))

In [None]:
# joins all lists into one dataframe
data_output = pd.DataFrame(np.column_stack([place_search,name_data,address_data,state_abr,lon_data,lat_data,place_id,json_urls]),
                       columns=["query","name","address","state","lon","lat","place_id","response_url"])

In [None]:
data_output["establishment"] = data_output["query"]

In [None]:
# Creates a category by establishment, replacing search term by the lists
for term in winery_l:
    for i in range(data_output.establishment.count()):
        data_output.establishment.i= data_output["establishment"].replace(
        to_replace=term,
        value="winery",
        inplace=True
        )

for term in brewery_l:
    for i in range(data_output.establishment.count()):
        data_output.establishment.i= data_output["establishment"].replace(
        to_replace=term,
        value="brewery",
        inplace=True
        )
        
for term in distillery_l:
    for i in range(data_output.establishment.count()):
        data_output.establishment.i= data_output["establishment"].replace(
        to_replace=term,
        value="distillery",
        inplace=True
        )

In [None]:
#cleanses the addresses and find zip codes
data_output["address"]=data_output["address"].str.replace("United States","USA")

In [None]:
#cleanses based on place_ids
data_output=data_output.drop_duplicates(subset="place_id")
data_output.set_index("place_id")

In [None]:
len(data_output)

In [None]:
establishment_types = data_output['establishment'].unique()

In [None]:
data_output_grouped = pd.DataFrame(data_output.groupby(by = ['establishment', 'state']).count())
data_output_grouped.drop(['name', 'address', 'lon', 'lat', 'place_id', 'response_url'], axis = 1, inplace = True)
clean_data_output = data_output_grouped.loc['brewery']
clean_data_output.rename(columns = {"query": "google_brewery_count"}, inplace = True)
clean_data_output = clean_data_output.add(data_output_grouped.loc['winery'], fill_value=0)
clean_data_output.rename(columns = {"query": "google_winery_count"}, inplace = True)
clean_data_output = clean_data_output.add(data_output_grouped.loc['distillery'], fill_value = 0)
clean_data_output.rename(columns = {"query": "google_distillery_count"}, inplace = True)
clean_data_output.head()

In [None]:
census_breweries_wineries_distilleries.head()

In [None]:
all_data_df = pd.merge(census_breweries_wineries_distilleries, clean_data_output, how = "left", left_on = "Abbrev", right_index = True)

In [None]:
all_data_df.head()

In [None]:
all_data_df.to_excel("all_data.xls", sheet_name = "data")