# [San Diego Brewers Guild](https://www.sdbeer.com)

This notebook scrapes brewery data from the San Diego Brewers Guild website and exports it as a CSV file.

In [36]:
import requests
import folium
import numpy as np
from folium import plugins
from bs4 import BeautifulSoup

In [3]:
# Get the page and parse
url = "https://www.sdbeer.com/breweries"
# A timeout keeps the cell from hanging indefinitely if the site is unresponsive.
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page.raise_for_status()
# Name the parser explicitly so the parse tree does not depend on which optional
# parsers are installed (this also avoids bs4's "no parser specified" warning).
soup = BeautifulSoup(page.text, "html.parser")

In [4]:
# Each brewery on the listing page is an `.item` inside the 4-column grid
brewery_selector = 'div.grid.block-4 .item'
items = soup.select(brewery_selector)
print(f"There are {len(items)} breweries listed at {url}.")

There are 126 breweries listed at https://www.sdbeer.com/breweries.


In [5]:
# Build one record (name, detail-page URL, logo URL) per brewery tile
breweries = []

for brewery in items:
    # The tile's first anchor carries both the brewery name and its link
    link = brewery.find('a')
    logo = brewery.find('img')
    # NOTE(review): plain concatenation assumes the href is meant to be
    # appended to `url` as-is -- confirm against the actual href values.
    record = {
        'name': link['aria-label'],
        'website_url': url + link['href'],
        'logo_url': logo['src'],
    }
    breweries.append(record)

print(f"{len(breweries)} breweries found and added to our list.")

126 breweries found and added to our list.


In [6]:
from pathlib import Path

# Download logos, if they don't already exist
logo_dir = Path("data/logos")
# Create the target folder up front; on a fresh checkout the first write
# would otherwise fail with FileNotFoundError.
logo_dir.mkdir(parents=True, exist_ok=True)

print("Downloading new logos...")
for brewery in breweries:
    # NOTE(review): every logo is saved with a .png suffix regardless of the
    # format actually served at logo_url -- confirm that is intended.
    image_file_path = logo_dir / f"CA - San Diego - {brewery['name']}.png"

    # Don't download logos we already have
    if image_file_path.is_file():
        continue

    r = requests.get(brewery['logo_url'], timeout=30)
    if not r.ok:
        # Don't store an HTTP error body on disk as if it were an image.
        print(f"Skipped {image_file_path} (HTTP {r.status_code})")
        continue

    # write_bytes opens, writes and closes the file in one step, so the
    # handle cannot leak if the write fails.
    image_file_path.write_bytes(r.content)
    print(f"Downloaded {image_file_path}")

print("Complete!")

Downloading new logos...
Complete!


In [7]:
# Fetch the detail page of the first brewery in the scraped list
brewery = breweries[0]
first_brewery_url = brewery['website_url']
r = requests.get(first_brewery_url)

In [8]:
# SD Brewer's Guild is using Next.js
# Name the parser explicitly so the parse tree does not depend on which optional
# parsers are installed (this also avoids bs4's "no parser specified" warning).
soup = BeautifulSoup(r.content, "html.parser")
# Get all of the <script> tags
scripts = soup.find_all('script')
# Verify length of the first one (~510k)
len(scripts[0].contents[0])

506507

In [9]:
import re

# We're only interested in the first script tag
contents = str(scripts[0].contents[0])
# Pattern for the Next.js data variable; group 1 captures the object literal
# together with its trailing semi-colon
pattern = r"__NEXT_DATA__ = ({.*};)"
# re.search finds the assignment even if the script starts with other text;
# when the assignment is at the very start it returns the same match as re.match.
next_data = re.search(pattern, contents)
if next_data is None:
    # Give a clear error rather than an AttributeError on None below.
    raise ValueError("Could not find the __NEXT_DATA__ assignment in the first <script> tag")
# Verify the length of the group (~510k, but less than above)
len(next_data.group(1))

506396

In [10]:
import json

# The captured group ends with ';' -- drop it so the remainder can be parsed as JSON
raw_json = next_data.group(1)[:-1]
json_parsed = json.loads(raw_json)

# Good for inspection
# print(json.dumps(json_parsed, indent=4, sort_keys=True))

# Walk the page state down to the list of breweries
brewery_state = json_parsed["props"]["initialState"]["sdbgBreweries"]
breweries_list = brewery_state["list"]['_list']

# Total number of locations across all breweries
locations = sum(len(entry['locations']) for entry in breweries_list)

print(f"Scraped {len(breweries_list)} breweries and {locations} locations.")

Scraped 165 breweries and 430 locations.


In [47]:
from pprint import PrettyPrinter

# Eyeball the first three records to check their structure
pp = PrettyPrinter()
sample = breweries_list[:3]
pp.pprint(sample)

[{'_id': '5a822075b9cee17a0adff28e',
  'betterpress': True,
  'created': 1468282975000,
  'deactivated': False,
  'facebook': 'https://www.facebook.com/southparkbrewco',
  'image': 'https://s3-us-west-1.amazonaws.com/paradeigm-social/qJzF6dQmTQukygOMI1Bk_SP '
           'brewing.png',
  'instagram': 'https://www.instagram.com/southparkbrewco/',
  'locations': [{'_id': '5a8bb1df1c05c867bedca3e7',
                 'betterpress': True,
                 'brewery': None,
                 'breweryUuid': 'WyBROCGvMw52',
                 'city': 'San Diego',
                 'coords': [-117.1301127, 32.7216668],
                 'country': 'US',
                 'events': None,
                 'lat': 32.7216668,
                 'lng': -117.1301127,
                 'name': 'South Park Brewing Company',
                 'profileLocation': True,
                 'state': 'CA',
                 'street': '1517 30th St',
                 'tags': {'dogFriendly': True,
                          'f

In [23]:
# Now that we have ALL of the breweries, flatten them into one record per location

# Output column -> key inside a location's `tags` dict (order sets column order)
TAG_COLUMNS = {
    'tag_dogFriendly': 'dogFriendly',
    'tag_familyFriendly': 'familyFriendly',
    'tag_foodService': 'foodService',
    'tag_foodTrucks': 'foodTrucks',
    'tag_growlerFills': 'growlerFills',
    'tag_over21': 'over21',
}

breweries = []

for brewery in breweries_list:
    for location in brewery['locations']:
        # Missing, None or empty tags all mean "every flag is False"
        tags = location.get('tags') or {}
        tag_flags = {column: tags.get(tag, False) for column, tag in TAG_COLUMNS.items()}

        record = {
            # Brewery-level identity and social links
            'name_brewery': brewery['name'],
            'name_location': location['name'],
            'social_twitterUrl': brewery.get('twitter', ''),
            'social_facebookUrl': brewery.get('facebook', ''),
            'social_instagramUrl': brewery.get('instagram', ''),
            'social_untappdId': brewery.get('untappdId', ''),
            # Location-level position and address
            'lat': location['lat'],
            'lng': location['lng'],
            'coords': location['coords'],
            'street': location['street'],
            'city': location['city'],
            # State and country are fixed values, not read from the location
            'state': 'California',
            'country': 'US',
            # Brewery-level metadata
            'brewery_type': brewery['type'],
            'visible': brewery.get('visible', False),
            'deactivated': brewery.get('deactivated', True),
            'created': brewery['created'],
            'website': brewery['website'],
            'uuid': brewery['uuid'],
            'logo_url': brewery['image'],
            'profile_location': location.get('profileLocation', False),
            # Per-location amenity flags, in TAG_COLUMNS order
            **tag_flags,
        }
        breweries.append(record)

print(f"{len(breweries)} brewery locations processed.")

430 brewery locations processed.


In [24]:
import pandas as pd

# One row per brewery location; columns come from the record keys
df = pd.DataFrame(data=breweries)
df

Unnamed: 0,name_brewery,name_location,social_twitterUrl,social_facebookUrl,social_instagramUrl,social_untappdId,lat,lng,coords,street,...,website,uuid,logo_url,profile_location,tag_dogFriendly,tag_familyFriendly,tag_foodService,tag_foodTrucks,tag_growlerFills,tag_over21
0,South Park Brewing Company,South Park Brewing Company,https://twitter.com/SouthParkBrewCo,https://www.facebook.com/southparkbrewco,https://www.instagram.com/southparkbrewco/,189064,32.721667,-117.130113,"[-117.1301127, 32.7216668]",1517 30th St,...,http://www.southparkbrewing.com/,WyBROCGvMw52,https://s3-us-west-1.amazonaws.com/paradeigm-s...,True,True,True,True,False,True,False
1,Rouleur Brewing Company,Rouleur Brewing Company,https://twitter.com/rouleurbrewing,https://www.facebook.com/rouleurbrewing,https://www.instagram.com/rouleurbrewing/,332529,33.134462,-117.269810,"[-117.2698102, 33.1344625]","5840 El Camino Real, Suite 101",...,http://rouleurbrewing.com/,mq5pOQdvyAmg,https://s3-us-west-1.amazonaws.com/sdbeer2/llb...,True,True,True,False,True,True,False
2,Rouleur Brewing Company,Carlsbad Brewing Company,https://twitter.com/rouleurbrewing,https://www.facebook.com/rouleurbrewing,https://www.instagram.com/rouleurbrewing/,332529,33.128196,-117.260355,"[-117.260355, 33.1281963]",6133 Innovation Way,...,http://rouleurbrewing.com/,mq5pOQdvyAmg,https://s3-us-west-1.amazonaws.com/sdbeer2/llb...,False,False,False,False,False,False,False
3,Rouleur Brewing Company,Burger Bench,https://twitter.com/rouleurbrewing,https://www.facebook.com/rouleurbrewing,https://www.instagram.com/rouleurbrewing/,332529,33.122610,-117.079242,"[-117.0792423, 33.1226102]",237 E. Grand Ave,...,http://rouleurbrewing.com/,mq5pOQdvyAmg,https://s3-us-west-1.amazonaws.com/sdbeer2/llb...,False,False,False,False,False,False,False
4,Rouleur Brewing Company,7 Mile Kitchen,https://twitter.com/rouleurbrewing,https://www.facebook.com/rouleurbrewing,https://www.instagram.com/rouleurbrewing/,332529,33.132775,-117.310714,"[-117.3107141, 33.132775]",5420 Grand Pacific Dr,...,http://rouleurbrewing.com/,mq5pOQdvyAmg,https://s3-us-west-1.amazonaws.com/sdbeer2/llb...,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425,TapRoom Beer Company,TapRoom Beer Company,,https://www.facebook.com/taproomnp,https://www.instagram.com/taproombeerco,,32.755364,-117.143779,"[-117.1437794, 32.7553645]",2000 El Cajon Blvd,...,http://taproombeerco.com,faa935e61353b8b9c307,https://s3-us-west-1.amazonaws.com/paradeigm-s...,True,True,True,True,False,True,False
426,Five Suits Brewing,Five Suits Brewing,https://twitter.com/5suitsbrewing,https://www.facebook.com/fivesuitsbrewing,https://www.instagram.com/fivesuitsbrewing/,,33.148874,-117.221588,"[-117.2215877, 33.1488745]",2575 Pioneer Ave #104,...,https://fivesuits.com/,6bd92692b0c4e459a265,https://s3-us-west-1.amazonaws.com/paradeigm-s...,True,False,False,False,False,False,False
427,Groundswell Brewing Company,Groundswell Brewing Company,https://twitter.com/groundswellbrew,https://www.facebook.com/groundswellbrew,https://www.instagram.com/groundswellbrew,,32.830799,-116.976369,"[-116.9763687, 32.8307987]",10151 Prospect Avenue,...,www.groundswellbrew.com,6ec3a7ed398fb6bf0df2,https://s3-us-west-1.amazonaws.com/paradeigm-s...,True,True,True,False,True,True,False
428,BALLAST POINT BREWING COMPANY,BALLAST POINT BREWING COMPANY,https://twitter.com/ballastpointbrewing,https://www.facebook.com/BallastPoint/,https://www.instagram.com/ballastpointbrewing,,32.887957,-117.157909,"[-117.1579086, 32.8879565]",9045 Carrol Way,...,www.ballastpoint.com,9a94f9107885312a7f72,https://s3-us-west-1.amazonaws.com/paradeigm-s...,True,True,True,True,False,True,False


In [92]:
# Locations tagged dog friendly but not family friendly
dog_friendly = df['tag_dogFriendly'].eq(True)
not_family_friendly = df['tag_familyFriendly'].eq(False)
df[dog_friendly & not_family_friendly]

Unnamed: 0,name_brewery,name_location,social_twitterUrl,social_facebookUrl,social_instagramUrl,social_untappdId,lat,lng,street,city,state,country,tag_dogFriendly,tag_familyFriendly,tag_foodService,tag_foodTrucks,tag_growlerFills,tag_over21
110,Bottlecraft,Bottlecraft — Solana Beach,https://twitter.com/bottlecraft,https://www.facebook.com/Bottlecraft,https://www.instagram.com/bottlecraft/,,32.986173,-117.271078,437 S Hwy 101,Solana Beach,California,US,True,False,True,False,False,True
113,Bottlecraft,Bottlecraft — Little Italy,https://twitter.com/bottlecraft,https://www.facebook.com/Bottlecraft,https://www.instagram.com/bottlecraft/,,32.728049,-117.170413,2252 India Street,San Diego,California,US,True,False,False,False,False,True
114,Bottlecraft,Bottlecraft — Sorrento Valley,https://twitter.com/bottlecraft,https://www.facebook.com/Bottlecraft,https://www.instagram.com/bottlecraft/,,32.905022,-117.228892,11055 Roselle Street,San Diego,California,US,True,False,False,False,False,True
259,Belching Beaver Brewery,Belching Beaver Brewery North Park,,https://www.facebook.com/pages/The-Belching-Be...,https://twitter.com/belchingbeaver,43025.0,32.75475,-117.130072,4223 30th Street,San Diego,California,US,True,False,False,True,True,False
309,Knotty Brewing,Knotty Brewing Co.,https://twitter.com/knottybrewing,https://www.facebook.com/knottybrewingco/,https://www.instagram.com/knottybrewing/,276993.0,32.711777,-117.156852,842 Market Street,San Diego,California,US,True,False,False,False,True,True
315,Arcana Brewing Company,Arcana Brewing Company,https://twitter.com/ArcanaBrewing,https://www.facebook.com/ArcanaBrewing,http://instagram.com/arcanabrewing,30892.0,33.140246,-117.27387,5621 Palmer Way,Carlsbad,California,US,True,False,False,True,True,True
366,Live Wire,Live Wire,https://twitter.com/livewirebar?lang=en,https://www.facebook.com/livewirebar/,https://www.instagram.com/livewirebar/?hl=en,,32.754955,-117.142371,2103 El Cajon Blvd,San Diego,California,US,True,False,False,False,False,False
379,Rose's Tasting Room,Rose's Tasting Room,,https://www.facebook.com/RosesTastingRoom/,https://www.instagram.com/roses_tasting_room/,,32.754923,-117.197624,2754 Calhoun St. Ste G,San Diego,California,US,True,False,False,False,False,False
381,Tap That Tap Room,Tap That Tap Room,https://twitter.com/TapThatSD,https://www.facebook.com/tapthatkeg?fref=ts,,,33.215954,-117.350182,"3207 Roymar Rd, Ste E",Oceanside,California,US,True,False,False,True,False,False


In [29]:
# Rows where deactivated is False, visible is True and profile_location is True
is_active = df['deactivated'].eq(False)
is_visible = df['visible'].eq(True)
is_profile_location = df['profile_location'].eq(True)
df[is_active & is_visible & is_profile_location]

Unnamed: 0,name_brewery,name_location,social_twitterUrl,social_facebookUrl,social_instagramUrl,social_untappdId,lat,lng,coords,street,...,website,uuid,logo_url,profile_location,tag_dogFriendly,tag_familyFriendly,tag_foodService,tag_foodTrucks,tag_growlerFills,tag_over21
0,South Park Brewing Company,South Park Brewing Company,https://twitter.com/SouthParkBrewCo,https://www.facebook.com/southparkbrewco,https://www.instagram.com/southparkbrewco/,189064,32.721667,-117.130113,"[-117.1301127, 32.7216668]",1517 30th St,...,http://www.southparkbrewing.com/,WyBROCGvMw52,https://s3-us-west-1.amazonaws.com/paradeigm-s...,True,True,True,True,False,True,False
1,Rouleur Brewing Company,Rouleur Brewing Company,https://twitter.com/rouleurbrewing,https://www.facebook.com/rouleurbrewing,https://www.instagram.com/rouleurbrewing/,332529,33.134462,-117.269810,"[-117.2698102, 33.1344625]","5840 El Camino Real, Suite 101",...,http://rouleurbrewing.com/,mq5pOQdvyAmg,https://s3-us-west-1.amazonaws.com/sdbeer2/llb...,True,True,True,False,True,True,False
5,Thorn Brewing Co.,Thorn Brewing Co. - Barrio Logan,https://twitter.com/ThornStBrewery,https://www.facebook.com/thornstreetbrew,https://www.instagram.com/thornstreetbrewery,47013,32.702261,-117.147937,"[-117.1479369, 32.702261]",1735 National Avenue,...,http://thorn.beer,iEulEhPtjS6G,https://s3-us-west-1.amazonaws.com/sdbeer2/oDP...,True,True,True,False,True,True,False
6,Thorn Brewing Co.,Thorn Street Brewery,https://twitter.com/ThornStBrewery,https://www.facebook.com/thornstreetbrew,https://www.instagram.com/thornstreetbrewery,47013,32.739399,-117.125499,"[-117.1254989, 32.7393987]",3176 Thorn Street,...,http://thorn.beer,iEulEhPtjS6G,https://s3-us-west-1.amazonaws.com/sdbeer2/oDP...,True,True,True,False,True,True,False
9,Thorn Brewing Co.,Thorn Brewing Co. - Mission Hills,https://twitter.com/ThornStBrewery,https://www.facebook.com/thornstreetbrew,https://www.instagram.com/thornstreetbrewery,47013,32.750411,-117.172590,"[-117.1725897, 32.7504106]",4026 Hawk St Suite A,...,http://thorn.beer,iEulEhPtjS6G,https://s3-us-west-1.amazonaws.com/sdbeer2/oDP...,True,True,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,The Lost Abbey,"The Lost Abbey, Cardiff Confessional",https://twitter.com/lostabbey,https://www.facebook.com/port.lostabbey,http://instagram.com/lostabbey,2712,33.022186,-117.282216,"[-117.282216, 33.0221861]",2007 San Elijo Ave,...,http://lostabbey.com/,j2eGt62XrP7f,https://s3-us-west-1.amazonaws.com/sdbeer2/qly...,True,True,True,False,False,True,False
173,The Lost Abbey,"The Lost Abbey, San Marcos",https://twitter.com/lostabbey,https://www.facebook.com/port.lostabbey,http://instagram.com/lostabbey,2712,33.141627,-117.149267,"[-117.1492671, 33.1416273]","155 Mata Way, Ste #104",...,http://lostabbey.com/,j2eGt62XrP7f,https://s3-us-west-1.amazonaws.com/sdbeer2/qly...,True,False,True,False,True,True,False
174,Duck Foot Brewing Co.,Duck Foot Brewing Co. Taproom & Brewery | Miramar,https://twitter.com/duckfootbeer,https://www.facebook.com/DuckFootBeer/,https://www.instagram.com/duckfootbeer/,105084,32.885499,-117.156392,"[-117.1563922, 32.8854988]","8920 Kenamar Drive, Suite #210",...,http://duckfootbeer.com/,k3yuuQ3DADQK,https://s3-us-west-1.amazonaws.com/sdbeer2/VsK...,True,True,True,False,False,True,False
176,Duck Foot Brewing Co.,Duck Foot Brewing Co. Taproom & Kitchen | East...,https://twitter.com/duckfootbeer,https://www.facebook.com/DuckFootBeer/,https://www.instagram.com/duckfootbeer/,105084,32.710897,-117.154195,"[-117.1541951, 32.7108972]",550 Park Boulevard Ste 2104,...,http://duckfootbeer.com/,k3yuuQ3DADQK,https://s3-us-west-1.amazonaws.com/sdbeer2/VsK...,True,True,True,True,False,True,False


In [119]:
# df.to_csv('data/sd_brewers_guild.csv')

In [54]:
# Initialize the SD map
sdMap = folium.Map(location=[32.71533, -117.15726], tiles='Stamen Toner', zoom_start=9)
dogFriendly_df = df[df['tag_dogFriendly'] == True]

# Drop a small red circle on every dog-friendly location
for lat, lng in zip(dogFriendly_df['lat'], dogFriendly_df['lng']):
    marker = folium.CircleMarker((lat, lng), radius=3, weight=2, color='red', fill_color='red', fill_opacity=.5)
    marker.add_to(sdMap)

# Overlay a heatmap of the same points. The parameters used are:
# --data: array of (latitude, longitude) pairs, one per dog-friendly location
# --radius: radius of each point's contribution to the heatmap
# --blur: how strongly neighbouring points blend together
heat_points = dogFriendly_df[['lat', 'lng']].to_numpy()
sdMap.add_child(plugins.HeatMap(data=heat_points, radius=10, blur=3))

# Save the map as an html
# sdMap.save('sdHeatmap.html')
sdMap