In [None]:
# Import dependencies

import os
import time
from urllib.request import urlopen

import pandas as pd
import requests
from bs4 import BeautifulSoup
from googlemaps import Client as GoogleMaps
from splinter import Browser

In [None]:
# Page to scrape: the Eater SF map of dog-friendly bars and restaurants

url = "https://sf.eater.com/maps/san-francisco-dog-friendly-bars-restaurants"


def getHTMLContent(link):
    """Download a page and parse it with BeautifulSoup's "html.parser".

    Parameters
    ----------
    link : str or urllib.request.Request
        Address of the page to fetch; passed straight to ``urlopen``.

    Returns
    -------
    BeautifulSoup
        Parsed document tree for the downloaded page.

    Notes
    -----
    Any exception raised by ``urlopen`` propagates to the caller.
    """
    # The context manager closes the HTTP response once the body has been
    # read, instead of leaving the connection open until garbage collection.
    with urlopen(link) as response:
        html = response.read()
    return BeautifulSoup(html, "html.parser")

# Download and parse the map page defined by `url`

content = getHTMLContent(url)

# Collect every <div> whose class is "c-mapstack__card-hed";
# the restaurant data is looked up inside these elements

rest_data = content.find_all("div", attrs={"class": "c-mapstack__card-hed"})

In [None]:
# Sanity check: show how many "c-mapstack__card-hed" elements were found,
# i.e. how many restaurant entries the scrape picked up

len(rest_data)

In [None]:
# Pull the text out of each restaurant header card.
# NOTE(review): `rest_names` was never defined anywhere in this notebook, so this
# cell raised NameError on a fresh kernel. It is assumed here to be the text of
# each header card -- confirm that the 4-character strip below still lines up.

rest_names = [card.get_text() for card in rest_data]

# Drop the first four characters of every entry (the extra leading characters)

list_names = [name[4:] for name in rest_names]

# Display the names to check if data was correctly gathered

list_names

In [None]:
# The scraped names still carry non-breaking spaces ("\xa0").
# Build a cleaned copy of the list with each one swapped for a regular space.

clean_list = [raw_name.replace("\xa0", " ") for raw_name in list_names]

# Show the cleaned restaurant names

clean_list

In [None]:
# Locate every info panel: a <div> with class "c-mapstack__info"

rest_address = content.find_all("div", class_="c-mapstack__info")

# For each info panel, take the child nodes of its "c-mapstack__address" <div>

address_list_items = [
    info_panel.find("div", class_="c-mapstack__address").contents
    for info_panel in rest_address
]

# Show the raw address pieces

address_list_items

In [None]:
# Flatten each address into a single "street, city state zip" string.
# Pieces 0 and 2 of every entry are joined; piece 1 is skipped
# (presumably a line-break tag between the two text lines -- confirm).

new_address_list = [f"{parts[0]}, {parts[2]}" for parts in address_list_items]

# Show the one-line addresses

new_address_list

In [None]:
# Collect the cleaned columns in a dictionary keyed by column name.
# The address scrape contains duplicates, so keep only as many leading
# addresses as there are restaurant names (the original kept the first 12).
# This also guarantees both columns have the same length.
# Fixes a NameError: the address list is `new_address_list`, not `new_list`.

final_df = {
    "name": clean_list,
    "address": new_address_list[: len(clean_list)],
}


In [None]:
# Build a Pandas DataFrame with one column per dictionary key

final_data = pd.DataFrame(final_df)

# Show the resulting table

final_data

In [None]:
# The website has no latitude/longitude, so the Google Maps API is used to look them up.
# The key is read from the GOOGLE_API_KEY environment variable so it is never saved
# inside the notebook; the placeholder is only a fallback and is not a usable key.

google_API = os.environ.get("GOOGLE_API_KEY", "API KEY HERE!")

# Create the Google Maps client used for geocoding

gmaps = GoogleMaps(google_API)

# Add empty latitude and longitude columns to be filled in by the geocoding step

final_data["latitude"] = ""
final_data["longitude"] = ""

In [None]:
# Geocode every address and write the coordinates onto the matching row.
# `.at[row, column]` assigns directly into the DataFrame; the previous chained
# form (final_data["latitude"][i] = ...) can write to a temporary copy instead.

for row_label, address in final_data["address"].items():
    geocode_results = gmaps.geocode(address)
    if not geocode_results:
        # No match for this address: keep the empty placeholders and report it
        print(f"No geocoding result for: {address}")
        continue
    location = geocode_results[0]["geometry"]["location"]
    final_data.at[row_label, "latitude"] = location["lat"]
    final_data.at[row_label, "longitude"] = location["lng"]

In [None]:
# Display the data frame again to confirm the latitude and longitude
# columns were filled in by the geocoding loop

final_data

In [None]:
# Write the finished table to "eater_dog_rest.csv" (without the index column) for later use

final_data.to_csv(path_or_buf="eater_dog_rest.csv", index=False)