In [152]:
import requests
from config import api_key
import pandas as pd
import pathlib
from pprint import pprint
from bs4 import BeautifulSoup as bs
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import time
import numpy as np
import random

In [153]:
# Read the store coordinates table from the shared resources folder
# and preview the first rows.
locations_csv = pathlib.Path("../resources") / "locations.csv"
location = pd.read_csv(locations_csv)
location.head()


Unnamed: 0,lon,lat
0,-118.25941,34.145179
1,-84.131825,33.95592
2,-122.509156,37.871872
3,-112.0287,33.378501
4,-122.299943,37.900616


In [154]:
# Yelp API v3 business-search endpoint; every store lookup in this notebook
# is a GET against this URL with term/latitude/longitude/limit query params.
business_endpoint = "https://api.yelp.com/v3/businesses/search"


In [155]:
# Authorization header shared by every Yelp API call in this notebook.
header = {"Authorization": "Bearer %s" % api_key}

# Default search query: the single closest "Panda Express" to a coordinate.
# latitude/longitude are overwritten per store by the scraping loop.
params = dict(
    term="Panda Express",
    latitude="34.145179",
    longitude="-118.259410",
    limit=1,
)

# One sample search against the default coordinates.
response = requests.get(business_endpoint, headers=header, params=params).json()

# Accumulators filled by the scraping loop: every business id that was looked
# up, plus three parallel lists (one entry per scraped review).
business_id_list = []
review_dict = {column: [] for column in ("reviews", "rating", "review_count")}

In [156]:
def get_business_id(response)->str:
    """Return the id of the first business in a business-search response.

    Parameters
    ----------
    response : decoded JSON body (normally a dict) expected to hold a
        non-empty "businesses" list whose first entry has an "id" key.

    Returns
    -------
    The business id, or the sentinel string "Error" when the response does
    not have that shape (missing key, empty list, or not subscriptable).
    Callers compare against "Error" to skip the store.
    """
    try:
        return response["businesses"][0]["id"]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures map to the sentinel; a bare except would also
        # swallow KeyboardInterrupt/SystemExit and make the loop unstoppable.
        return "Error"

def get_url(response)->str:
    """Return the "url" field of the first review in a reviews response.

    Parameters
    ----------
    response : decoded JSON body (normally a dict) expected to hold a
        non-empty "reviews" list whose first entry has a "url" key.

    Returns
    -------
    The url string, or the sentinel string "Error" when the response does
    not have that shape (missing key, empty list, or not subscriptable).
    Callers compare against "Error" to skip the store.
    """
    try:
        return response["reviews"][0]["url"]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures map to the sentinel; a bare except would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return "Error"

def get_rating(response)->int:
    """Return the "rating" of the first business in a business-search response.

    Parameters
    ----------
    response : decoded JSON body (normally a dict) expected to hold a
        non-empty "businesses" list whose first entry has a "rating" key.

    Returns
    -------
    The rating value exactly as it appears in the response, or the sentinel
    string "Error" when the response does not have that shape (missing key,
    empty list, or not subscriptable).
    """
    try:
        return response["businesses"][0]["rating"]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures map to the sentinel; a bare except would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return "Error"

def get_review_count(response)->int:
    """Return the "review_count" of the first business in a search response.

    Parameters
    ----------
    response : decoded JSON body (normally a dict) expected to hold a
        non-empty "businesses" list whose first entry has a "review_count"
        key.

    Returns
    -------
    The review count exactly as it appears in the response, or the sentinel
    string "Error" when the response does not have that shape (missing key,
    empty list, or not subscriptable).
    """
    try:
        return response["businesses"][0]["review_count"]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures map to the sentinel; a bare except would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return "Error"




In [157]:
# randomly select up to 1000 stores and get the reviews
random.seed(2)  # fixed seed so the same stores are sampled on every run
# random.sample raises ValueError when k exceeds the population, so cap k
# at the number of available locations.
index = random.sample(list(location.index), k=min(1000, len(location.index)))

# aria-label values to look for on each review's star widget.
# Whole stars only: the previous int() truncation of 0.5 steps produced this
# same set of labels, just with duplicates.
stars = [f"{star} star rating" for star in range(1, 6)]

# loop through each sampled store
for i in index:
    # `index` holds index labels, so look coordinates up by label and by
    # column name; positional access silently reads the wrong column when the
    # CSV carries an extra leading column (e.g. a saved "Unnamed: 0" index).
    params["latitude"] = location.loc[i, "lat"]
    params["longitude"] = location.loc[i, "lon"]
    # find the business closest to this coordinate
    response = requests.get(business_endpoint, params=params, headers=header).json()

    # record the business id (including the "Error" sentinel) and review count
    business_id = get_business_id(response)
    business_id_list.append(business_id)
    review_count = get_review_count(response)

    # no usable business for this coordinate: move on to the next store
    if business_id == "Error":
        continue

    # the reviews endpoint is only used to obtain a link to the business page
    review_endpoint = f"https://api.yelp.com/v3/businesses/{business_id}/reviews"
    response_review = requests.get(review_endpoint, headers=header).json()
    url = get_url(response_review)
    if url == "Error":
        continue

    response_html = requests.get(url)
    time.sleep(1.5)  # pause between page fetches
    soup = bs(response_html.text, 'html.parser')

    # review texts and review containers, matched by their CSS class strings
    reviews = soup.find_all("p","lemon--p__373c0__3Qnnj text__373c0__2Kxyz comment__373c0__3EKjH text-color--normal__373c0__3xep9 text-align--left__373c0__2XGa-")
    ratings_bucket = soup.find_all("div","lemon--div__373c0__1mboc review__373c0__13kpL sidebarActionsHoverTarget__373c0__2kfhE arrange__373c0__2C9bH gutter-2__373c0__1DiLQ grid__373c0__1Pz7f layout-stack-small__373c0__27wVp border-color--default__373c0__3-ifU")
    # for each review container, the elements whose aria-label is a star label
    ratings = [bucket.find_all(attrs = {"aria-label":stars}) for bucket in ratings_bucket]

    # zip yields nothing for an empty page, so no length check is needed
    # (the former `len(reviews) > 1` test dropped pages with a single review)
    for review, rating in zip(reviews, ratings):
        if not rating:
            # container without a star label: skip it rather than crash the
            # whole run with an IndexError on rating[0]
            continue
        review_dict["reviews"].append(review.text)
        review_dict["rating"].append(rating[0]["aria-label"])
        review_dict["review_count"].append(review_count)
    
                
        

In [162]:
# Turn the collected review columns into a table and save it next to the
# other resources.
review_data = pd.DataFrame(review_dict)
review_data.to_csv(path_or_buf="../resources/reviews.csv")
