# resaurantRating

## ETL Group Project

    Germaine Johnson, Jeremy Jones, Reza Abasaltian
    October 27, 2020

### Google Places API - Text Search

In [1]:
# Import dependencies
import requests
from pprint import pprint
import pandas as pd
from datetime import datetime, timedelta

# Google developer API key
from api_key import gkey

# Set today as current date and time
t = datetime.now()

# Print todays date formatted as mm/dd/yy
date = t.strftime('%m/%d/%y')

# format time to round to the nearest hour in hundreds
time = (t.replace(second=0, microsecond=0, minute=0, hour=t.hour)
                        + timedelta(hours=t.minute//30))
hour = time.strftime('%H'+'00')

print(f'Today is {date} @ {hour} hour.')

Today is 10/23/20 @ 1700 hour.


In [2]:
# assign zip code for the base of our search
target_zip = "77056"

# distance, IN METERS, within which the place results must live from assigned zip code
target_radius = 11111

# type of establishment to filter place results
target_type = "restaurant"

In [3]:
# base url
base_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"

In [4]:
# set up a dictionary to store all query parameters - for initial page and first 20 listings
params = {
    "query": target_zip,
    "radius": target_radius,
    "type": target_type,
    "key": gkey
}

# run a request using our params dictionary
response = requests.get(base_url, params=params)

# print the response status code
print(f'first response: {response.status_code}')

first response: 200


In [5]:
# convert the response to json
response_json = response.json()

In [6]:
print(f'Total {target_type} listings retrieved on first response: {len(response_json["results"])}')

Total restaurant listings retrieved on first response: 20


In [7]:
# set up a dictionary to store all query parameters - next page and next 20 listings, second response
params = {
    "query": target_zip,
    "radius": target_radius,
    "type": target_type,
    "key": gkey,
    "pagetoken": response_json['next_page_token']
}

# run a request using our params dictionary
response2 = requests.get(base_url, params=params)

# print the response status code
print(f'second response: {response2.status_code}')

# convert the response to json
response_json2 = response2.json()

second response: 200


In [8]:
# set up a dictionary to store all query parameters - next page and next 20 listings, third response
params = {
    "query": target_zip,
    "radius": target_radius,
    "type": target_type,
    "key": gkey,
    "pagetoken": response_json2['next_page_token']
}

# run a request using our params dictionary
response3 = requests.get(base_url, params=params)

# print the response status code
print(f'third response: {response3.status_code}')

# convert the response to json
response_json3 = response3.json()

third response: 200


In [9]:
def getPlaces(response_json, i, date, hour):
    places = []
    for result in response_json['results']:
        place = {}
        try:
            place['id'] = i
            place['name'] = result['name']
            address = result['formatted_address']
            s = address.split(', ')
            s2 = s[2].split(' ')   
            place['street'] = s[0]
            place['city'] = s[1]
            place['state'] = s2[0]
            place['zip code'] = s2[1]
            place['avg rating'] = result['rating']
            place['total ratings'] = result['user_ratings_total']
            place['price level'] = result['price_level']
            place['date'] = date
            place['hour'] = hour
            places.append(place)
            i+=1
        
        except (KeyError, IndexError) as e:
            if str(e) == "'price_level'":
                place['price level'] = "NA"                  
                places.append(place)
                print(f'Missing field/result... set NA. {str(e)}, listing {i}')
                i+=1
            else:
                print(f'Missing field/result... skipping. {str(e)}')        
    return places

In [10]:
# call get place function for each response
df_places1 = pd.DataFrame(getPlaces(response_json,0,date,hour))
df_places2 = pd.DataFrame(getPlaces(response_json2,len(df_places1),date,hour))
df_places3 = pd.DataFrame(getPlaces(response_json3,(len(df_places1)+len(df_places2)),date,hour))

Missing field/result... set NA. 'price_level', listing 2
Missing field/result... set NA. 'price_level', listing 7
Missing field/result... skipping. list index out of range
Missing field/result... set NA. 'price_level', listing 18
Missing field/result... set NA. 'price_level', listing 24
Missing field/result... set NA. 'price_level', listing 26
Missing field/result... set NA. 'price_level', listing 27
Missing field/result... set NA. 'price_level', listing 28
Missing field/result... set NA. 'price_level', listing 31
Missing field/result... set NA. 'price_level', listing 39
Missing field/result... set NA. 'price_level', listing 44
Missing field/result... set NA. 'price_level', listing 49
Missing field/result... skipping. list index out of range


In [11]:
# Concatenate all 3 API responses
df_places = pd.concat([df_places1, df_places2, df_places3], axis=0)
df_places = df_places.reset_index(drop=True)
df_places.head(10)

Unnamed: 0,id,name,street,city,state,zip code,avg rating,total ratings,price level,date,hour
0,0,Murphy's Deli,2800 Post Oak Blvd,Houston,TX,77056,5.0,15,1.0,10/23/20,1700.0
1,1,Peli Peli South African Kitchen - Galleria,5085 Westheimer Rd B2515,Houston,TX,77056,4.5,1203,3.0,10/23/20,1700.0
2,2,Bazille,5192 Hidalgo St,Houston,TX,77056,4.4,211,,,
3,3,Alexander the Great,3055 Sage Rd,Houston,TX,77056,4.5,383,2.0,10/23/20,1700.0
4,4,Caracol Restaurant,2200 Post Oak Blvd #160,Houston,TX,77056,4.6,2117,3.0,10/23/20,1700.0
5,5,The Oceanaire Seafood Room,5061 Westheimer Rd,Houston,TX,77056,4.4,826,3.0,10/23/20,1700.0
6,6,Piatto Ristorante,4925 W Alabama St,Houston,TX,77056,4.4,198,2.0,10/23/20,1700.0
7,7,Musaafer,5115 Westheimer Rd Suite C-3500,Houston,TX,77056,4.6,138,,,
8,8,FIG & OLIVE Tasting Kitchen & Bar,5115 Westheimer Rd Suite C2500,Houston,TX,77056,4.3,550,3.0,10/23/20,1700.0
9,9,Morton's The Steakhouse,5000 Westheimer Rd Suite 190,Houston,TX,77056,4.6,594,4.0,10/23/20,1700.0


In [12]:
# sorted dataframe by total ratings
df_ratings = df_places.sort_values(by='total ratings', ascending=False)
df_ratings = df_ratings.reset_index(drop=True)
df_ratings.head(10)

Unnamed: 0,id,name,street,city,state,zip code,avg rating,total ratings,price level,date,hour
0,20,Grand Lux Cafe,5000 Westheimer Rd,Houston,TX,77056,4.2,2882,2,10/23/20,1700
1,25,The Cheesecake Factory,5015 Westheimer Rd,Houston,TX,77056,4.1,2681,2,10/23/20,1700
2,40,Maggiano's Little Italy,2019 Post Oak Blvd,Houston,TX,77056,4.5,2653,2,10/23/20,1700
3,29,North Italia,1700 Post Oak Blvd Ste 190,Houston,TX,77056,4.6,2193,2,10/23/20,1700
4,4,Caracol Restaurant,2200 Post Oak Blvd #160,Houston,TX,77056,4.6,2117,3,10/23/20,1700
5,48,Kenny & Ziggy's New York Delicatessen,2327 Post Oak Blvd,Houston,TX,77056,4.6,1947,2,10/23/20,1700
6,51,Bubba's Texas Burger Shack,5230 Westpark Dr,Houston,TX,77056,4.6,1696,1,10/23/20,1700
7,16,Truluck's,5350 Westheimer Rd,Houston,TX,77056,4.6,1519,3,10/23/20,1700
8,15,Moxie's,5000 Westheimer Rd,Houston,TX,77056,4.2,1385,2,10/23/20,1700
9,42,Dimassi's Mediterranean Buffet,5160 Richmond Ave,Houston,TX,77056,4.4,1273,2,10/23/20,1700
