### YelpDataCollection
Queries the Yelp search API to get those businesses that 'gluten_free' category for a given zip code.  
https://www.yelp.com/developers/documentation/v3/business_search

In [8]:
#-- Import Libraries
import pandas as pd
import os
import requests

# Yelp API key in secrets.py; .gitignore prevents the secrets.py from being pushed to GitHub
from secrets import yelpKey


In [17]:
def getDataForZipcode(isGlutenFreeSearch, searchZipCode):
    ''' Searches the Yelp API to get the business that satisfy the filter
    
    Accepts : isGlutenFreeSearcj (bool) TRUE- search for gluten free term FALSE- search just for restaurant
                searchZipCode (str) zip code to search for records within
    
    Returns : (dictionary) contains information of business for the zip code
                ID: Unique Yelp ID for the business
                Name: Name of the business
                ZipCode: Location of the business
                Latitude: coordinate of the business location
                Longitude: coordinate of the business location
                Price: Price level of the business. Value is one of $, $$, $$$, $$$$ and NA
                Rating: Rating for this business (value ranges from 1, 1.5, ... 4.5, 5)
    '''
    
    #- Prepare Search
    # Source Url
    baseYelpUrl = "https://api.yelp.com/v3/businesses/search"

    # API Key passed through header
    headers = {
            'Authorization': 'Bearer %s' % yelpKey,
    }
    
    # Search Term
    searchTerm = 'restaurant'
    
    if (isGlutenFreeSearch == True):
        searchTermin = 'gluten_free,restaurant'
    
    
    # Dictionary stores data
    yelpData = {
        'ID': [],
        'Name': [],
        'Zipcode': [],
        'Latitude': [],
        'Longitude': [],
        'Price' : [],
        'Rating' : []
    }
    
    
    #- Search
    #  API limits 50 records being returned at once; must loop and request offset of results to get all records
    recordLimit = 50
    currentOffset = 0
    hasMoreData = True
    
    while hasMoreData == True:
        
        #- Prepare Parameters
        parameters = {
            'location': searchZipCode,
            'term': searchTerm,
            'limit': recordLimit,
            'radius': 3000,
            'offset': currentOffset,
            }
        
        
        #- Request
        print(f"  Requesting data. Offset: {currentOffset}")
        
        response = requests.request('GET', baseYelpUrl, headers=headers, params=parameters)
        
        
        #- Check Response
        if (response.status_code == requests.codes.ok):
            
            # Get Json from Response
            responseJson = response.json()
            
            
            # Search Businesses
            for business in responseJson['businesses']:
                
                # Determine Use Business
                useBusiness = checkBusinessForUsage(business, searchZipCode)
                
                if (useBusiness == True):
                    
                    # Populate Dictionary with Business Information
                    yelpData['ID'].append(business['id'])
                    yelpData['Name'].append(business['name'])
                    yelpData['Zipcode'].append(business['location']['zip_code'])
                    
                    yelpData['Latitude'].append(business['coordinates']['latitude'])
                    yelpData['Longitude'].append(business['coordinates']['longitude'])
                    
                    yelpData['Price'].append(getPriceForBusiness(business))
                    yelpData['Rating'].append(business['rating'])
          
        
        #- Prepare for Next search
        # API only supports 50 records at a time; must query with offset
        currentOffset = (currentOffset + recordLimit)
        
        if (currentOffset > responseJson['total']):
            print(f"Collected all data. Current Offset: {currentOffset}  Total: {responseJson['total']}")
            hasMoreData = False
    
                  
    #- Metadata on Data
    print(f"Search Zipcode: {searchZipCode}")
    print(f"Total businesses: {len(yelpData['ID'])}")
               
          
    #- Return data from function
    return yelpData


In [14]:
def checkBusinessForUsage(businessInfo, searchZipcode):
    ''' Determines if the business can be used in the Analysis
    
    Accepts : businessInfo (dictionary) contains the metadata for individual business 
                searchZipCode (str) zip code searching for data within
    
    Return : bool TRUE- business meets critera, able to use FALSE- unable to use business
    '''
    
    #- Check Within Search Zipcode
    businessZipCode = businessInfo['location']['zip_code']
    
    if (businessZipCode != searchZipcode):
        return False
    
    
    return True
     

In [15]:
def getPriceForBusiness(businessInfo):
    ''' Gets the price for a business; not all businesses contain this property within the JSON;
    when not found just uses NA.
    
    Accepts : businessInfo (dictionary) metadata on an individual business
    
    Returns : (num) value from price tag
    '''
    try:
        
        return businessInfo['price']
    
    except:
        return 'NA'

In [20]:
#-- Collect Data, Multiple Datasets

#- UserName
userName = "Scott"

#- Get File of Random Zipcodes
print("--->")
print("Getting random zipcodes...")


fileName = f'RandomZipCodes_{userName}.csv'
randomZipcodesPath = os.path.join(".", "Output_2", fileName)

randomZipcodes_df = pd.read_csv(randomZipcodesPath)


#- Collect Data
for index, row in randomZipcodes_df.iterrows():
    
    #- Get Zipcode
    searchZipCode = str(row[1])
    
    print(" ")
    print(">>--")
    print(searchZipCode)
    
    
    #-- Gluten Free
    print("-Gluten Free")
    
    #- Get Data from Yelp: Gluten Free Search
    yelpDataForZipCode = getDataForZipcode(True, searchZipCode)

    #- Create DataFrame
    yelpData_df = pd.DataFrame(yelpDataForZipCode)
    
    #- Export Data
    dataExportPath = os.path.join(".", "Output_2", f"YelpData_{searchZipCode}.csv")

    yelpData_df.to_csv(dataExportPath)
    
    
    #-- All Restaurants
    print("-All Restaurants")
    
    #- Get Data
    yelpDataRestaurantForZipCode = getDataForZipcode(False, searchZipCode)
    
    #- Create Data Frame
    yelpDataRestaurant_df = pd.DataFrame(yelpDataRestaurantForZipCode)
    
    #- Export Data
    dataRestaurantExportPath = os.path.join(".", "Output_2", f"YelpDataAll_{searchZipCode}.csv")
    
    yelpDataRestaurant_df.to_csv(dataRestaurantExportPath)
    
    
    print(f"Exported data to disk. Path: {dataExportPath}")

    
#- Complete Message
print(" ")
print("Completed getting information for zipcodes")


--->
Getting random zipcodes...
 
>>--
2878
-Gluten Free
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
Search Zipcode: 2878
Total businesses: 0
-All Restaurants
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
Search Zipcode: 2878
Total businesses: 0
Exported data to disk. Path: ./Output_2/YelpData_2878.csv
 
>>--
44875
-Gluten Free
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 16
Search Zipcode: 44875
Total businesses: 16
-All Restaurants
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 16
Search Zipcode: 44875
Total businesses: 16
Exported data to disk. Path: ./Output_2/YelpData_44875.csv
 
>>--
97442
-Gluten Free
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 1
Search Zipcode: 97442
Total businesses: 1
-All Restaurants
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 1
Search Zipcode: 97442
Total businesses: 1
E

In [None]:
#-- Collect Data

# Zip Code to search
# Able to pass this as function in future
searchZipCode = "92104"


#- Get Data From Yelp
yelpDataForZipCode = getDataForZipcode(searchZipCode)


#- Create DataFrame
yelpData_df = pd.DataFrame(yelpDataForZipCode)

In [None]:
#-- Export Data
dataExportPath = os.path.join(".", "Output", f"YelpData_{searchZipCode}.csv")

yelpData_df.to_csv(dataExportPath)

print(f"Exported data to disk. Path: {dataExportPath}")

In [None]:
#-- Preview Data
yelpData_df.head(200)