### YelpDataCollection
Queries the Yelp search API to get the businesses that match the 'gluten_free' search term for a given zip code.  
https://www.yelp.com/developers/documentation/v3/business_search

In [1]:
#-- Import Libraries
import pandas as pd
import os
import requests

# Yelp API key in secrets.py; .gitignore prevents the secrets.py from being pushed to GitHub
from secrets import yelpKey


In [2]:
def getDataForZipcode(searchZipCode, searchTerm='gluten_free', radius=3000):
    ''' Searches the Yelp API to get the businesses that satisfy the search term
    
    Pages through the Yelp business search results (50 records per request) and keeps
    only the businesses accepted by checkBusinessForUsage.
    
    Accepts : searchZipCode (str) zip code to search for records within
              searchTerm (str) value sent as the 'term' parameter; defaults to 'gluten_free'
              radius (int) value sent as the 'radius' parameter; defaults to 3000
    
    Returns : (dictionary) contains information of business for the zip code;
              each key maps to a list with one entry per business
                ID: Unique Yelp ID for the business
                Name: Name of the business
                Zipcode: Location of the business
                Latitude: coordinate of the business location
                Longitude: coordinate of the business location
                Price: Price level of the business. Value is one of $, $$, $$$, $$$$ and NA
                Rating: Rating for this business (value ranges from 1, 1.5, ... 4.5, 5)
              When a request fails, the data collected up to that point is returned.
    '''
    
    #- Prepare Search
    # Source Url
    baseYelpUrl = "https://api.yelp.com/v3/businesses/search"

    # API Key passed through header
    headers = {
        'Authorization': 'Bearer %s' % yelpKey,
    }
    
    # Dictionary stores data
    yelpData = {
        'ID': [],
        'Name': [],
        'Zipcode': [],
        'Latitude': [],
        'Longitude': [],
        'Price': [],
        'Rating': []
    }
    
    
    #- Display Message
    print(" ")
    print(f"Started search of Yelp API zip code: {searchZipCode}...")
    
    
    #- Search
    #  API limits 50 records being returned at once; must loop and request offset of results to get all records
    recordLimit = 50
    currentOffset = 0
    
    while True:
        
        #- Prepare Parameters
        parameters = {
            'location': searchZipCode,
            'term': searchTerm,
            'limit': recordLimit,
            'radius': radius,
            'offset': currentOffset,
        }
        
        
        #- Request
        print(f"  Requesting data. Offset: {currentOffset}")
        
        # Timeout keeps a stalled connection from hanging the notebook indefinitely
        response = requests.get(baseYelpUrl, headers=headers, params=parameters, timeout=30)
        
        
        #- Check Response
        # A failed request has no usable 'total', so paging cannot continue safely;
        # stop here and return whatever was collected so far.
        if response.status_code != requests.codes.ok:
            print(f"  Request failed. Status: {response.status_code}  Offset: {currentOffset}. Stopping search.")
            break
        
        # Get Json from Response
        responseJson = response.json()
        businesses = responseJson.get('businesses', [])
        totalAvailable = responseJson.get('total', 0)
        
        
        # Search Businesses
        for business in businesses:
            
            # Determine Use Business
            if checkBusinessForUsage(business, searchZipCode, searchTerm):
                
                # Populate Dictionary with Business Information
                yelpData['ID'].append(business['id'])
                yelpData['Name'].append(business['name'])
                yelpData['Zipcode'].append(business['location']['zip_code'])
                
                yelpData['Latitude'].append(business['coordinates']['latitude'])
                yelpData['Longitude'].append(business['coordinates']['longitude'])
                
                yelpData['Price'].append(getPriceForBusiness(business))
                yelpData['Rating'].append(business['rating'])
        
        
        #- Prepare for Next search
        # API only supports 50 records at a time; must query with offset
        currentOffset += recordLimit
        
        # '>=' because offsets are zero-based: once the offset equals the total there is
        # nothing left to fetch. An empty page also ends the search so an inconsistent
        # 'total' cannot keep the loop requesting forever.
        if currentOffset >= totalAvailable or not businesses:
            print(f"Collected all data. Current Offset: {currentOffset}  Total: {totalAvailable}")
            break
    
    
    #- Metadata on Data
    print(" ")
    print(f"Search Zipcode: {searchZipCode}")
    print(f"Total businesses: {len(yelpData['ID'])}")
    
    
    #- Return data from function
    return yelpData


In [3]:
def checkBusinessForUsage(businessInfo, searchZipcode, searchTerm):
    ''' Decides whether a business should be included in the Analysis
    
    Accepts : businessInfo (dictionary) metadata for an individual business; read via
                ['location']['zip_code']
              searchZipcode (str) zip code the search was made for
              searchTerm (str) category used with the search; currently not checked (see NOTE)
    
    Return : bool TRUE- business' zip code equals searchZipcode FALSE- it does not
    '''
    
    # NOTE: Matching the business' category aliases against searchTerm was tried, but
    #       very few records satisfied it, so only the zip code is compared here.
    return businessInfo['location']['zip_code'] == searchZipcode
     

In [4]:
def getPriceForBusiness(businessInfo):
    ''' Gets the price for a business; not all businesses contain this property within the JSON;
    when not found just uses NA.
    
    Accepts : businessInfo (dictionary) metadata on an individual business
    
    Returns : (str) value stored under the 'price' key, or 'NA' when it is unavailable
    '''
    try:
        return businessInfo['price']
    
    # Only "no price available" cases fall back to 'NA': a missing key, or a value that
    # cannot be indexed by key (e.g. None). Anything else, such as KeyboardInterrupt,
    # is allowed to propagate instead of being silently swallowed.
    except (KeyError, TypeError):
        return 'NA'

In [None]:
#-- Collect Data, Multiple Datasets
# Reads the list of random zip codes for this user, queries Yelp for each one and
# writes one CSV per zip code to ./Output

#- UserName
userName = "Scott"

#- Get File of Random Zipcodes
print("--->")
print("Getting random zipcodes...")


fileName = f'RandomZipCodes_{userName}.csv'
randomZipcodesPath = os.path.join(".", "Output", fileName)

# Read everything as text: parsing zip codes as integers drops leading zeros
# (e.g. "02134" becomes 2134), which then never matches the zip code Yelp returns.
randomZipcodes_df = pd.read_csv(randomZipcodesPath, dtype=str)


#- Collect Data
# The zip code is taken from the second column (position 1); rows without a value are skipped
for rawZipCode in randomZipcodes_df.iloc[:, 1].dropna():
    
    #- Get Zipcode
    # zfill restores leading zeros in case the file itself was written from integers
    searchZipCode = str(rawZipCode).strip().zfill(5)

    
    #- Get Data from Yelp
    yelpDataForZipCode = getDataForZipcode(searchZipCode)


    #- Create DataFrame
    yelpData_df = pd.DataFrame(yelpDataForZipCode)

    
    #-- Export Data
    dataExportPath = os.path.join(".", "Output", f"YelpData_{searchZipCode}.csv")

    yelpData_df.to_csv(dataExportPath)

    print(f"Exported data to disk. Path: {dataExportPath}")

    
#- Complete Message
print(" ")
print("Completed getting information for zipcodes")


--->
Getting random zipcodes...
 
Started search of Yelp API zip code: 11364...
  Requesting data. Offset: 0
  Requesting data. Offset: 50
  Requesting data. Offset: 100
Collected all data. Current Offset: 150  Total: 117
 
Search Zipcode: 11364
Total businesses: 7
Exported data to disk. Path: ./Output/YelpData_11364.csv
 
Started search of Yelp API zip code: 92203...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 3
 
Search Zipcode: 92203
Total businesses: 1
Exported data to disk. Path: ./Output/YelpData_92203.csv
 
Started search of Yelp API zip code: 42035...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 42035
Total businesses: 0
Exported data to disk. Path: ./Output/YelpData_42035.csv
 
Started search of Yelp API zip code: 75110...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 75110
Total businesses: 0
Exported data to disk. Path: ./Output/YelpData_75110.

In [None]:
#-- Collect Data (single zip code)

# Zip code to query; could become a function argument in the future
searchZipCode = "92104"

# Ask Yelp for the businesses in that zip code
yelpDataForZipCode = getDataForZipcode(searchZipCode)

# Tabulate the returned dictionary of lists
yelpData_df = pd.DataFrame(data=yelpDataForZipCode)

In [None]:
#-- Export Data
# Destination is ./Output/YelpData_<zip code>.csv
dataExportPath = os.path.join(".", "Output", f"YelpData_{searchZipCode}.csv")

# Write the collected businesses to disk, then report where they went
yelpData_df.to_csv(path_or_buf=dataExportPath)
print(f"Exported data to disk. Path: {dataExportPath}")

In [None]:
#-- Preview Data
# Displays at most the first 200 rows of the collected businesses
yelpData_df.head(200)