### YelpDataCollection
Queries the Yelp search API to get those businesses that 'gluten_free' category for a given zip code.  
https://www.yelp.com/developers/documentation/v3/business_search

In [8]:
#-- Import Libraries
import pandas as pd
import os
import requests

# Yelp API key in secrets.py; .gitignore prevents the secrets.py from being pushed to GitHub
from secrets import yelpKey


In [17]:
def getDataForZipcode(isGlutenFreeSearch, searchZipCode):
    ''' Searches the Yelp API to get the business that satisfy the filter
    
    Accepts : isGlutenFreeSearcj (bool) TRUE- search for gluten free term FALSE- search just for restaurant
                searchZipCode (str) zip code to search for records within
    
    Returns : (dictionary) contains information of business for the zip code
                ID: Unique Yelp ID for the business
                Name: Name of the business
                ZipCode: Location of the business
                Latitude: coordinate of the business location
                Longitude: coordinate of the business location
                Price: Price level of the business. Value is one of $, $$, $$$, $$$$ and NA
                Rating: Rating for this business (value ranges from 1, 1.5, ... 4.5, 5)
    '''
    
    #- Prepare Search
    # Source Url
    baseYelpUrl = "https://api.yelp.com/v3/businesses/search"

    # API Key passed through header
    headers = {
            'Authorization': 'Bearer %s' % yelpKey,
    }
    
    # Search Term
    searchTerm = 'restaurant'
    
    if (isGlutenFreeSearch == True):
        searchTermin = 'gluten_free,restaurant'
    
    
    # Dictionary stores data
    yelpData = {
        'ID': [],
        'Name': [],
        'Zipcode': [],
        'Latitude': [],
        'Longitude': [],
        'Price' : [],
        'Rating' : []
    }
    
    
    #- Search
    #  API limits 50 records being returned at once; must loop and request offset of results to get all records
    recordLimit = 50
    currentOffset = 0
    hasMoreData = True
    
    while hasMoreData == True:
        
        #- Prepare Parameters
        parameters = {
            'location': searchZipCode,
            'term': searchTerm,
            'limit': recordLimit,
            'radius': 3000,
            'offset': currentOffset,
            }
        
        
        #- Request
        print(f"  Requesting data. Offset: {currentOffset}")
        
        response = requests.request('GET', baseYelpUrl, headers=headers, params=parameters)
        
        
        #- Check Response
        if (response.status_code == requests.codes.ok):
            
            # Get Json from Response
            responseJson = response.json()
            
            
            # Search Businesses
            for business in responseJson['businesses']:
                
                # Determine Use Business
                useBusiness = checkBusinessForUsage(business, searchZipCode)
                
                if (useBusiness == True):
                    
                    # Populate Dictionary with Business Information
                    yelpData['ID'].append(business['id'])
                    yelpData['Name'].append(business['name'])
                    yelpData['Zipcode'].append(business['location']['zip_code'])
                    
                    yelpData['Latitude'].append(business['coordinates']['latitude'])
                    yelpData['Longitude'].append(business['coordinates']['longitude'])
                    
                    yelpData['Price'].append(getPriceForBusiness(business))
                    yelpData['Rating'].append(business['rating'])
          
        
        #- Prepare for Next search
        # API only supports 50 records at a time; must query with offset
        currentOffset = (currentOffset + recordLimit)
        
        if (currentOffset > responseJson['total']):
            print(f"Collected all data. Current Offset: {currentOffset}  Total: {responseJson['total']}")
            hasMoreData = False
    
                  
    #- Metadata on Data
    print(f"Search Zipcode: {searchZipCode}")
    print(f"Total businesses: {len(yelpData['ID'])}")
               
          
    #- Return data from function
    return yelpData


In [14]:
def checkBusinessForUsage(businessInfo, searchZipcode):
    ''' Determines if the business can be used in the Analysis
    
    Accepts : businessInfo (dictionary) contains the metadata for individual business 
                searchZipCode (str) zip code searching for data within
    
    Return : bool TRUE- business meets critera, able to use FALSE- unable to use business
    '''
    
    #- Check Within Search Zipcode
    businessZipCode = businessInfo['location']['zip_code']
    
    if (businessZipCode != searchZipcode):
        return False
    
    
    return True
     

In [15]:
def getPriceForBusiness(businessInfo):
    ''' Gets the price for a business; not all businesses contain this property within the JSON;
    when not found just uses NA.
    
    Accepts : businessInfo (dictionary) metadata on an individual business
    
    Returns : (num) value from price tag
    '''
    try:
        
        return businessInfo['price']
    
    except:
        return 'NA'

In [20]:
#-- Collect Data, Multiple Datasets

#- UserName
userName = "Connie"

#- Get File of Random Zipcodes
print("--->")
print("Getting random zipcodes...")


fileName = f'RandomZipCodes_{userName}.csv'
randomZipcodesPath = os.path.join(".", "Output_2", fileName)

randomZipcodes_df = pd.read_csv(randomZipcodesPath)


#- Collect Data
for index, row in randomZipcodes_df.iterrows():
    
    #- Get Zipcode
    searchZipCode = str(row[1])
    
    print(" ")
    print(">>--")
    print(searchZipCode)
    
    
    #-- Gluten Free
    print("-Gluten Free")
    
    #- Get Data from Yelp: Gluten Free Search
    yelpDataForZipCode = getDataForZipcode(True, searchZipCode)

    #- Create DataFrame
    yelpData_df = pd.DataFrame(yelpDataForZipCode)
    
    #- Export Data
    dataExportPath = os.path.join(".", "Output_2", f"YelpData_{searchZipCode}.csv")

    yelpData_df.to_csv(dataExportPath)
    
    
    #-- All Restaurants
    print("-All Restaurants")
    
    #- Get Data
    yelpDataRestaurantForZipCode = getDataForZipcode(False, searchZipCode)
    
    #- Create Data Frame
    yelpDataRestaurant_df = pd.DataFrame(yelpDataRestaurantForZipCode)
    
    #- Export Data
    dataRestaurantExportPath = os.path.join(".", "Output_2", f"YelpDataAll_{searchZipCode}.csv")
    
    yelpDataRestaurant_df.to_csv(dataRestaurantExportPath)
    
    
    print(f"Exported data to disk. Path: {dataExportPath}")

    
#- Complete Message
print(" ")
print("Completed getting information for zipcodes")


--->
Getting random zipcodes...
 
Started search of Yelp API zip code: 91322...
  Requesting data. Offset: 0
  Requesting data. Offset: 50
Collected all data. Current Offset: 100  Total: 72
 
Search Zipcode: 91322
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_91322.csv
 
Started search of Yelp API zip code: 30307...
  Requesting data. Offset: 0
  Requesting data. Offset: 50
  Requesting data. Offset: 100
Collected all data. Current Offset: 150  Total: 111
 
Search Zipcode: 30307
Total businesses: 55
Exported data to disk. Path: .\Output\YelpData_30307.csv
 
Started search of Yelp API zip code: 4975...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 4975
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_4975.csv
 
Started search of Yelp API zip code: 15354...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 12
 
Search Zipcode: 15354
Total businesses: 0
Exported data to disk.

Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 45302
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_45302.csv
 
Started search of Yelp API zip code: 47367...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 1
 
Search Zipcode: 47367
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_47367.csv
 
Started search of Yelp API zip code: 96128...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 96128
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_96128.csv
 
Started search of Yelp API zip code: 18248...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 18248
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_18248.csv
 
Started search of Yelp API zip code: 74865...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 74865
Total businesses: 0

  Requesting data. Offset: 50
  Requesting data. Offset: 100
  Requesting data. Offset: 150
  Requesting data. Offset: 200
  Requesting data. Offset: 250
  Requesting data. Offset: 300
  Requesting data. Offset: 350
  Requesting data. Offset: 400
  Requesting data. Offset: 450
  Requesting data. Offset: 500
  Requesting data. Offset: 550
  Requesting data. Offset: 600
  Requesting data. Offset: 650
  Requesting data. Offset: 700
  Requesting data. Offset: 750
Collected all data. Current Offset: 800  Total: 799
 
Search Zipcode: 20579
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_20579.csv
 
Started search of Yelp API zip code: 37015...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 37015
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_37015.csv
 
Started search of Yelp API zip code: 95364...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 95364
Total 

Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 24831
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_24831.csv
 
Started search of Yelp API zip code: 42762...
  Requesting data. Offset: 0
Collected all data. Current Offset: 50  Total: 0
 
Search Zipcode: 42762
Total businesses: 0
Exported data to disk. Path: .\Output\YelpData_42762.csv
 
Completed getting information for zipcodes


In [6]:
#-- Collect Data

# Zip Code to search
# Able to pass this as function in future
searchZipCode = "92104"


#- Get Data From Yelp
yelpDataForZipCode = getDataForZipcode(searchZipCode)


#- Create DataFrame
yelpData_df = pd.DataFrame(yelpDataForZipCode)

 
Started search of Yelp API zip code: 92104...
  Requesting data. Offset: 0
  Requesting data. Offset: 50
  Requesting data. Offset: 100
  Requesting data. Offset: 150
  Requesting data. Offset: 200
  Requesting data. Offset: 250
Collected all data. Current Offset: 300  Total: 272
 
Search Zipcode: 92104
Total businesses: 120


In [None]:
#-- Export Data
dataExportPath = os.path.join(".", "Output", f"YelpData_{searchZipCode}.csv")

yelpData_df.to_csv(dataExportPath)

print(f"Exported data to disk. Path: {dataExportPath}")

In [None]:
#-- Preview Data
yelpData_df.head(200)