In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import requests
import time
from pprint import pprint
from scipy.stats import linregress
from datetime import datetime
from yelpapi import YelpAPI

# Impor the API key
from config import api_key

In [None]:
# Old version, hit many issues with offset
# yelp_api = YelpAPI(api_key)
# search_term = 'restaurants'
# location = 'New York City, NY'
# limit = 50  # maximum number of results to return
# daily_results_limit = 12500 # update this by 12500 every day
# offset = 0 # update this by 12500 every day

# # response = yelp_api.search_query(term=search_term, location=location, limit=limit)
# # pprint(response)
# results = []
# api_call_limit = 10  # total API call limit for the day
# calls_made = 0  # counter for API calls made

# while offset < daily_results_limit and calls_made < api_call_limit:
#     response = yelp_api.search_query(term=search_term, location=location, limit=limit, offset=offset)
#     results.extend(response['businesses'])  # Extend the list with new businesses
#     offset += limit  # Increment the offset by the limit
#     calls_made += 1  # Increment the API call counter
    
#     # Print progress
#     print(f'Retrieved {len(results)} results with {calls_made} API calls')

#     # Avoid hitting rate limits by adding a delay
#     time.sleep(1)  # Delay for 1 second

In [None]:
# NYC boundaries (lat, lon)
northwest = (40.893079105643025, -73.9128920326231)
northeast = (40.879497836080574, -73.82796941055598)
southwest = (40.62030456986446, -74.03292689266031)
southeast = (40.65979392491511, -73.74149354297585)

# Step size for approx 2 km
lat_step = 0.018
lon_step = 0.024

# Generate grid points
lat_points = np.arange(southwest[0], northwest[0], lat_step)
lon_points = np.arange(southwest[1], southeast[1], lon_step)

# Create list of points with increased radius
grid_points_with_radius = [{"lat": lat, "lon": lon, "radius": 1000} for lat in lat_points for lon in lon_points]

# Filter points to ensure they fall within the specified quadrilateral
def is_within_boundaries(lat, lon):
    return (southwest[0] <= lat <= northwest[0]) and (southwest[1] <= lon <= southeast[1])
filtered_grid_points = [point for point in grid_points_with_radius if is_within_boundaries(point["lat"], point["lon"])]

# Number of points generated
num_points = len(filtered_grid_points)

# Split points into four parts for four days
num_days = 4
points_per_day = num_points // num_days

# Ensure all points are covered, even if not divisible by 4
split_grid_points = [filtered_grid_points[i:i + points_per_day] for i in range(0, num_points, points_per_day)]

# Handle any remaining points if num_points is not perfectly divisible by num_days
if len(split_grid_points) > num_days:
    split_grid_points[num_days - 1].extend(split_grid_points.pop())

print(f"Generated {num_points} grid points.")
for day in range(num_days):
    starting_point = split_grid_points[day][0]
    print(f"Day {day + 1}, {len(split_grid_points[day])} grid points, starting point: Latitude {starting_point['lat']}, Longitude {starting_point['lon']}")

# split_grid_points

Generated 208 grid points.
Day 1, 52 grid points, starting point: Latitude 40.62030456986446, Longitude -74.03292689266031
Day 2, 52 grid points, starting point: Latitude 40.69230456986446, Longitude -74.03292689266031
Day 3, 52 grid points, starting point: Latitude 40.764304569864464, Longitude -74.03292689266031
Day 4, 52 grid points, starting point: Latitude 40.83630456986447, Longitude -74.03292689266031


In [None]:
#Test API Calls
# headers = {
#     "accept": "application/json",
#     "Authorization": f"Bearer {api_key}"
# }
# url_first_call = f"https://api.yelp.com/v3/businesses/search?latitude=40.62030456986446&longitude=-74.03292689266031&term=restaurants&radius=1000&sort_by=distance&limit=50"
# first_response = requests.get(url_first_call, headers=headers).json()
# pprint(first_response)

In [None]:
day = 0 # change this +1 next day
calls_made = 0 # keep track of calls made for display and to esnure we are not going past limit
search_term = 'restaurants' # variable used for api url
limit = 50 # variable used for api url, makes sure we get 50 restaurants per call
offset_limit = 200 # ensure we check the first 200 restaurants within radius of each point
sort_by = 'distance' # variable used for api url
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {api_key}"
}
results = []
# loop over every coordiante point built above for the selected day
print('-----------Starting with API calls for the Day-----------')  
for coord in split_grid_points[day]:
    offset = 0 # used to make sure we look at different restaurants on next api call at same coord
    # call api 4 times per coord
    while offset < offset_limit:
        url = f"https://api.yelp.com/v3/businesses/search?latitude={coord['lat']}&longitude={coord['lon']}&term={search_term}&radius={coord['radius']}&sort_by={sort_by}&limit={limit}&offset={offset}"
        response = requests.get(url, headers=headers).json()
        results.extend(response['businesses'])
        offset += 50 # increase offset for next call
        calls_made += 1 # keep track of calls made for display
        time.sleep(1) # sleep to not get timed out for too many fast calls
    print(f'Retrieved {len(results)} results with {calls_made} API calls') 
print('-----------Finished with API calls for the Day-----------')  

In [None]:
# This code was for testing and looking at how we wanted to pull the data

# print(response['businesses'][0]['name'])
# print(f"{response['businesses'][0]['rating']} ({response['businesses'][0]['review_count']})")
# print(response['businesses'][0]['price'])
# print(response['businesses'][0]['display_phone'])
# print(f"Latitiude: {response['businesses'][0]['coordinates']['latitude']}, Longitude: {response['businesses'][0]['coordinates']['longitude']}")

# test_categories = ''
# for alias in response['businesses'][0]['categories']:
#     test_categories = test_categories + '/' + alias['title']
# print(test_categories)

# test_transactions = ''
# for type in response['businesses'][0]['transactions']:
#     test_transactions = test_transactions + '/' + type
# print(test_transactions)

In [None]:
restaurant_data = [] # List to hold restaurant dictionaries
for restaurant in results:
    name = restaurant['name'] # get restaurant name
    rating = restaurant['rating'] # get restaurant rating
    review_count = restaurant['review_count'] # get restaurant review count
    try:
        price = restaurant['price']  # get restaurant price, if it fails ...
    except KeyError:
        price = '???' # set price to ???
    phone_number = restaurant['display_phone'] # get restaurant phone number
    latitiude = restaurant['coordinates']['latitude'] # get restaurant lat
    longitude = restaurant['coordinates']['longitude'] # get restaurant lon

    # merge all categories into one line seperated by /
    categories = ''
    x = 0
    for alias in restaurant['categories']:
        if x == 0:
            categories = alias['title']
            x += 1
        else:
            categories = categories + '/' + alias['title']

    # merge all transaction types into one line seperated by /
    transactions = ''
    x = 0
    for type in restaurant['transactions']:
        if x == 0:
            transactions = type
            x += 1
        else:
            transactions = transactions + '/' + type

    # Build a dictionary for the restaurant and add it to the list
    restaurant_data.append({'Name':name,
                            'Rating':rating,
                            'Review Count':review_count,
                            'Price':price,
                            'Phone Number':phone_number,
                            'Latitiude':latitiude,
                            'Longitude':longitude,
                            'Categories':categories,
                            'Transactions':transactions})

# Create a DF from list
restaurant_data_df = pd.DataFrame(restaurant_data)
restaurant_data_df

Unnamed: 0,Name,Rating,Review Count,Price,Phone Number,Latitiude,Longitude,Categories,Transactions
0,Nino's Pizza,3.7,234,$,(718) 680-0222,40.619812,-74.032867,Pizza,pickup/delivery
1,Al Horno Lean Mexican Kitchen,4.3,51,$$,(347) 578-7492,40.619730,-74.032760,Mexican,pickup/delivery
2,Foodtown,2.9,130,$$,(718) 680-2884,40.619980,-74.032320,Grocery/Delis,
3,Paneantico Bakery,3.5,475,$$,(718) 680-2347,40.619499,-74.033012,Bakeries/Sandwiches,pickup/delivery
4,Bridgeview Diner,2.5,343,$$,(718) 680-9818,40.620640,-74.031900,Diners/American/Breakfast & Brunch,pickup/delivery
...,...,...,...,...,...,...,...,...,...
4741,IHOP,2.8,25,???,(347) 676-5020,40.679300,-73.756100,Breakfast & Brunch/Burgers/American,pickup/delivery
4742,Henrica's Restaurant,2.9,236,$$,(718) 527-7355,40.666813,-73.736520,Chinese/Caribbean,pickup/delivery
4743,Golden Krust,2.4,31,$,(718) 341-1600,40.665333,-73.754012,Caribbean,pickup/delivery
4744,Jy Springfield Deli Mini,3.3,3,???,(718) 481-9289,40.665413,-73.754239,Delis/Grocery,


In [None]:
# Check for duplicates and display them
duplicates = restaurant_data_df.duplicated(keep=False)
restaurant_data_df[duplicates].sort_values('Name')

Unnamed: 0,Name,Rating,Review Count,Price,Phone Number,Latitiude,Longitude,Categories,Transactions
718,Anna's Cafe,2.9,31,$,(718) 951-7617,40.629386,-73.947525,Cafes,pickup/delivery
1730,Anna's Cafe,2.9,31,$,(718) 951-7617,40.629386,-73.947525,Cafes,pickup/delivery
717,Anna's Luncheonette,3.2,6,???,(718) 951-7617,40.62927,-73.94752,Breakfast & Brunch/Cafes,
1729,Anna's Luncheonette,3.2,6,???,(718) 951-7617,40.62927,-73.94752,Breakfast & Brunch/Cafes,
771,Boston Market - 1589,3.2,52,$$,(718) 513-1983,40.629295,-73.918816,Comfort Food/American/Sandwiches,delivery
1829,Boston Market - 1589,3.2,52,$$,(718) 513-1983,40.629295,-73.918816,Comfort Food/American/Sandwiches,delivery
3007,D and M Caribbean Cuisine,0.0,0,???,(347) 715-2589,40.66547,-73.88834,Caribbean,pickup
4191,D and M Caribbean Cuisine,0.0,0,???,(347) 715-2589,40.66547,-73.88834,Caribbean,pickup
3161,Foo-An Kitchen,3.4,21,$,(718) 276-2725,40.665365,-73.754434,Chinese,pickup
4745,Foo-An Kitchen,3.4,21,$,(718) 276-2725,40.665365,-73.754434,Chinese,pickup


In [None]:
# Remove second copy of duplicates
restaurant_data_clean = restaurant_data_df.drop_duplicates()
restaurant_data_clean

Unnamed: 0,Name,Rating,Review Count,Price,Phone Number,Latitiude,Longitude,Categories,Transactions
0,Nino's Pizza,3.7,234,$,(718) 680-0222,40.619812,-74.032867,Pizza,pickup/delivery
1,Al Horno Lean Mexican Kitchen,4.3,51,$$,(347) 578-7492,40.619730,-74.032760,Mexican,pickup/delivery
2,Foodtown,2.9,130,$$,(718) 680-2884,40.619980,-74.032320,Grocery/Delis,
3,Paneantico Bakery,3.5,475,$$,(718) 680-2347,40.619499,-74.033012,Bakeries/Sandwiches,pickup/delivery
4,Bridgeview Diner,2.5,343,$$,(718) 680-9818,40.620640,-74.031900,Diners/American/Breakfast & Brunch,pickup/delivery
...,...,...,...,...,...,...,...,...,...
4740,Soup Man,0.0,0,???,(718) 525-0600,40.679672,-73.753929,Soup,
4741,IHOP,2.8,25,???,(347) 676-5020,40.679300,-73.756100,Breakfast & Brunch/Burgers/American,pickup/delivery
4742,Henrica's Restaurant,2.9,236,$$,(718) 527-7355,40.666813,-73.736520,Chinese/Caribbean,pickup/delivery
4743,Golden Krust,2.4,31,$,(718) 341-1600,40.665333,-73.754012,Caribbean,pickup/delivery


In [None]:
# Append dataframe data to a csv file
restaurant_data_clean.to_csv("data/restaurant_data.csv",mode='a',header=False)

In [29]:
# Read in data from the new york city health csv file
health_df = pd.read_csv("data/DOHMH_New_York_City_Restaurant_Inspection_Results_20241120.csv")
health_df.head()

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,...,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,Location Point1
0,50161678,CHIPOTLE MEXICAN GRILL #3056,Brooklyn,1746,ATLANTIC AVENUE,11213.0,6143187413,,01/01/1900,,...,,40.677506,-73.93234,308.0,36.0,30900.0,3251126.0,3013360000.0,BK61,
1,50153724,ALICE'S TEA CUP CHAPTER II,Manhattan,156,EAST 64 STREET,10065.0,6464107205,,01/01/1900,,...,,40.7652,-73.965568,108.0,4.0,12000.0,1042114.0,1013980000.0,MN40,
2,50154473,MONSIEUR BISTRO,Manhattan,853,LEXINGTON AVENUE,10065.0,3476076861,,01/01/1900,,...,,40.765661,-73.965661,108.0,4.0,12000.0,1042380.0,1013990000.0,MN40,
3,50157000,HEA SOUTHEAST ASIAN STREET FOOD,Queens,3636,PRINCE ST,11354.0,9177431029,,01/01/1900,,...,,40.761392,-73.832895,407.0,20.0,86900.0,4534920.0,4049708000.0,QN22,
4,50122996,MARTINY'S,Manhattan,121,EAST 17 STREET,10003.0,1646644923,,01/01/1900,,...,,40.735975,-73.98762,105.0,2.0,5000.0,1082518.0,1008730000.0,MN21,


In [None]:
# Make sure phones numbers match bewteen both dataframes, rename columns and get rid of columns not needed
health_df['PHONE'] = health_df['PHONE'].astype(str).str.replace(r'(\d{3})(\d{3})(\d{4})', r'(\1) \2-\3', regex=True)
health_df[['Phone Number','Health Grade']] = health_df[['PHONE','GRADE']]
columns_to_keep = ['Phone Number', 'Health Grade']
health_df_cleaned = health_df.loc[:, columns_to_keep]
health_df_cleaned

Unnamed: 0,Phone Number,Health Grade
0,(614) 318-7413,
1,(646) 410-7205,
2,(347) 607-6861,
3,(917) 743-1029,
4,(164) 664-4923,
...,...,...
259052,(718) 893-0660,
259053,(718) 629-3555,A
259054,(718) 325-2800,
259055,(718) 651-2060,B


In [None]:
# Get health grade for phone numbers from restaurant_data_clean found in health_df_cleaned
restaurant_data_clean.loc[:,'Health Grade'] = restaurant_data_clean['Phone Number'].map(health_df_cleaned['Health Grade'])
restaurant_data_clean

Unnamed: 0,Name,Rating,Review Count,Price,Phone Number,Latitiude,Longitude,Categories,Transactions,Health Grade
0,Nino's Pizza,3.7,234,$,(718) 680-0222,40.619812,-74.032867,Pizza,pickup/delivery,
1,Al Horno Lean Mexican Kitchen,4.3,51,$$,(347) 578-7492,40.619730,-74.032760,Mexican,pickup/delivery,
2,Foodtown,2.9,130,$$,(718) 680-2884,40.619980,-74.032320,Grocery/Delis,,
3,Paneantico Bakery,3.5,475,$$,(718) 680-2347,40.619499,-74.033012,Bakeries/Sandwiches,pickup/delivery,
4,Bridgeview Diner,2.5,343,$$,(718) 680-9818,40.620640,-74.031900,Diners/American/Breakfast & Brunch,pickup/delivery,
...,...,...,...,...,...,...,...,...,...,...
4740,Soup Man,0.0,0,???,(718) 525-0600,40.679672,-73.753929,Soup,,
4741,IHOP,2.8,25,???,(347) 676-5020,40.679300,-73.756100,Breakfast & Brunch/Burgers/American,pickup/delivery,
4742,Henrica's Restaurant,2.9,236,$$,(718) 527-7355,40.666813,-73.736520,Chinese/Caribbean,pickup/delivery,
4743,Golden Krust,2.4,31,$,(718) 341-1600,40.665333,-73.754012,Caribbean,pickup/delivery,
