In [62]:
# Imports
import json
import os
from statistics import mean

import pandas as pd
import requests as re  # NOTE: aliased as `re` throughout this notebook (shadows the stdlib regex module name)

## Yelp

In [2]:
# Read the master table, then keep only its first two columns as the coordinate frame
df = pd.read_csv(filepath_or_buffer="../processed_data/master.csv")
nta_ll = df.iloc[:, 0:2]

In [3]:
# Pull the two coordinate columns out of nta_ll as plain Python lists
lat, long = (nta_ll[column].to_list() for column in ('NTA_lat', 'NTA_long'))

In [4]:
#initialize request variables
root_endpoint = "https://api.yelp.com/v3/businesses/search"

# The API key is read from the YELP_API_KEY environment variable so it never has to be
# written into the notebook; it is sent as a bearer token in the Authorization header.
yelp_api_key = os.environ.get("YELP_API_KEY", "")
if not yelp_api_key:
    print("WARNING: YELP_API_KEY is not set; Yelp requests will not be authorized.")

headers = {"Accept": "application/json",
           "Authorization": "Bearer " + yelp_api_key}

In [5]:
#pulling and saving requests
# One search per coordinate pair. Iterating over the coordinate lists themselves
# (instead of a hard-coded count) keeps the loop in step with master.csv.
results_list = []
for latitude, longitude in zip(lat, long):
    try:
        response = re.get(root_endpoint,
                          params={'latitude': latitude, 'longitude': longitude, 'radius': 500,
                                  'categories': 'Food', 'limit': 50},
                          headers=headers,
                          timeout=30)  # without a timeout a stalled connection blocks forever
        results_list.append(response.json())
    except (re.RequestException, ValueError) as exc:
        # Transport failure or non-JSON body: keep the slot and mark it with an 'error'
        # key, so one bad call does not discard everything fetched so far and the
        # entry can still be identified as failed afterwards.
        results_list.append({'error': {'code': 'REQUEST_FAILED', 'description': str(exc)}})

In [39]:
# Collect the positions of every payload that carries an 'error' key
fails = [position for position, payload in enumerate(results_list) if 'error' in payload]

In [26]:
#retry failed requests
# Re-issue the search for each index recorded in `fails`, in the same order, so that
# retry_list[k] is the new payload for position fails[k].
retry_list = []
for failed_idx in fails:
    try:
        response = re.get(root_endpoint,
                          params={'latitude': lat[failed_idx], 'longitude': long[failed_idx],
                                  'radius': 500, 'categories': 'Food', 'limit': 50},
                          headers=headers,
                          timeout=30)  # without a timeout a stalled connection blocks forever
        retry_list.append(response.json())
    except (re.RequestException, ValueError) as exc:
        # Keep one entry per failed index even when the retry itself fails, so the
        # positions in retry_list stay aligned with `fails`.
        retry_list.append({'error': {'code': 'REQUEST_FAILED', 'description': str(exc)}})

In [38]:
# Overwrite each failed slot in results_list with the retry payload at the same position
for slot, failed_idx in enumerate(fails):
    results_list[failed_idx] = retry_list[slot]

In [43]:
# Write the collected payloads to disk as a single JSON document
with open('../raw_data/yelp/yelp.json', mode='w', encoding='utf-8') as json_out:
    json.dump(results_list, fp=json_out)

In [45]:
#load json for parsing
# The context manager closes the handle as soon as parsing finishes; the encoding
# matches the one used when the file was written.
with open("../raw_data/yelp/yelp.json", "r", encoding='utf-8') as file:
    yelp = json.load(file)

In [67]:
#JSON parsing
# Collapse each area's payload into a (mean rating, mean price level) pair.
RESULTS_PER_AREA = 50   # same value as the 'limit' sent with every search request
DEFAULT_RATING = 2.5    # imputed when a result slot or its rating is missing
DEFAULT_PRICE = 2       # imputed when a result slot or its price is missing
PRICE_LEVELS = {'$': 1, '$$': 2, '$$$': 3, '$$$$': 4}

yelp_list = []
for area in yelp:  # one entry per saved payload, instead of a hard-coded count
    rating = []
    price = []
    # Every area is padded to RESULTS_PER_AREA slots: a slot with no business (or a
    # payload with no 'businesses' key) contributes the default rating and price.
    for j in range(RESULTS_PER_AREA):
        try:
            rating.append(area['businesses'][j]['rating'])
        except (KeyError, IndexError, TypeError):
            rating.append(DEFAULT_RATING)
        try:
            price_level = PRICE_LEVELS.get(area['businesses'][j]['price'])
        except (KeyError, IndexError, TypeError):
            price_level = DEFAULT_PRICE
        # A price string outside '$'..'$$$$' is skipped, not imputed
        if price_level is not None:
            price.append(price_level)
    # Drop every zero rating (list.remove(0) would only drop the first one)
    rating = [value for value in rating if value != 0]
    # mean() raises on an empty list, so fall back to the defaults if nothing is left
    rating_mean = mean(rating) if rating else DEFAULT_RATING
    price_mean = mean(price) if price else DEFAULT_PRICE
    yelp_list.append((rating_mean, price_mean))

In [71]:
# Turn the per-area (rating, price) tuples into a two-column frame
summary_columns = ['rating', 'price']
df = pd.DataFrame(data=yelp_list, columns=summary_columns)

In [54]:
# Pair the NTA coordinates with the rating/price summary held in `df`.
# The summary is taken from memory rather than re-read from
# '../processed_data/restaurants/yelp.csv': that file is this section's own output, so
# reading it here fails on a fresh run and picks up already-merged columns on a re-run.
dfnta = pd.read_csv("../processed_data/NTA_coords.csv")
dfyelp = df[['rating', 'price']]

In [57]:
# Place the coordinate columns and the Yelp columns side by side
df = pd.concat(objs=[dfnta, dfyelp], axis="columns")

In [60]:
# Give the coordinate columns their generic names (renamed in place)
coordinate_names = {"NTA_lat": "latitude", "NTA_long": "longitude"}
df.rename(columns=coordinate_names, inplace=True)

In [61]:
# Write the merged frame out as CSV, without the index column
df.to_csv(path_or_buf='../processed_data/restaurants/yelp.csv',
          index=False,
          encoding='utf-8')

## Restaurant Inspections

In [65]:
# Load the restaurant-inspection CSV.
# The location can be overridden with the REST_INSP_CSV environment variable; the
# original absolute path stays as the default so an existing setup keeps working.
rest_insp_path = os.environ.get(
    "REST_INSP_CSV", "/home/brian/Documents/temp midterm/restaurant_inspections.csv")
df_r = pd.read_csv(rest_insp_path)

In [67]:
# Keep only the four columns used below
kept_columns = ["Latitude", "Longitude", "SCORE", "INSPECTION DATE"]
df_r = df_r[kept_columns]

In [68]:
# Discard, in place, every row that has a missing value in any column
df_r.dropna(axis=0, how='any', inplace=True)

In [69]:
# Keep the rows whose inspection-date string contains '2021'
is_2021 = df_r["INSPECTION DATE"].str.contains('2021')
df_r = df_r[is_2021]

In [70]:
# Drop rows whose latitude, longitude or score is exactly 0
nonzero_rows = (df_r.Latitude != 0) & (df_r.Longitude != 0) & (df_r.SCORE != 0)
df_r = df_r[nonzero_rows]

In [71]:
# Rename the kept columns to their output names (in place)
output_names = {
    "Latitude": "latitude",
    "Longitude": "longitude",
    "SCORE": "rest_insp_score",
}
df_r.rename(columns=output_names, inplace=True)

In [72]:
# Write the filtered inspection rows out as CSV, without the index column
df_r.to_csv(path_or_buf='../processed_data/restaurants/rest_insp.csv',
            index=False,
            encoding='utf-8')