In [15]:
import numpy as np
import pandas as pd
import requests
import json
import folium
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
import time
import math

Load hardcoded variables and secrets

In [2]:
# Read the per-city center coordinates from the project's variables file.
with open('variables/city_centers.json') as centers_file:
    city_centers = json.load(centers_file)

# Each city's entry is indexed as [0] -> latitude, [1] -> longitude.
sf_lat, sf_lon = city_centers['sf'][0], city_centers['sf'][1]
chicago_lat, chicago_lon = city_centers['chicago'][0], city_centers['chicago'][1]
nyc_lat, nyc_lon = city_centers['nyc'][0], city_centers['nyc'][1]

In [3]:
# Load foursquare API credentials from a file kept outside the notebook.
with open('secrets/foursquare_secrets.json') as secrets_file:
    fsq_secrets = json.load(secrets_file)

CLIENT_ID = fsq_secrets['CLIENT_ID']
CLIENT_SECRET = fsq_secrets['CLIENT_SECRET']
VERSION = '20180605'  # sent as the `v` query parameter on every request

In [4]:
# Lookup used below to turn a category name (e.g. 'Food & Drink Shop')
# into the value sent as the `categoryId` query parameter.
with open('variables/fsq_categories.json') as categories_file:
    fsq_categories = json.load(categories_file)

In [5]:
# Reload the NYC responses saved by an earlier session so the query loop
# further down can resume instead of starting over.
with open('json_data/nyc_venues_results.json') as nyc_results_file:
    fsq_results_list_nyc = json.load(nyc_results_file)

Load data

In [6]:
# Both grid CSVs store their coordinates under the column names '0' and '1'
# and carry an 'Unnamed: 0' column that is dropped on load.
coordinate_columns = {'0': 'latitude', '1': 'longitude'}

keep_chicago_df = (
    pd.read_csv("csv_data/final_grid_chicago.csv")
    .drop(columns = ['Unnamed: 0'])
    .rename(columns = coordinate_columns)
)

keep_nyc_df = (
    pd.read_csv("csv_data/final_grid_nyc.csv")
    .drop(columns = ['Unnamed: 0'])
    .rename(columns = coordinate_columns)
)

Analysis

In [7]:
def get_nearby_venues(lat, lon, category_ids, radius = 500, LIMIT = 100, timeout = 10):
    """Query the Foursquare venues/explore endpoint around one point.

    Parameters
    ----------
    lat, lon
        Coordinates of the query point; sent as the ``ll`` parameter in the
        form ``"<lat>,<lon>"``.
    category_ids : iterable of str
        Category ids, comma-joined into the ``categoryId`` parameter.
    radius
        Passed through unchanged as the ``radius`` query parameter.
    LIMIT
        Passed through unchanged as the ``limit`` query parameter.
    timeout
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` waits indefinitely, so one stalled connection would hang
        a loop over hundreds of grid points. Pass ``None`` to wait forever.

    Returns
    -------
    requests.Response
        The raw response. The status code is NOT checked here; callers are
        expected to inspect ``status_code`` themselves.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    # Foursquare API url for venue queries
    base_url = "https://api.foursquare.com/v2/venues/explore"

    # Credentials and API version come from the notebook-level constants.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': str(lat) + ',' + str(lon),
        'categoryId': ','.join(category_ids),
        'radius': radius,
        'limit': LIMIT
    }

    return requests.get(base_url, params = params, timeout = timeout)

In [10]:
# Number of Chicago grid points, i.e. how many API requests the loop will make.
chicago_point_count = len(keep_chicago_df)
print(chicago_point_count)

595


In [8]:
# Number of NYC grid points, i.e. how many API requests the loop will make.
nyc_point_count = len(keep_nyc_df)
print(nyc_point_count)

1036


In [12]:
# Query Foursquare once per Chicago grid point and keep every parsed response.
fsq_results_list_chicago = []

for lat, lon in zip(keep_chicago_df['latitude'], keep_chicago_df['longitude']):
    query = get_nearby_venues(lat, lon, [fsq_categories['Food & Drink Shop']], radius = 750)
    if query.status_code != 200:
        # Stop at the first failed request, but say so: a silent break would
        # let a truncated result list be saved as if it were complete.
        # `query` is left holding the failed response for inspection.
        print("Stopped early: HTTP {} after {} of {} grid points".format(
            query.status_code, len(fsq_results_list_chicago), len(keep_chicago_df)))
        break
    fsq_results_list_chicago.append(query.json())

In [15]:
# Persist the raw Chicago responses so they can be reloaded without
# querying the API again.
with open('json_data/chicago_venues_results.json', 'w') as chicago_out:
    json.dump(fsq_results_list_chicago, chicago_out)

In [20]:
# First-pass NYC query loop, left commented out.
# NOTE(review): presumably disabled so that re-running the notebook does not
# reset fsq_results_list_nyc and spend API quota again -- confirm before
# re-enabling.
# fsq_results_list_nyc = []

# for i, (lat, lon) in enumerate(zip(keep_nyc_df['latitude'], keep_nyc_df['longitude'])):
#     if i % 50 == 0:
#         print(i)
#     query = get_nearby_venues(lat, lon, [fsq_categories['Food & Drink Shop']], radius = 750)
#     if query.status_code != 200:
#         break
#     fsq_results_list_nyc.append(query.json())

0
50
100
150
200
250
300
350


In [25]:
# Show the raw body of the most recent response held in `query`.
query.text

'{"meta":{"code":429,"errorType":"quota_exceeded","errorDetail":"Quota exceeded","requestId":"60ba86a144d5ef55c6a6bd3b"},"response":{}}'

In [28]:
# How many NYC responses have been collected so far.
len(fsq_results_list_nyc)

354

In [27]:
# Save the NYC responses collected so far so a later session can reload
# them and continue from this point.
with open('json_data/nyc_venues_results.json', 'w') as nyc_out:
    json.dump(fsq_results_list_nyc, nyc_out)

Run the cells below after 1:00pm Jun 5 (the previous run stopped with a Foursquare "quota_exceeded" error, shown above).

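Load _fsq_results_list_nyc_ from json_data/nyc_venues_results.json (in the "Load hardcoded variables and secrets" section above) before running the cells below.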

In [9]:
# Resume position: the number of NYC responses already on hand.
starting_point = len(fsq_results_list_nyc)
# Guard against resuming from an unexpected checkpoint.
assert starting_point == 354

In [10]:
# NYC grid points that still need to be queried.
remaining_nyc_points = len(keep_nyc_df) - starting_point
print(remaining_nyc_points)

682


In [11]:
# Resume the NYC queries at the first grid point that has no stored response.
# `starting_point` is a count of rows already processed, i.e. a position, so
# slice positionally with .iloc rather than by index label with .loc (the two
# only coincide while the frame keeps its default 0..n-1 index).
df_to_query = keep_nyc_df.iloc[starting_point:]
for i, (lat, lon) in enumerate(zip(df_to_query['latitude'], df_to_query['longitude'])):
    if i % 50 == 0:
        print(i)  # progress marker, counted from the resume point
    query = get_nearby_venues(lat, lon, [fsq_categories['Food & Drink Shop']], radius = 750)
    if query.status_code != 200:
        # Stop at the first failed request, but say so: a silent break would
        # let a truncated result list be saved as if it were complete.
        # `query` is left holding the failed response for inspection.
        print("Stopped early: HTTP {} with {} of {} grid points collected".format(
            query.status_code, len(fsq_results_list_nyc), len(keep_nyc_df)))
        break
    fsq_results_list_nyc.append(query.json())

0
50
100
150
200
250
300
350
400
450
500
550
600
650


In [14]:
# Total NYC responses collected; should match the NYC grid size when complete.
nyc_result_count = len(fsq_results_list_nyc)
print(nyc_result_count)

1036


In [13]:
# Overwrite the saved NYC file with the current list of responses.
with open('json_data/nyc_venues_results.json', 'w') as nyc_out:
    json.dump(fsq_results_list_nyc, nyc_out)

Define functions to calculate distance between latitude/longitude points

In [17]:
def deg2rad(deg):
    """Convert an angle from degrees to radians."""
    radians = deg * math.pi / 180
    return radians

In [37]:
def kil2mil(km):
    """Convert a distance from kilometers to miles."""
    miles_per_km = 0.621371
    return km * miles_per_km

In [31]:
def calc_dist_latlon(lat_1, lon_1, lat_2, lon_2, unit = 'km'):
    """Return the (approximate) distance between two latitude/longitude points.

    Uses the haversine formula on a sphere with the Earth's average radius
    (6371 km).

    Parameters
    ----------
    lat_1, lon_1 : float
        Latitude and longitude of the first point, in degrees.
    lat_2, lon_2 : float
        Latitude and longitude of the second point, in degrees.
    unit : str
        'km' (default) for kilometers or 'mi' for miles.

    Returns
    -------
    float
        The distance in the requested unit. For any other ``unit`` value the
        function prints "Units not recognized." and returns 0.
    """
    r_earth = 6371 # avg radius of Earth in km
    d_lat = deg2rad(lat_2 - lat_1)
    d_lon = deg2rad(lon_2 - lon_1)

    sin_half_d_lat = math.sin(d_lat/2)
    sin_half_d_lon = math.sin(d_lon/2)

    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2)
    a = ((sin_half_d_lat * sin_half_d_lat) +
         (math.cos(deg2rad(lat_1)) * math.cos(deg2rad(lat_2)) *
          sin_half_d_lon * sin_half_d_lon))

    # Mathematically a <= 1, but for (near-)antipodal points floating-point
    # rounding can push it a hair above 1, which would make sqrt(1 - a) raise
    # a math domain error. Clamp so those inputs return half the circumference.
    a = min(a, 1.0)

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    d = r_earth * c

    if unit == 'km':
        return d
    elif unit == 'mi':
        return kil2mil(d)
    else:
        print("Units not recognized.")
        return 0

In [24]:
# Peek at the first stored NYC response to see the structure of the payload.
fsq_results_list_nyc[0]

{'meta': {'code': 200, 'requestId': '60ba8617795c990530d2b4ff'},
  'headerLocation': 'Current map view',
  'headerFullLocation': 'Current map view',
  'headerLocationGranularity': 'unknown',
  'query': 'food drink',
  'totalResults': 0,
  'suggestedBounds': {'ne': {'lat': 40.504596406750004,
    'lng': -74.24095881238445},
   'sw': {'lat': 40.49109639325, 'lng': -74.25867878761555}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': []}]}}

In [25]:
# Two sample points used to sanity-check the distance function:
# (lat_1, lon_1) and (lat_2, lon_2).
lat_1, lon_1 = 40.504596406750004, -74.24095881238445
lat_2, lon_2 = 40.49109639325, -74.25867878761555

In [34]:
# Distance between (lat_1, lon_1) and (lat_2, lon_2) in the default unit (km).
calc_dist_latlon(lat_1, lon_1, lat_2, lon_2)

2.1209417094572642

In [33]:
# Show the SF and NYC center coordinates, one value per line.
for center_coordinate in (sf_lat, sf_lon, nyc_lat, nyc_lon):
    print(center_coordinate)

37.7749
-122.4194
40.73061
-73.935242


In [38]:
# Distance in miles between the SF and NYC city centers.
calc_dist_latlon(sf_lat, sf_lon, nyc_lat, nyc_lon, 'mi')

2569.0739969635174