In [214]:
import json, requests
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import yaml
import ast

### Import secret Keys to use with Foursquare API

In [2]:
with open('/Users/ReddingSkinnyRobot/.secrets/foursquare_api.yaml') as f:
    secrets = yaml.load(f)

### Calculate bounding limits from http://boundingbox.klokantech.com/ TSV format

In [3]:
westlimit=-122.459696 #Longitude
southlimit=47.491912  #Latitude
eastlimit=-122.224433 #Longitude
northlimit=47.734145  #Latitude

In [4]:
km_east_west = (eastlimit - westlimit) * 97
km_north_south = (northlimit - southlimit) * 111

In [5]:
km_east_west/100*1000 #meters in each grid east west

228.20510999998945

In [6]:
km_north_south/100*1000 #meters in each grid north south

268.8786299999987

Difference between degrees of Longitude is about 60 miles (97km) 

Difference between degrees of Latitude is about 69 miles (111km)

### Define search functions

In [41]:
def populate_search_params(lat, long):
    search_params = dict(
        client_id=secrets['client_id'],
        client_secret=secrets['client_secret'],
        ll = '{}, {}'.format(lat, long),
        intent='browse',
        radius='200', #Meters
        limit='50',
        categoryId='4bf58dd8d48988d1e0931735',#Coffee shop
        llAcc='1',#Accuracy of lat & long in meters
        v='20180113' #Date of current version
        )
    return search_params

In [43]:
def get_venue_data(longitude_group, latitude_group):
    '''
    Takes in lists of longitudes and latitudes and performs a grid search of them, returning a max of 50 coffee
    shops per each intersection formatted as a list of json objects.
    '''
    search_url = 'https://api.foursquare.com/v2/venues/search'
    search_list = []
    for i, long in enumerate(longitude_group):
        for lat in latitude_group:
            search_params = populate_search_params(lat, long)
            try:
                search_resp = requests.get(url=search_url, params=search_params)
                search_data = json.loads(search_resp.text)
                if search_data['meta']['code'] == 403:
                    print('403 error - Exceeded rate limit')
                    print(search_data)
                    return search_list
                search_list.append(search_data)
            except:
                search_list.append((lat, long))
        print('Step {} of {}'.format(i+1, len(longitude_group)))
    return search_list

In [44]:
def write_venue_info_to_file(search_data, filename):
    '''
    Takes in a list of loaded json objects and writes them to a text file.
    '''
    venues = []
    for item in search_data:
        # Checks for empty response
        try:
            if item['response']['venues'] != []:
                for venue in item['response']['venues']:
                    venues.append(venue)
        except:
            continue
    with open(filename, 'a') as f:
        for item in venues:
            f.write("{}\n".format(item))
    print('Done!')

### Generate longitude & latitude grids and divide search into smaller groups to avoid breakage during API GET requests

#### This needs to be broken into three groups to avoid Foursquare's 5000 requests limit

In [10]:
number_of_gridlines = 100
longitude_grid = np.linspace(westlimit, eastlimit, number_of_gridlines)
latitude_group = np.linspace(northlimit, southlimit, number_of_gridlines)

In [11]:
longitude_group1 = longitude_grid[:33]
longitude_group2 = longitude_grid[33:66]
longitude_group3 = longitude_grid[66:]

### Test single GET from venues/search api

In [12]:
test_lat = latitude_group[49]

In [13]:
test_long = longitude_group2[15]

<img src="images/test_venues_GET_map.jpeg">

In [61]:
get_venue_data([test_long], [test_lat])

Step 1 of 1


[{'meta': {'code': 200, 'requestId': '5a6127009fb6b76466b01fb9'},
  'response': {'venues': [{'allowMenuUrlEdit': True,
     'beenHere': {'lastCheckinExpiredAt': 0},
     'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/breakfast_',
        'suffix': '.png'},
       'id': '4bf58dd8d48988d143941735',
       'name': 'Breakfast Spot',
       'pluralName': 'Breakfast Spots',
       'primary': True,
       'shortName': 'Breakfast'}],
     'contact': {'formattedPhone': '(206) 728-2219',
      'phone': '2067282219',
      'twitter': 'biscuitbitch'},
     'hasPerk': False,
     'id': '5510b894498e3dbab70b5d8d',
     'location': {'address': '2303 3rd Ave',
      'cc': 'US',
      'city': 'Seattle',
      'country': 'United States',
      'crossStreet': '3rd Ave & Bell St',
      'distance': 80,
      'formattedAddress': ['2303 3rd Ave (3rd Ave & Bell St)',
       'Seattle, WA 98121',
       'United States'],
      'labeledLatLngs': [{'label': 'display',
        'la

### Search for all venues that match my search params & write them to txt file

#### Instantiate an empty venue search list which the following for loops and GET requests will populate

In [None]:
venue_search_list = []
search_url = 'https://api.foursquare.com/v2/venues/search'

##### Longitude_group 1

In [15]:
search_list = get_venue_data(longitude_group1, latitude_group)

Step 1 of 33
Step 2 of 33
Step 3 of 33
Step 4 of 33
Step 5 of 33
Step 6 of 33
Step 7 of 33
Step 8 of 33
Step 9 of 33
Step 10 of 33
Step 11 of 33
Step 12 of 33
Step 13 of 33
Step 14 of 33
Step 15 of 33
Step 16 of 33
Step 17 of 33
Step 18 of 33
Step 19 of 33
Step 20 of 33
Step 21 of 33
Step 22 of 33
Step 23 of 33
Step 24 of 33
Step 25 of 33
Step 26 of 33
Step 27 of 33
Step 28 of 33
Step 29 of 33
Step 30 of 33
Step 31 of 33
Step 32 of 33
Step 33 of 33


In [32]:
write_venue_info_to_file(search_list, 'seattle_venues.txt')

Done!


##### Longitude_group 2

In [33]:
search_list = get_venue_data(longitude_group2, latitude_group)

Step 1 of 33
Step 2 of 33
Step 3 of 33
Step 4 of 33
Step 5 of 33
Step 6 of 33
Step 7 of 33
Step 8 of 33
Step 9 of 33
Step 10 of 33
Step 11 of 33
Step 12 of 33
Step 13 of 33
Step 14 of 33
Step 15 of 33
Step 16 of 33
Step 17 of 33
Step 18 of 33
Step 19 of 33
Step 20 of 33
Step 21 of 33
Step 22 of 33
Step 23 of 33
Step 24 of 33
Step 25 of 33
Step 26 of 33
Step 27 of 33
Step 28 of 33
Step 29 of 33
Step 30 of 33
Step 31 of 33
Step 32 of 33
Step 33 of 33


In [39]:
write_venue_info_to_file(search_list, 'seattle_venues.txt')

Done!


##### Longitude_group 3

In [62]:
search_list = get_venue_data(longitude_group3, latitude_group)

Step 1 of 34
Step 2 of 34
Step 3 of 34
Step 4 of 34
Step 5 of 34
Step 6 of 34
Step 7 of 34
Step 8 of 34
Step 9 of 34
Step 10 of 34
Step 11 of 34
Step 12 of 34
Step 13 of 34
Step 14 of 34
Step 15 of 34
Step 16 of 34
Step 17 of 34
Step 18 of 34
Step 19 of 34
Step 20 of 34
Step 21 of 34
Step 22 of 34
Step 23 of 34
Step 24 of 34
Step 25 of 34
Step 26 of 34
Step 27 of 34
Step 28 of 34
Step 29 of 34
Step 30 of 34
Step 31 of 34
Step 32 of 34
Step 33 of 34
Step 34 of 34


In [67]:
write_venue_info_to_file(search_list, 'test2.txt')

Done!


### Iterate through file and extract json info into database

In [226]:
with open('seattle_venues.txt') as f:
    all_venues = []
    for item in f:
        all_venues.append(ast.literal_eval(item))

In [231]:
data = json_normalize(all_venues)

### Clean database

In [262]:
columns_to_drop = ['allowMenuUrlEdit', 'beenHere.lastCheckinExpiredAt', 'categories',
       'contact.facebook', 'contact.facebookName', 'contact.facebookUsername',
       'contact.formattedPhone', 'contact.instagram', 'contact.phone',
       'contact.twitter', 'delivery.id', 'delivery.provider.name',
       'delivery.url', 'hasMenu', 'hasPerk', 'location.crossStreet', 'location.distance',
       'location.formattedAddress', 'location.labeledLatLngs', 
       'location.neighborhood', 'menu.anchor', 'menu.externalUrl', 'menu.label',
       'menu.mobileUrl', 'menu.type', 'menu.url', 'referralId', 'specials.count',
       'specials.items', 'stats.checkinsCount', 'venueChains', 'venuePage.id',
       'venueRatingBlacklisted', 'verified', 'location.cc', 'location.country', 'storeId', 'url']

In [314]:
data_with_dropped_columns = data.drop(labels=columns_to_drop, axis=1)

In [315]:
data_filtered = data_with_dropped_columns.drop_duplicates(subset='id').reset_index().drop('index', axis=1)

In [318]:
data_filtered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 870 entries, 0 to 869
Data columns (total 10 columns):
id                     870 non-null object
location.address       746 non-null object
location.city          861 non-null object
location.lat           870 non-null float64
location.lng           870 non-null float64
location.postalCode    795 non-null object
location.state         870 non-null object
name                   870 non-null object
stats.tipCount         870 non-null int64
stats.usersCount       870 non-null int64
dtypes: float64(2), int64(2), object(6)
memory usage: 68.0+ KB


### Define tips functions

In [327]:
tips_params = dict(
    client_id=secrets['client_id'],
    client_secret=secrets['client_secret'],
    limit=500,
    v='20180113')

In [333]:
def get_tips_data(venue_id):
    '''
    Takes in a venue id and performs a GET request to Foursquare's API to retrieve a max of 500 tips formatted
    as a json object. Returns a list of strings of tips.
    '''
    tips_list = []
        tips_url = 'https://api.foursquare.com/v2/venues/{}/tips'.format(venue_id)
        try:
            tips_resp = requests.get(url=tips_url, params=tips_params)
            tips_data = json.loads(tips_resp.text)
            if tips_data['meta']['code'] == 403:
                print('403 error - Exceeded rate limit')
                print(tips_data)
                return tips_list
            for tip in tips_data['response']['tips']['items']:
                tips_list.append(tip['text'])
        except:
            tips_list.append(venue_id)
    return tips_list

### Test single GET from tips api


In [322]:
test_id = data_filtered.iloc[0]['id']

In [323]:
test_id

'49f2b061f964a5205e6a1fe3'

In [338]:
get_tips_data(test_id)

['Absolutely incredible view, in my opinion the best in all of Seattle! Just take some time to get your coffee and relax. You can sit outside or near one of the sliding windows!',
 'Great views of the beach! Lots of places to plug in laptops. Lots of seating, even sit by the fireplace.',
 "Sunday's on alki in the summer are very busy, this place is no exception. Sorely understaffed on a Sunday night. Long wait just to order. 😕",
 'Nicest people work this Starbucks. Best view in town & Relaxing atmosphere.',
 "As a Londoner, this is definitely the best view I've ever seen from a Starbucks window!",
 'A fantastic location with a great atmosphere on a Saturday night.',
 'Really friendly customer service! Loved it!',
 'Nice spot to watch the water while reading a book or working on your laptop.',
 'This location is great! Only thing better would be a Starbucks on the top of the Space Needle.  Like Dr. Evil.',
 "Try the iced passionfruit lemonade. You won't be disappointed.",
 'The big over

### Get tips into database

In [346]:
data_filtered['tips'] = data_filtered.apply(lambda row: get_tips_data(row['id']), axis=1)

In [351]:
data_filtered.to_csv('seattle_coffeeshops_foursquare.csv')