In [16]:
import json # library to handle JSON files
import math
import os # used to read the Foursquare credentials from the environment
import random
from json import JSONDecodeError

import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analysis
import requests # library to handle requests

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you don't have the folium package
import folium # map rendering library

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
#To execute this notebook, you will need a verified Foursquare developer account.
#Credentials are read from the environment variables FOURSQUARE_CLIENT_ID and
#FOURSQUARE_CLIENT_SECRET so that they never have to be saved inside the
#notebook. The placeholder strings are used only when a variable is not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID',
                           'fill in your Foursquare ID here')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET',
                               'fill in your Foursquare Secret here')
VERSION = '20180605' # Foursquare API version

Our region of study will be a square, 25 kilometers by 25 kilometers, centered at 34.052200° N, 118.243700° W. These coordinates were obtained by searching Google for "los angeles lat/lon". 

We will use a spherical earth approximation to compute the latitudes and longitudes of the boundaries of this square, and the latitudes and longitudes of random points within this square. According to this approximation, one degree of latitude is 10<sup>7</sup>/90 meters, and one degree of longitude is cos(latitude)\*10<sup>7</sup>/90 meters.

In [3]:
#Center of the study area, in decimal degrees (negative longitude = west)
la_lat = 34.052200
la_lon = -118.243700

#Half the side length of the square study area
square_half_side = 12500 #meters

In [4]:
def north_east_of_la(north, east):
    '''Compute the latitude and longitude of a point in our
    study area, given its distance north/south and east/west
    of the center point (la_lat, la_lon).

    A spherical earth approximation is used: 90 degrees of latitude
    span 10**7 meters, and a degree of longitude is shorter than a
    degree of latitude by a factor of cos(latitude).

    Inputs:
        north: float; distance north of center point, in meters.
            Use negative for points south of the center.
        east: float; distance east of center point, in meters.
            Use negative for points west of the center.

    Returns: tuple (latitude, longitude), in degrees
    '''
    degrees_per_meter = 90/10**7
    lat = la_lat + north*degrees_per_meter
    # The longitude scale is evaluated at the latitude of the new point.
    # math.radians replaces a hand-typed, truncated value of pi.
    lon = la_lon + east*degrees_per_meter/math.cos(math.radians(lat))
    return (lat, lon)

In [5]:
#Corners of the square study area, each a (latitude, longitude) tuple.
#Each pair of signs is (north/south, east/west) relative to the center.
nw_corner, ne_corner, sw_corner, se_corner = (
    north_east_of_la(north_sign*square_half_side, east_sign*square_half_side)
    for north_sign, east_sign in [(1, -1), (1, 1), (-1, -1), (-1, 1)]
)

In [6]:
#Outline of the study area: visit the four corners in order and return
#to the first one so the polyline closes
study_outline = [nw_corner, ne_corner, se_corner, sw_corner, nw_corner]

map_of_study = folium.Map(location=[la_lat, la_lon], zoom_start=11)
folium.PolyLine(study_outline).add_to(map_of_study)
map_of_study

#### Let's create a function to query Foursquare for venues

In [7]:
def get_nearby_venues(x, y, radius=500, limit=100):
    '''
    Query Foursquare for venues near a particular point within the 
    study area, and return the IDs and categories of the venues. 
    Note that if limit > 100, Foursquare will return at most 100 venues.
    
    Inputs: 
        x: (float) x coordinate of the query point, in meters east 
            of the study center point
        y: (float) y coordinate of the query point, in meters north 
            of the study center point
        radius: (float) query radius, in meters
        limit: (int) maximum number of results to return
        
    Returns: a Series with venue IDs as the index and venue
        categories as the values. The category is the name of the 
        first category Foursquare lists for the venue, or None if the
        venue has no categories.
        
    Raises: whatever requests raises for a failed or timed-out request;
        KeyError/IndexError if the response does not have the expected
        structure (for example, an error payload).
    '''
        
    (lat, lon) = north_east_of_la(y, x)
    # Let requests build and escape the query string
    query = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lon),
        'radius': radius,
        'limit': limit,
    }
    # make the GET request; the timeout keeps a stalled connection from
    # hanging the notebook indefinitely
    response = requests.get('https://api.foursquare.com/v2/venues/explore',
                            params=query, timeout=30)
    results = response.json()["response"]['groups'][0]['items']
    
    # include only relevant information for each nearby venue; collecting
    # into a dict first avoids growing a Series one element at a time
    category_by_id = {}
    for v in results:
        venue = v['venue']
        venue_categories = venue['categories']
        category_by_id[venue['id']] = (venue_categories[0]['name']
                                       if venue_categories else None)
        
    return pd.Series(category_by_id, dtype=object)

We test `get_nearby_venues` at 50 random points within the study area:

In [8]:
#Print how many venues Foursquare returns at each of 50 random points.
#The limit of 150 is deliberately above 100.
for trial in range(50):
    test_x = random.uniform(-square_half_side, square_half_side)
    test_y = random.uniform(-square_half_side, square_half_side)
    nv = get_nearby_venues(test_x, test_y, radius=1000, limit=150)
    print(len(nv))

71
10
17
42
49
60
13
26
62
32
43
29
56
13
13
37
36
12
45
20
23
40
6
52
23
77
18
100
28
100
14
21
24
57
84
100
67
52
16
37
11
15
74
13
30
28
24
39
57
48


We see that several times, Foursquare returned 100 venues. Most likely, this means there were *n* > 100 venues within the query radius. We know that Foursquare is limited to returning 100 venues, but we don't know how Foursquare chooses 100 out of the _n_ venues. Its selection rule may cause an unknown bias. Therefore, when a query returns 100 venues, we do not use these results. However, if we simply ignore these results, we are undersampling areas with a high density of venues. To avoid this problem, whenever a query returns 100 venues, we replace it with four more queries with half the radius, centered half a radius away from the original point in each diagonal direction. These four queries cover the same total area as the original query, although not exactly the same region.

In [9]:
def get_nearby_venues_unbiased(x, y, radius=1000):
    '''
    Query Foursquare for venues near a particular point within the 
    study area, and return the IDs and categories of the venues. 
    Note that Foursquare will return at most 100 venues. 
    
    To ensure that Foursquare does not bias the results with
    unknown selection rules, we ignore the results of queries
    that return 100 venues. Instead, we execute queries for 
    four nearby points, using a radius half as large. This is
    applied recursively, so a sub-query that again returns 100
    venues is itself split in four.
    
    A query that fails (network error, invalid JSON, or a response
    without the expected structure) is reported with print and then 
    treated as having returned no venues.
    
    Inputs: 
        x: (float) x coordinate of the query point, in meters east 
            of the study center point
        y: (float) y coordinate of the query point, in meters north 
            of the study center point
        radius: (float) query radius, in meters
        
    Returns: a DataFrame with a row for each venue found, with
        columns 'id' and 'category'. The category is the name of the
        first category Foursquare lists for the venue, or None if the
        venue has no categories.
    '''
        
    limit = 100
    (lat, lon) = north_east_of_la(y, x)
    # Let requests build and escape the query string
    query = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lon),
        'radius': radius,
        'limit': limit,
    }
    # make the GET request
    json_received = None
    results = []
    try:
        json_received = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params=query, timeout=30).json()
    except (requests.RequestException, ValueError):
        # ValueError covers every JSON decoding error that .json() can
        # raise; RequestException covers connection errors and timeouts
        print('query failed')
    if json_received is not None:
        try:
            results = json_received["response"]['groups'][0]['items']
        except (KeyError, IndexError, TypeError):
            # Valid JSON without the expected structure, e.g. an error
            # payload. (Indexing never raises JSONDecodeError.)
            print(json_received)
                
    if len(results) < limit:
        # include only relevant information for each nearby venue; the
        # rows are collected first so the DataFrame is built in one step
        records = []
        for v in results:
            venue = v['venue']
            venue_categories = venue['categories']
            records.append(
                {'id': venue['id'],
                 'category': (venue_categories[0]['name']
                              if venue_categories else None)
                })
        nearby_venues = pd.DataFrame(records, columns=['id', 'category'])
        print(len(nearby_venues))
    else:
        # The result list is full, so it may be a biased subset: discard
        # it and query four circles of half the radius instead
        quadrants = [get_nearby_venues_unbiased(x + i*radius/2, y + j*radius/2, radius/2)
                     for i in [-1, 1] for j in [-1, 1]
                    ]
        nearby_venues = (pd.concat(quadrants, axis=0, ignore_index=True)
                         .drop_duplicates(subset=['id'], keep='first')
                        )
    
    return nearby_venues

We call `get_nearby_venues_unbiased` repeatedly at random locations until we have at least 10,000 distinct venues. We use `drop_duplicates` to ensure they are distinct.

In [10]:
#Collect required number of venues
venues_to_find = 10000
radius = 1000

#Query centers lie within a smaller square, so that the query
#circle stays within the study area
query_square_half_side = square_half_side - radius

venues = pd.DataFrame(columns=['id', 'category'])
centers_chosen = 0
while len(venues) < venues_to_find:
    random_x = random.uniform(-query_square_half_side,
                              query_square_half_side)
    random_y = random.uniform(-query_square_half_side,
                              query_square_half_side)
    centers_chosen += 1
    new_venues = get_nearby_venues_unbiased(random_x, random_y, radius)
    #pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    #Venues already seen from an earlier query center are dropped.
    venues = (pd.concat([venues, new_venues], ignore_index=True, sort=False)
              .drop_duplicates(subset=['id'], keep='first')
             )
    #Progress: (centers queried, venues from this center, distinct venues so far)
    print((centers_chosen, len(new_venues), len(venues)))

5
13
89
14
(1, 121, 121)
65
(2, 65, 186)
21
(3, 21, 207)
13
(4, 13, 220)
98
(5, 98, 318)
12
67
30
19
27
29
45
(6, 229, 547)
26
(7, 26, 573)
15
(8, 15, 588)
50
(9, 50, 634)
69
(10, 69, 703)
99
(11, 99, 802)
17
(12, 17, 819)
12
(13, 12, 828)
16
(14, 16, 844)
21
(15, 21, 865)
23
(16, 23, 888)
24
(17, 24, 912)
52
(18, 52, 964)
56
(19, 56, 1020)
59
(20, 59, 1079)
9
(21, 9, 1088)
66
(22, 66, 1142)
13
(23, 13, 1155)
31
(24, 31, 1186)
36
(25, 36, 1222)
43
(26, 43, 1265)
59
(27, 59, 1318)
22
(28, 22, 1335)
85
(29, 85, 1344)
72
(30, 72, 1416)
42
(31, 42, 1446)
43
(32, 43, 1489)
16
(33, 16, 1505)
38
(34, 38, 1529)
24
(35, 24, 1553)
33
11
1
95
11
4
47
(36, 202, 1667)
27
(37, 27, 1694)
38
29
54
44
(38, 165, 1859)
98
(39, 98, 1923)
18
(40, 18, 1941)
58
61
12
32
(41, 163, 2104)
14
(42, 14, 2111)
60
(43, 60, 2171)
84
(44, 84, 2254)
16
(45, 16, 2261)
16
(46, 16, 2277)
59
49
3
10
(47, 121, 2398)
16
(48, 16, 2413)
46
(49, 46, 2459)
32
(50, 32, 2491)
14
51
1
53
29
14
31
(51, 193, 2572)
11
11
19
25
19
20
4

17
(408, 17, 9930)
24
(409, 24, 9930)
33
(410, 33, 9930)
97
(411, 97, 9944)
19
(412, 19, 9945)
5
(413, 5, 9946)
88
(414, 88, 9975)
10
84
42
6
(415, 142, 9995)
22
(416, 22, 10000)


In [11]:
##Save venues to a file (venues.csv in the working directory), so the
##collected sample can be reloaded without repeating the Foursquare queries
venues.to_csv('venues.csv')

In [12]:
#Summary of the collected venues: row count, number of distinct values,
#and the most frequent value in each column
venues.describe()

Unnamed: 0,id,category
count,10000,10000
unique,10000,454
top,4c27dc1de19720a1f720f758,Mexican Restaurant
freq,1,455


We have found a total of 10,000 venues. It will be useful to know how many are in each category:

In [13]:
#Number of collected venues in each category, most common category first
categories = venues['category'].value_counts()

#Save category counts to a file
categories.to_csv('categories.csv')

In [14]:
#Renumber the rows 0..n-1; the previous row labels are kept in a new
#'index' column. Note: re-running this cell would add yet another column.
venues = venues.reset_index()

In the cell below, for each of these 10,000 venues, we query Foursquare for its "next venues," which means the most common venues that a user checks in at after checking in at this venue. This takes about two hours.

Occasionally, while the cell below is executing, Foursquare fails to return a valid result. In this case, the code below will print the JSON and continue. Allow the cell to execute until it finishes. Then replace `for i in venues.index:` with `for i in venues[pd.isna(venues['num next'])].index:` and re-execute the cell. This will repeat the queries that were not successful.

In [18]:
#Make sure the bookkeeping column exists, so that this cell runs on a
#fresh kernel as well as on a re-run
if 'num next' not in venues.columns:
    venues['num next'] = np.nan

#Only rows without a recorded count are queried. On the first run that is
#every row; re-executing the cell retries exactly the queries that failed.
for i in venues[pd.isna(venues['num next'])].index:

    venue_id = venues.loc[i, 'id']
    url = 'https://api.foursquare.com/v2/venues/{}/nextvenues'.format(venue_id)
    query = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
    }
    json_received = None
    try:
        json_received = requests.get(url, params=query, timeout=30).json()
    except (requests.RequestException, ValueError):
        #ValueError covers every JSON decoding error that .json() can raise;
        #RequestException covers connection errors and timeouts
        print(i)
        print('query failed')
    if json_received is not None:
        try:
            num_next_venues = json_received['response']['nextVenues']['count']
            next_venues = json_received['response']['nextVenues']['items']
            #Extract every category before writing anything, so a malformed
            #item cannot leave the row half filled in
            next_categories = [v['categories'][0]['name']
                               for v in next_venues[:num_next_venues]]
        except (KeyError, IndexError, TypeError):
            #Valid JSON without the expected structure; the row keeps its
            #missing 'num next' and is retried on the next execution
            print(i)
            print(json_received)
        else:
            venues.loc[i, 'num next'] = num_next_venues
            for j, next_category in enumerate(next_categories, start=1):
                venues.loc[i, 'next' + str(j)] = next_category
        
    if i%100 == 0:
        print(i)
        #Save progress to a file so we can recover from a crash
        venues.to_csv('venuesWithNext.csv')

#Final save: the periodic checkpoint above only fires on multiples of 100,
#so without this the last rows (and all retried rows) would not be written
venues.to_csv('venuesWithNext.csv')

In [19]:
#Summary of every column, numeric and non-numeric, now that the
#next-venue columns have been added
venues.describe(include='all')

Unnamed: 0,index,id,category,num next,next1,next2,next3,next4,next5
count,10000.0,10000,10000,10000.0,4955,3914,3288,2848,2560
unique,,10000,454,,235,234,225,216,219
top,,4c27dc1de19720a1f720f758,Mexican Restaurant,,Coffee Shop,Coffee Shop,Coffee Shop,Coffee Shop,Coffee Shop
freq,,1,455,,375,238,214,167,166
mean,4999.5048,,,1.7565,,,,,
std,2886.904001,,,2.127548,,,,,
min,0.0,,,0.0,,,,,
25%,2499.75,,,0.0,,,,,
50%,4999.5,,,0.0,,,,,
75%,7499.25,,,5.0,,,,,


In [36]:
#How many venues have 0, 1, 2, ... next venues recorded
venues['num next'].value_counts().sort_index()

0.0    5045
1.0    1041
2.0     626
3.0     440
4.0     288
5.0    2560
Name: num next, dtype: int64

Let's look at venues in the Sports Bar category.

In [20]:
#Worked example: select every venue in a single category
category = 'Sports Bar'
in_category = venues['category'] == category
group = venues[in_category]
group

Unnamed: 0,index,id,category,num next,next1,next2,next3,next4,next5
333,333,5a29fc9f9d6a191b4ec7caf4,Sports Bar,2.0,Basketball Stadium,Basketball Court,,,
370,370,4f31d5efe4b057434ce9c989,Sports Bar,1.0,Basketball Stadium,,,,
377,377,52f7dd7111d22027eea4c47f,Sports Bar,5.0,Basketball Stadium,General Entertainment,Movie Theater,Rock Club,Bar
890,890,5875af81d4ab736b8e785bac,Sports Bar,2.0,College Football Field,Soccer Stadium,,,
1983,1983,54642cba498e50ec78b74e43,Sports Bar,4.0,Cocktail Bar,Shopping Plaza,Burger Joint,Dumpling Restaurant,
2034,2034,4ac5aa87f964a52064b220e3,Sports Bar,5.0,Performing Arts Venue,Cocktail Bar,Shopping Plaza,Bakery,Gym / Fitness Center
2584,2584,561db7a3498e594adb2fa222,Sports Bar,1.0,Basketball Stadium,,,,
3188,3188,4c2035fdb306c928eaac69b7,Sports Bar,2.0,Casino,Outlet Mall,,,
4147,4147,4c33afec452620a10d02240f,Sports Bar,4.0,Baseball Stadium,Baseball Field,Clothing Store,Lounge,
4153,4153,535c92de498eacad03e66b58,Sports Bar,1.0,Baseball Stadium,,,,


Each row is a venue. Where do people go after visiting this venue? For each venue, Foursquare has given us up to five likely answers. We don't care about the identities of these venues, so we have saved their categories only. These categories are in columns next1 through next5.

We start by counting occurrences of categories in the column next1:

In [21]:
#How often each category appears as the first-listed next venue in this group
group['next1'].value_counts()

Basketball Stadium        4
College Football Field    2
Baseball Stadium          2
Casino                    1
Cocktail Bar              1
Korean Restaurant         1
Shopping Mall             1
Concert Hall              1
Performing Arts Venue     1
Name: next1, dtype: int64

We repeat this for columns next2 through next5, and combine the results:

In [22]:
MNPV = 5 #Maximum number of results from a nextvenues query (determined by Foursquare)

#One column of category counts per list position (next1 ... next5),
#aligned side by side on the category name
counts_by_position = [group['next' + str(position)].value_counts()
                      for position in range(1, MNPV + 1)]
group_scores = pd.concat(counts_by_position, axis=1, sort=False)
group_scores

Unnamed: 0,next1,next2,next3,next4,next5
Basketball Stadium,4.0,,,,
College Football Field,2.0,,1.0,,
Baseball Stadium,2.0,,,,
Casino,1.0,,,,
Cocktail Bar,1.0,1.0,,,
Korean Restaurant,1.0,,1.0,,
Shopping Mall,1.0,,,,
Concert Hall,1.0,,,,
Performing Arts Venue,1.0,,,,
Asian Restaurant,,1.0,,,


We'd like to add up the total number of times people went to these other venues after a sports bar. Foursquare doesn't give us these numbers, but it does order the next venues using these numbers in descending order. Therefore we give higher weight to those earlier in the list. We make a wild guess that the best weights for columns next1 through next5 are 10, 9, 8, 7, and 6 respectively. Here are the weighted scores: 

In [23]:
WFFR = 10 #Weight given to the first result of a nextvenues query (my choice)

#Same table as before, but each list position is scaled by its weight:
#WFFR for next1, then one less for each later position
position_weights = zip(range(1, MNPV + 1), range(WFFR, WFFR - MNPV, -1))
group_scores = pd.concat(
    [group['next' + str(position)].value_counts()*weight
     for position, weight in position_weights],
    axis=1, sort=False)
group_scores

Unnamed: 0,next1,next2,next3,next4,next5
Basketball Stadium,40.0,,,,
College Football Field,20.0,,8.0,,
Baseball Stadium,20.0,,,,
Casino,10.0,,,,
Cocktail Bar,10.0,9.0,,,
Korean Restaurant,10.0,,8.0,,
Shopping Mall,10.0,,,,
Concert Hall,10.0,,,,
Performing Arts Venue,10.0,,,,
Asian Restaurant,,9.0,,,


We get the total score for each category by adding across the rows:

In [24]:
#Collapse the per-position columns into one total score per category
#(missing entries are skipped by sum). Note: this rebinds group_scores 
#from a DataFrame to a Series, so this cell cannot be re-run without 
#first re-running the cell that builds the weighted table.
group_scores = group_scores.sum(axis=1)
group_scores

Basketball Stadium        40.0
College Football Field    28.0
Baseball Stadium          20.0
Casino                    10.0
Cocktail Bar              19.0
Korean Restaurant         18.0
Shopping Mall             10.0
Concert Hall              10.0
Performing Arts Venue     10.0
Asian Restaurant           9.0
Baseball Field             9.0
Shopping Plaza            17.0
Outlet Mall                9.0
Soccer Stadium             9.0
Sports Bar                 9.0
General Entertainment      9.0
Basketball Court           9.0
Chinese Restaurant         9.0
Movie Theater             24.0
Rock Club                 15.0
Chocolate Shop             8.0
Clothing Store             8.0
Burger Joint               8.0
Lounge                     7.0
Dumpling Restaurant        7.0
Gastropub                 13.0
Karaoke Bar                7.0
Fast Food Restaurant       7.0
Bakery                     7.0
Music Venue                6.0
American Restaurant        6.0
Bar                        6.0
Gym / Fi

Finally, we convert these scores to percentages by dividing by the total, and list the top five in descending order:

In [25]:
NRPC = 5 #Number of results to output per category (my choice)

#Express each score as a share of the total, largest share first,
#and keep the leading NRPC categories
group_percent = (group_scores*100/group_scores.sum()).sort_values(ascending=False)
leaders = group_percent.head(NRPC)
leaders

Basketball Stadium        10.126582
College Football Field     7.088608
Movie Theater              6.075949
Baseball Stadium           5.063291
Cocktail Bar               4.810127
dtype: float64

Thus, we estimate that someone at a sports bar has a 10.1 percent chance of next visiting a basketball stadium, and that the next four most likely categories are college football field, movie theater, baseball stadium, and cocktail bar.

Now we repeat this computation for all categories. We will only include categories where we have at least 10 venues with next venues, with a total of at least 20 next venues.

In [26]:
#Column layout: three summary counts, followed by a (category, percentage)
#pair for each of the NRPC leading next-venue categories
results_columns = ['# of Venues', '# of Venues With Next',
                   '# of Next Venues'.join(['Total ', ''])]
for rank in range(1, NRPC + 1):
    results_columns.extend(['Next Category #' + str(rank),
                            'Pct. #' + str(rank)])
results_df = pd.DataFrame(columns=results_columns)

for category, group in venues.groupby('category'):
    num_venues = len(group)
    num_with_next = len(group.loc[group['num next'] > 0])
    total_next = group['num next'].sum()
    #Skip categories with too little data: fewer than 10 venues that have
    #next venues, or fewer than 20 next venues in total
    if num_with_next < 10 or total_next < 20:
        continue

    results_df.loc[category, '# of Venues'] = num_venues
    results_df.loc[category, '# of Venues With Next'] = num_with_next
    results_df.loc[category, 'Total # of Next Venues'] = total_next

    #Weighted score of every next-venue category, summed over list positions
    weighted_counts = [group['next' + str(k + 1)].value_counts()*(WFFR - k)
                       for k in range(MNPV)]
    group_scores = pd.concat(weighted_counts, axis=1, sort=False).sum(axis=1)

    #Scores as percentages of the total, largest first
    group_percent = (group_scores*100/group_scores.sum()
                    ).sort_values(ascending=False)
    leaders = group_percent.head(NRPC)
    for position in range(len(leaders)):
        results_df.loc[category,
                       'Next Category #' + str(position + 1)
                      ] = leaders.index[position]
        results_df.loc[category,
                       'Pct. #' + str(position + 1)
                      ] = leaders.iloc[position]

print(results_df.shape)
results_df

(122, 13)


Unnamed: 0,# of Venues,# of Venues With Next,Total # of Next Venues,Next Category #1,Pct. #1,Next Category #2,Pct. #2,Next Category #3,Pct. #3,Next Category #4,Pct. #4,Next Category #5,Pct. #5
American Restaurant,154,102,399,Coffee Shop,6.80437,Shopping Mall,6.28797,Bar,4.89064,Grocery Store,4.52612,Ice Cream Shop,3.70595
Art Gallery,87,46,156,Art Gallery,24.6942,Bar,6.80428,Art Museum,5.58104,Garden,2.9052,Gastropub,2.75229
Art Museum,20,14,59,Art Museum,28.3925,Coffee Shop,11.4823,Sculpture Garden,7.30689,New American Restaurant,6.88935,Historic Site,6.88935
Arts & Crafts Store,32,18,72,Shopping Mall,10.9797,Coffee Shop,6.25,Bookstore,4.56081,Flower Shop,4.39189,Furniture / Home Store,4.39189
Asian Restaurant,90,36,114,Coffee Shop,12.6294,Bubble Tea Shop,8.28157,Dessert Shop,7.14286,Grocery Store,7.14286,Ice Cream Shop,6.8323
BBQ Joint,36,20,73,Dessert Shop,6.87398,Ice Cream Shop,6.54664,Coffee Shop,5.89198,Café,5.40098,Shopping Mall,3.92799
Bakery,149,73,242,Coffee Shop,11.0619,Shopping Mall,8.01377,Grocery Store,6.24385,Bakery,4.6706,Café,4.57227
Bank,69,47,159,Grocery Store,15.2757,Coffee Shop,13.5618,Pharmacy,9.01639,Supermarket,8.71833,Shopping Mall,6.92996
Bar,116,97,413,Bar,19.0377,Gastropub,4.5144,Cocktail Bar,4.2174,Mexican Restaurant,3.2967,Lounge,3.267
Big Box Store,15,15,66,Grocery Store,17.0412,Coffee Shop,12.1723,Shopping Mall,10.2996,Supermarket,5.99251,Pharmacy,5.61798


Thus, for 122 venue categories, we have recommendations for targeting advertising to people at those venues.

In [31]:
#Save results as a Markdown table
header = "| | # of Venues | # of Venues With Next | Total # of Next Venues | Next Category #1 | Pct. #1 | Next Category #2 | Pct. #2 | Next Category #3 | Pct. #3 | Next Category #4 | Pct. #4 | Next Category #5 | Pct. #5 |\n"
#A Markdown table is only recognized when the delimiter row has exactly as
#many cells as the header row, so the delimiter row is derived from the
#header instead of being typed out by hand
num_cells = header.count("|") - 1
row_format = "| {0} | {1:.0f} | {2:.0f} | {3:.0f} | {4} | {5:.1f} | {6} | {7:.1f} | {8} | {9:.1f} | {10} | {11:.1f} | {12} | {13:.1f} |\n"

#The with statement closes the file even if formatting a row fails. The
#name f stays bound afterwards, and closing an already closed file is
#harmless.
with open("results.md", "w+") as f:
    f.write(header)
    f.write("|" + " ---:|"*num_cells + "\n")
    for category in results_df.index:
        row = results_df.loc[category]
        f.write(row_format.format(
             category,
             row['# of Venues'],
             row['# of Venues With Next'],
             row['Total # of Next Venues'],
             row['Next Category #1'],
             row['Pct. #1'],
             row['Next Category #2'],
             row['Pct. #2'],
             row['Next Category #3'],
             row['Pct. #3'],
             row['Next Category #4'],
             row['Pct. #4'],
             row['Next Category #5'],
             row['Pct. #5'])
               )
                        
        

In [32]:
#Close the results.md file handle so that everything written is flushed to disk
f.close()

In [34]:
#The ten categories whose leading next-venue category has the largest share
results_df.sort_values(by="Pct. #1", ascending=False).head(10)

Unnamed: 0,# of Venues,# of Venues With Next,Total # of Next Venues,Next Category #1,Pct. #1,Next Category #2,Pct. #2,Next Category #3,Pct. #3,Next Category #4,Pct. #4,Next Category #5,Pct. #5
Zoo Exhibit,28,22,89,Zoo Exhibit,74.4218,Zoo,13.8776,Theme Park Ride / Attraction,4.08163,Gift Shop,1.90476,Fast Food Restaurant,1.90476
Theme Park,28,25,112,Theme Park Ride / Attraction,45.645,Theme Park,45.204,American Restaurant,2.09482,Diner,1.54355,Historic Site,1.10254
Garden,26,21,64,Garden,45.3704,Tea Room,16.1111,Museum,11.8519,Park,7.77778,Grocery Store,3.51852
Theme Park Ride / Attraction,33,26,113,Theme Park,41.5135,Theme Park Ride / Attraction,39.4595,Museum,2.81081,General Entertainment,2.27027,Zoo,2.16216
Golf Course,14,10,27,Golf Course,33.4764,Diner,8.58369,Park,8.15451,American Restaurant,6.43777,Grocery Store,6.43777
Yoga Studio,41,27,65,Grocery Store,32.0557,Coffee Shop,17.9443,Juice Bar,8.01394,Café,6.27178,Supermarket,6.09756
Thrift / Vintage Store,42,19,70,Thrift / Vintage Store,31.0881,Big Box Store,7.59931,Shopping Mall,7.25389,Grocery Store,6.90846,Clothing Store,6.39033
Trail,147,35,113,Trail,29.979,Park,16.457,Observatory,10.587,Scenic Lookout,9.85325,Mountain,4.08805
Art Museum,20,14,59,Art Museum,28.3925,Coffee Shop,11.4823,Sculpture Garden,7.30689,New American Restaurant,6.88935,Historic Site,6.88935
Wine Shop,15,13,50,Grocery Store,27.9805,American Restaurant,7.05596,Big Box Store,6.56934,Cocktail Bar,4.86618,Shopping Mall,4.37956


In [35]:
#Save results to a file (results.csv in the working directory), with the
#venue category as the row label
results_df.to_csv('results.csv')