# Pizza pizza pizza

Get top venues by keyword and location, querying Google, Yelp, Foursquare

#### Motivation:
 - The sorting and filtering options in the Google Maps and Yelp GUIs are limited.
 - Query all 3 services and bring the combined results into a sortable table and a map widget.

#### Google

 - Needs a Google API key and module
 - [Create Google Cloud credentials and give access to Places APIs](https://console.cloud.google.com/google/maps-apis/credentials) (also, restrict IP or set other restrictions)
 - `conda install -c conda-forge -y gmaps`
 - put key in `apikeys/apikey.txt`
 - `gmaps` Jupyter nbextension to show maps in notebook, with marker pins etc.

```
conda install -c conda-forge -y jupyter_contrib_nbextensions
jupyter nbextension enable --py gmaps
jupyter notebook
```

#### Yelp
 - needs Yelp API key and module
 - https://www.yelp.com/developers/documentation/v3
 - https://github.com/gfairchild/yelpapi
 - put key in `apikeys/yelpkey.txt`
 
#### Foursquare
- Needs Foursquare API key and module
- https://developer.foursquare.com/docs/places-api/getting-started/
- https://github.com/mLewisLogic/foursquare
- OAuth id in `apikeys/foursquare_id.txt`
- OAuth secret in `apikeys/foursquare_secret.txt`

See `requirements.txt` for versions used, other requirements (requests, beakerx, folium, Flask)

I was going to try OpenTable and TripAdvisor, but their terms seem to limit API key access to approved commercial partners.
- https://dev.opentable.com/affiliate-partners/
- https://www.tripadvisor.com/APIAccessSupport

In [127]:
import time
from pprint import pprint
import ipywidgets
from ipywidgets import widgets, interact
from itertools import product
from os import path

import traceback
import pdb

import numpy as np
import pandas as pd

# cluster similar entities
import pandas_dedupe

import sklearn
from sklearn.preprocessing import StandardScaler

import requests, json 

import qgrid
from qgrid import show_grid

# convert coords to km using haversine distance 
import geopy
from geopy.distance import distance

import gmaps
# Read the Google API key from the first line of the key file. The `with`
# block closes the file on exit, so no explicit close is needed.
with open('apikeys/apikey.txt') as f:
    api_key = f.readline().strip()
gmaps.configure(api_key=api_key)

# https://github.com/gfairchild/yelpapi
import yelpapi
from yelpapi import YelpAPI
# Read the Yelp API key from the first line of the key file. The `with`
# block closes the file on exit, so no explicit close is needed.
with open('apikeys/yelpkey.txt') as f:
    yelp_key = f.readline().strip()
yelp_api = YelpAPI(yelp_key)

import foursquare
from foursquare import Foursquare, FoursquareException
# Read the Foursquare OAuth id and secret from the first line of their key
# files. Each `with` block closes its file on exit.
with open('apikeys/foursquare_id.txt') as f:
    foursquare_id = f.readline().strip()
with open('apikeys/foursquare_secret.txt') as f:
    foursquare_secret = f.readline().strip()

# interactive maps
import folium

# Report the versions of the main libraries used by this notebook,
# one "name version" line per library.
print("\n".join(
    "%-20s %s" % (label, module.__version__)
    for label, module in (
        ("numpy", np),
        ("pandas", pd),
        ("ipywidgets", ipywidgets),
        ("qgrid", qgrid),
        ("sklearn", sklearn),
        ("requests", requests),
        ("geopy", geopy),
        ("gmaps", gmaps),
        ("foursquare", foursquare),
        ("folium", folium),
    )
))


numpy                1.18.1
pandas               1.0.3
ipywidgets           7.5.1
qgrid                1.3.1
sklearn              0.22.1
requests             2.23.0
geopy                2.0.0
gmaps                0.9.0
foursquare           1!2020.1.30
folium               0.11.0


## Google Maps

In [90]:
# Search terms offered in the dropdown, as (label, keyword) pairs
keyword_options = [
    ('Pizza', 'pizza'),
    ('Coffee', 'coffee'),
    ('Ice Cream', 'icecream'),
]
keyword = 'pizza'


@interact
def get_kw(kw=widgets.Dropdown(description='Search term:  ',
                               options=keyword_options,
                               value=keyword,
                               disabled=False)):
    """Store the dropdown selection in the global ``keyword``."""
    global keyword
    keyword = kw


interactive(children=(Dropdown(description='Search term:  ', options=(('Pizza', 'pizza'), ('Coffee', 'coffee')…

In [268]:
# pick a location
location = '40.7484, -73.9857'
# Parse the "lat, lng" text with float() instead of eval(): same tuple of
# floats, without executing the string as code.
location_coords = tuple(float(part) for part in location.split(','))
location_options = [('Midtown', '40.7484, -73.9857'),
                    ('Downtown', '40.7077443,-74.0139089'),
                    ('Upper East Side', '40.7711473,-73.9661166'),
                    ('Upper West Side', '40.778794,-73.984257'),
                    ('Brooklyn Heights', '40.6915812,-73.9954095'),
                    ('Grand Army Plaza', '40.671872,-73.972544'),
                    ('Bay Ridge', '40.6292633,-74.0309554'),
                    ('Williamsburg', '40.7144609,-73.9553373'),
                  ]

@interact
def get_loc(loc = widgets.Dropdown(
    options=location_options,
    value=location,
    description='Location:',
)):
    """Store the dropdown selection in the globals ``location`` and ``location_coords``.

    ``location`` keeps the "lat,lng" string; ``location_coords`` is the same
    position as a (lat, lng) tuple of floats.
    """
    global location
    global location_coords
    location = loc
    location_coords = tuple(float(part) for part in loc.split(','))
    return None

interactive(children=(Dropdown(description='Location:', options=(('Midtown', '40.7484, -73.9857'), ('Downtown'…

In [525]:
# Pick the first (keyword, location) combination we don't have a pickle file for.
# NOTE: if every combination already has a pickle file, the loop runs to the
# end and the last combination stays selected.
for test_keyword, test_location in product(keyword_options, location_options):
    keyword = test_keyword[1]
    location_name, location = test_location
    # parse the "lat,lng" string explicitly instead of calling eval() on it
    location_coords = tuple(float(part) for part in location.split(','))
    test_pickle_filename = keyword + "_" + location_name.replace(' ', '').lower() + ".pkl"

    if not path.exists(test_pickle_filename):
        break

print(location_name, keyword, location, location_coords)
print(test_pickle_filename)

Williamsburg coffee 40.7144609,-73.9553373 (40.7144609, -73.9553373)
coffee_williamsburg.pkl


In [526]:
# Show the chosen location as a single marker on a Google map
figure_layout = dict(
    width='800px',
    height='800px',
    border='1px solid black',
    padding='1px',
)
fig = gmaps.figure(layout=figure_layout, center=location_coords, zoom_level=12)
fig.add_layer(gmaps.marker_layer([location_coords]))
fig

Figure(layout=FigureLayout(border='1px solid black', height='800px', padding='1px', width='800px'))

In [527]:
# global options for all search services APIs
MIN_USER_RATINGS = 20  # venues with fewer ratings/reviews than this are filtered out
MIN_RATING = 0  # venues rated below this are filtered out
NRESULTS = 50  # passed as `limit` to the Yelp and Foursquare searches
RADIUS = 1000  # passed as `radius` to all three searches (presumably meters -- confirm per API)


In [528]:
# gmaps options
GMAPS_URL = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
# https://developers.google.com/places/web-service/search#TextSearchRequests
# https://developers.google.com/places/web-service/supported_types
# rankby='prominence'
# NOTE: gmaps_get_df searches by radius and does not send rankby to Google;
# yelp_get_df passes this value to Yelp as `sort_by`.
rankby='distance'
# NOTE(review): gmaps_get_df forwards this as a query parameter literally
# named `ltype` -- confirm that is the name the Places API expects.
ltype='establishment'


In [529]:
def gmaps_get_first_page(api_key, location, **kwargs):
    """Get the first page of results from the Google Places search endpoint.

    Args:
        api_key: Google API key.
        location: "lat,lng" string for the search center.
        **kwargs: extra query parameters for the search spec (e.g. keyword,
            radius); use either rankby or radius.

    Returns:
        The decoded JSON response.
    """
    # Let requests build the query string so values containing spaces or
    # reserved characters (e.g. a multi-word keyword) are URL-encoded.
    params = {'key': api_key, 'location': location}
    params.update(kwargs)
    r = requests.get(GMAPS_URL, params=params)
    return r.json()


def gmaps_get_next_page(api_key, next_page_token):
    """Get the next results page for a page token, waiting until it is available.

    The request is re-issued on every attempt (up to 10, 5 seconds apart).

    Args:
        api_key: Google API key.
        next_page_token: the `next_page_token` from the previous page.

    Returns:
        The first decoded JSON response with a non-empty 'results' list, or
        the last response received if none of the attempts produced results.
    """
    j = None
    for _ in range(10):
        # request inside the loop: re-parsing one stale response can never succeed
        r = requests.get(GMAPS_URL, params={'pagetoken': next_page_token, 'key': api_key})
        j = r.json()
        if j.get('results'):
            return j
        time.sleep(5)  # wait for next page to be available
    # Hand back the last response rather than None so callers that index the
    # result do not crash.
    return j


def gmaps_get_all_df(api_key, location, **kwargs):
    """Return a dataframe of all result pages for a search.

    Args:
        api_key: Google API key.
        location: "lat,lng" string for the search center.
        **kwargs: search parameters forwarded to gmaps_get_first_page.

    Returns:
        One dataframe with the normalized 'results' of every page, in page
        order (each page keeps its own row index).
    """
    # get first page
    j = gmaps_get_first_page(api_key, location, **kwargs)
    pages = [pd.json_normalize(j.get('results', []))]

    # get pages while additional pages available
    while 'next_page_token' in j:
        next_page_token = j['next_page_token']
        time.sleep(5)
        j = gmaps_get_next_page(api_key, next_page_token)
        if j is None:  # next page never became available
            break
        pages.append(pd.json_normalize(j.get('results', [])))

    # Concatenate once at the end; DataFrame.append is deprecated and was
    # removed in pandas 2.0.
    return pd.concat(pages)


def gmaps_get_df(location_coords, keyword):
    """Search Google Places around location_coords and return a tidy dataframe.

    Args:
        location_coords: (lat, lng) tuple of floats for the search center.
        keyword: search term sent as the `keyword` query parameter.

    Returns:
        None if the search returned nothing. Otherwise a dataframe with
        columns name, address, rating, nratings, lat, lng and distance (km
        from location_coords), keeping only venues that meet MIN_USER_RATINGS
        and MIN_RATING, sorted by rating then number of ratings, descending.
    """
    def _drop_trailing_locality(address):
        # drop trailing ", Brooklyn"; an address without a comma (or a
        # missing one) is returned unchanged instead of becoming ''
        if not isinstance(address, str) or ',' not in address:
            return address
        return " ".join(address.split(',')[:-1])

    # use either rankby or radius
    location = "%.7f,%.7f" % location_coords
    gmaps_df = gmaps_get_all_df(api_key, location, keyword=keyword, ltype=ltype, radius=RADIUS)
    if gmaps_df.empty:
        return None

    keep = (gmaps_df['user_ratings_total'] >= MIN_USER_RATINGS) & (gmaps_df['rating'] >= MIN_RATING)
    gmaps_df = gmaps_df.loc[keep] \
                       .sort_values(['rating', 'user_ratings_total'], ascending=False) \
                       .reset_index(drop=True)
    # copy so the column assignments below write to an independent frame
    gmaps_df = gmaps_df[['name', 'vicinity', 'rating', 'user_ratings_total',
                         'geometry.location.lat', 'geometry.location.lng']].copy()
    gmaps_df.columns = ['name', 'address', 'rating', 'nratings', 'lat', 'lng']
    gmaps_df['address'] = gmaps_df['address'].apply(_drop_trailing_locality)
    # Built as a list so this also works when the filter left no rows
    # (a row-wise apply on an empty frame does not yield a single column).
    gmaps_df['distance'] = [distance((lat, lng), location_coords).km
                            for lat, lng in zip(gmaps_df['lat'], gmaps_df['lng'])]
    return gmaps_df


In [530]:
%%time
# Fetch the Google results, apply the rating thresholds, rank and save them
gmaps_df = gmaps_get_df(location_coords, keyword)
gmaps_df = (
    gmaps_df[(gmaps_df['nratings'] >= MIN_USER_RATINGS) & (gmaps_df['rating'] >= MIN_RATING)]
    .sort_values(['rating', 'nratings'], ascending=False)
    .reset_index(drop=True)
)
gmaps_df.to_pickle('gmaps_' + test_pickle_filename)
gmaps_df


CPU times: user 105 ms, sys: 6.03 ms, total: 111 ms
Wall time: 13.8 s


Unnamed: 0,name,address,rating,nratings,lat,lng,distance
0,Copper Mug Coffee,131 N 4th St,5.0,88,40.716542,-73.959797,0.442058
1,Pueblo Querido Coffee Roasters Cafe de Colombia,34 N 6th St,5.0,33,40.719994,-73.96294,0.888933
2,Lella Alimentari,325 Manhattan Ave,4.9,285,40.715157,-73.946515,0.74948
3,Crema BK,182 Driggs Ave,4.9,74,40.723104,-73.945775,1.254584
4,Eleva Coffee Grand Street,649 Grand St,4.9,65,40.711577,-73.946331,0.825642
5,Homecoming,92 Berry St,4.9,29,40.719467,-73.958365,0.61191
6,19 Café,84 Manhattan Ave,4.8,136,40.705206,-73.944446,1.379576
7,COTTER BARBER/COTTER COFFEE,321 Graham Ave,4.8,60,40.713901,-73.944493,0.918395
8,Butler,95 S 5th St,4.7,304,40.711782,-73.964587,0.83631
9,Charter Coffeehouse,309 Graham Ave #1,4.7,178,40.713366,-73.944383,0.933589


In [531]:
# Interactive grid of the Google results: sort by clicking on a column header
# (can also click on the filter button)
show_grid(gmaps_df)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [532]:
# plot on google map

# one (lat, lng) marker per venue, with hover text and an HTML info box
markers = [(row.lat, row.lng) for row in gmaps_df.itertuples()]
marker_hover = ["%s: %s (%s)" % (row.name, row.rating, row.nratings) for row in gmaps_df.itertuples()]
info_box_template = """
<dl>
<dt>Name</dt><dd>{name}</dd>
<dt>Address</dt><dd>{address}</dd>
<dt>Google rating</dt><dd>{rating}</dd>
<dt>Google reviews</dt><dd>{nratings}</dd>
</dl>
"""
marker_info = [info_box_template.format(**row) for i, row in gmaps_df.iterrows()]

marker_layer = gmaps.marker_layer(markers, hover_text=marker_hover, info_box_content=marker_info)

figure_layout = {
    'width': '800px',
    'height': '800px',
    'border': '1px solid black',
    'padding': '1px'
}

# center on the already-parsed coordinates (as the Yelp and Foursquare maps
# do) instead of eval()-ing the location string
fig = gmaps.figure(layout=figure_layout, center=location_coords, zoom_level=14)
fig.add_layer(marker_layer)
fig

Figure(layout=FigureLayout(border='1px solid black', height='800px', padding='1px', width='800px'))

In [533]:
# Same markers on a folium map: one blue circle per venue, info box as tooltip
folium_markers = [(lat, lng, info) for (lat, lng), info in zip(markers, marker_info)]

venues_map = folium.Map(location=list(location_coords), zoom_start=14)
for lat, lng, label in folium_markers:
    folium.CircleMarker(
        location=[lat, lng],
        tooltip=label,
        radius=8,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.5,
    ).add_to(venues_map)

venues_map

## Yelp

In [534]:
def yelp_get_df(location_coords, keyword):
    """Search Yelp around location_coords and return a tidy dataframe.

    Args:
        location_coords: (lat, lng) tuple for the search center.
        keyword: value passed to Yelp as `categories`.

    Returns:
        A dataframe with columns name, address, rating, nratings, lat, lng,
        url and distance (km from location_coords), keeping only businesses
        that meet MIN_USER_RATINGS and MIN_RATING, sorted by rating then
        review count, descending. None if no business is left.
    """
    lat, lng = location_coords
    response = yelp_api.search_query(categories=keyword, latitude=lat, longitude=lng,
                                     radius=RADIUS, sort_by=rankby, limit=NRESULTS)

    yelp_df = pd.json_normalize(response['businesses'])
    if yelp_df.empty:
        return None

    yelp_df = yelp_df.loc[(yelp_df['review_count'] >= MIN_USER_RATINGS) & (yelp_df['rating'] >= MIN_RATING)] \
                     .sort_values(['rating', 'review_count'], ascending=False) \
                     .reset_index(drop=True)
    # Bail out before computing distances: a row-wise apply on an empty frame
    # does not produce a single column, so the assignment below would fail.
    if yelp_df.empty:
        return None

    display_columns = ['name', 'location.address1', 'rating', 'review_count', 'coordinates.latitude', 'coordinates.longitude', 'url']
    # copy so the column assignments below write to an independent frame
    yelp_df = yelp_df[display_columns].copy()
    yelp_df.columns = ['name', 'address', 'rating', 'nratings', 'lat', 'lng', 'url']
    yelp_df['distance'] = yelp_df.apply(lambda row: distance((row['lat'], row['lng']), location_coords).km,
                                        axis=1)
    return yelp_df

yelp_df = yelp_get_df(location_coords, keyword)
yelp_df

Unnamed: 0,name,address,rating,nratings,lat,lng,url,distance
0,Ceremonia Bakeshop,743 Driggs Ave,5.0,66,40.7125,-73.96056,https://www.yelp.com/biz/ceremonia-bakeshop-br...,0.49211
1,Martha's Country Bakery,263 Bedford Ave,4.5,667,40.714881,-73.960752,https://www.yelp.com/biz/marthas-country-baker...,0.459853
2,Sweatshop,232 Metropolitan Ave,4.5,351,40.714954,-73.959786,https://www.yelp.com/biz/sweatshop-williamsbur...,0.379872
3,Sugarburg,519 Metropolitan Ave,4.5,321,40.714118,-73.951606,https://www.yelp.com/biz/sugarburg-brooklyn?ad...,0.317557
4,Kitsby,186 Grand St,4.5,145,40.71411,-73.960977,https://www.yelp.com/biz/kitsby-brooklyn?adjus...,0.478131
5,Lion's Milk,104 Roebling St,4.5,114,40.71592,-73.9558,https://www.yelp.com/biz/lions-milk-brooklyn-2...,0.16667
6,Black Star Bakery & Cafe,595 Metropolitan Ave,4.5,106,40.714135,-73.949013,https://www.yelp.com/biz/black-star-bakery-and...,0.535623
7,Café Beit,158 Bedford Ave,4.5,88,40.718678,-73.957109,https://www.yelp.com/biz/caf%C3%A9-beit-brookl...,0.491657
8,Campbell & Co,502 Lorimer St,4.5,88,40.712024,-73.948861,https://www.yelp.com/biz/campbell-and-co-brook...,0.6105
9,Bar Beau,61 Withers St,4.5,52,40.71695,-73.95009,https://www.yelp.com/biz/bar-beau-brooklyn?adj...,0.522478


In [535]:
# yelp_get_df returns None when nothing was found, so only save when there
# is something to save.
# NOTE(review): this prefix is 'yelp' with no underscore, unlike 'gmaps_' and
# 'foursquare_' -- confirm nothing reads the file by this name before changing it.
if yelp_df is not None and not yelp_df.empty:
    yelp_df.to_pickle('yelp' + test_pickle_filename)
show_grid(yelp_df)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [536]:
# Plot the Yelp results on a Google map: one marker per business,
# with hover text and an HTML info box
markers = [(venue.lat, venue.lng) for venue in yelp_df.itertuples(index=False)]
marker_hover = [
    "%s: %s (%s)" % (venue.name, venue.rating, venue.nratings)
    for venue in yelp_df.itertuples(index=False)
]

info_box_template = """
<dl>
<dt>Name</dt><dd>{name}</dd>
<dt>Address</dt><dd>{address}</dd>
<dt>Yelp rating</dt><dd>{rating}</dd>
<dt>Yelp reviews</dt><dd>{nratings}</dd>
</dl>
"""

marker_info = [info_box_template.format(**fields) for _, fields in yelp_df.iterrows()]

marker_layer = gmaps.marker_layer(
    markers,
    hover_text=marker_hover,
    info_box_content=marker_info,
)

figure_layout = dict(
    width='800px',
    height='800px',
    border='1px solid black',
    padding='1px',
)

fig = gmaps.figure(center=location_coords, zoom_level=14, layout=figure_layout)
fig.add_layer(marker_layer)
fig

Figure(layout=FigureLayout(border='1px solid black', height='800px', padding='1px', width='800px'))

In [537]:
# Yelp markers on a folium map: one blue circle per business, info box as tooltip
folium_markers = [(point[0], point[1], info) for point, info in zip(markers, marker_info)]

venues_map = folium.Map(zoom_start=14, location=list(location_coords))
for lat, lng, label in folium_markers:
    folium.CircleMarker(
        location=[lat, lng],
        tooltip=label,
        radius=8,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.5,
    ).add_to(venues_map)
venues_map

## Foursquare

In [538]:
def foursquare_get_df(location_coords, keyword):
    """Search Foursquare around location_coords and return a tidy dataframe.

    Each venue returned by the search needs a second API call for its rating
    and number of ratings. Venues for which that call fails, or that have no
    rating, are skipped.

    Args:
        location_coords: (lat, lng) tuple of floats for the search center.
        keyword: search term passed as the Foursquare `query`.

    Returns:
        A dataframe with columns name, address, rating, nratings, lat, lng,
        url and distance (km from location_coords), keeping only venues that
        meet MIN_USER_RATINGS and MIN_RATING, sorted by rating then nratings,
        descending. None if no venue is left.
    """
    location_str = "%.7f,%.7f" % location_coords
    client = Foursquare(client_id=foursquare_id,
                        client_secret=foursquare_secret,
                        redirect_uri='http://streeteye.com/')
    response = client.venues.search(params={'query': keyword, 'll': "%s" % location_str,
                                            'radius': RADIUS, 'limit': NRESULTS})

    foursquare_array = []

    for _, venue in pd.json_normalize(response['venues']).iterrows():
        venue_id = venue['id']
        venue_name = venue['name']
        venue_address = venue['location.address']
        # sometimes no URL
        venue_url = venue.get('delivery.url', '')
        venue_lat = venue['location.lat']
        venue_lng = venue['location.lng']
        try:
            # get rating, nratings with another API call for venue details
            venue_details = client.venues(venue_id)['venue']
            venue_rating = venue_details['rating']
            venue_nratings = venue_details['ratingSignals']
        except FoursquareException as e:
            print("Foursquare exception", type(e), str(e))
            # Skip the venue: falling through would append the previous
            # venue's rating (or hit an unbound name on the first venue).
            continue
        except Exception:
            # sometimes no rating ... probably not popular enough
            continue

        foursquare_array.append([venue_name, venue_address, venue_rating, venue_nratings, venue_lat, venue_lng, venue_url])

    if not foursquare_array:
        return None

    foursquare_df = pd.DataFrame(foursquare_array,
                                 columns=['name', 'address', 'rating', 'nratings', 'lat', 'lng', 'url'])
    foursquare_df = foursquare_df.loc[(foursquare_df['nratings'] >= MIN_USER_RATINGS) & (foursquare_df['rating'] >= MIN_RATING)] \
                                 .sort_values(['rating', 'nratings'], ascending=False) \
                                 .reset_index(drop=True)
    # Bail out before computing distances: a row-wise apply on an empty frame
    # does not produce a single column, so the assignment below would fail.
    if foursquare_df.empty:
        return None

    foursquare_df['distance'] = foursquare_df.apply(lambda row: distance((row['lat'], row['lng']), location_coords).km,
                                                    axis=1)
    return foursquare_df

foursquare_df = foursquare_get_df(location_coords, keyword)
foursquare_df

Unnamed: 0,name,address,rating,nratings,lat,lng,url,distance
0,Blue Bottle Coffee,76 N 4 St,9.1,83,40.716913,-73.961039,,0.553398
1,Partners Coffee,125 N 6th St,8.9,2085,40.718012,-73.959174,,0.510491
2,The West—Coffeehouse & Bar,379 Union Ave,8.7,566,40.711726,-73.95136,,0.452932
3,The Flat's BK Speed Coffee,308 Hooper St,8.7,103,40.707304,-73.953867,,0.80443
4,Variety Coffee Roasters,368 Graham Ave,8.6,592,40.715227,-73.94449,,0.920474
5,Charter Coffeehouse,309 Graham Ave,8.6,139,40.713273,-73.94434,https://www.seamless.com/menu/charter-coffeeho...,0.938591
6,Oslo Coffee Roasters,328 Bedford Ave,8.5,297,40.713105,-73.962179,,0.597373
7,Think Coffee,10 Devoe St,8.1,58,40.713123,-73.951077,,0.389434
8,Porto Rico Importing Co.,636 Grand St,7.9,77,40.711374,-73.946804,,0.798434
9,Oslo Coffee,133 Roebling St,7.7,345,40.714725,-73.957043,,0.147055


In [539]:
# Interactive, sortable grid of the Foursquare results
show_grid(foursquare_df)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [540]:
# Save the Foursquare results unless the search came back empty, then show them
if not (foursquare_df is None or foursquare_df.empty):
    foursquare_df.to_pickle('foursquare_' + test_pickle_filename)
show_grid(foursquare_df)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [541]:
# Plot the Foursquare results on a Google map: one marker per venue,
# with hover text and an HTML info box
markers = [(venue.lat, venue.lng) for venue in foursquare_df.itertuples(index=False)]
marker_hover = [
    "%s: %s (%s)" % (venue.name, venue.rating, venue.nratings)
    for venue in foursquare_df.itertuples(index=False)
]

info_box_template = """
<dl>
<dt>Name</dt><dd>{name}</dd>
<dt>Address</dt><dd>{address}</dd>
<dt>Foursquare rating</dt><dd>{rating}</dd>
<dt>Foursquare reviews</dt><dd>{nratings}</dd>
</dl>
"""
marker_info = [info_box_template.format(**fields) for _, fields in foursquare_df.iterrows()]

marker_layer = gmaps.marker_layer(
    markers,
    hover_text=marker_hover,
    info_box_content=marker_info,
)

figure_layout = dict(
    width='800px',
    height='800px',
    border='1px solid black',
    padding='1px',
)

fig = gmaps.figure(center=location_coords, zoom_level=14, layout=figure_layout)
fig.add_layer(marker_layer)
fig

Figure(layout=FigureLayout(border='1px solid black', height='800px', padding='1px', width='800px'))

In [542]:
# Foursquare markers on a folium map: one blue circle per venue, info box as tooltip
folium_markers = [(point[0], point[1], info) for point, info in zip(markers, marker_info)]

venues_map = folium.Map(zoom_start=14, location=list(location_coords))
for lat, lng, label in folium_markers:
    folium.CircleMarker(
        location=[lat, lng],
        tooltip=label,
        radius=8,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.5,
    ).add_to(venues_map)
venues_map

## Deduplicate and merge

In [543]:
# put everything in one big df
pd.set_option('display.max_rows', None)

# A search function returns None when it found nothing. Tag each available
# result set with its source and skip the missing ones explicitly, instead of
# swallowing every exception with a bare except. assign() returns a new
# frame, so the per-source frames are left untouched.
gmaps_df_copy = None if gmaps_df is None else gmaps_df.assign(source='0_gmaps')
yelp_df_copy = None if yelp_df is None else yelp_df.assign(source='1_yelp')
foursquare_df_copy = None if foursquare_df is None else foursquare_df.assign(source='2_foursquare')

# reset_index() keeps each venue's per-source row number in an 'index' column
venues_df = pd.concat(
    [df for df in (gmaps_df_copy, yelp_df_copy, foursquare_df_copy) if df is not None]
).reset_index()
venues_df['latlong'] = venues_df[['lat','lng']].apply(tuple, axis=1)

venues_df.sort_values('name')

Unnamed: 0,index,name,address,rating,nratings,lat,lng,distance,source,url,latlong
6,6,19 Café,84 Manhattan Ave,4.8,136,40.705206,-73.944446,1.379576,0_gmaps,,"(40.705206, -73.94444639999999)"
23,23,Abracadabra Brooklyn,347 Bedford Ave,4.5,255,40.712126,-73.962614,0.667275,0_gmaps,,"(40.7121258, -73.9626135)"
67,17,Abracadabra Brooklyn,347 Bedford Ave,4.0,151,40.71202,-73.96274,0.681723,1_yelp,https://www.yelp.com/biz/abracadabra-brooklyn-...,"(40.71202, -73.96274)"
42,42,Atlas Cafe - Clever Blend,116 Havemeyer St,4.2,170,40.712684,-73.957004,0.242419,0_gmaps,,"(40.71268389999999, -73.9570037)"
75,25,Atlas Café,116 Havemeyer St,3.5,189,40.712722,-73.957038,0.240709,1_yelp,https://www.yelp.com/biz/atlas-caf%C3%A9-brook...,"(40.712721787199, -73.957037663574)"
19,19,BEAU Coffee,61 Withers St,4.6,32,40.716976,-73.950081,0.524643,0_gmaps,,"(40.7169758, -73.950081)"
72,22,Bagelsmith,189 Bedford Ave,3.5,500,40.717584,-73.957854,0.406818,1_yelp,https://www.yelp.com/biz/bagelsmith-brooklyn?a...,"(40.717584, -73.957854)"
59,9,Bar Beau,61 Withers St,4.5,52,40.71695,-73.95009,0.522478,1_yelp,https://www.yelp.com/biz/bar-beau-brooklyn?adj...,"(40.71695, -73.95009)"
64,14,Black Brick,300 Bedford Ave,4.0,317,40.714208,-73.961785,0.545524,1_yelp,https://www.yelp.com/biz/black-brick-brooklyn?...,"(40.7142079238412, -73.9617848396301)"
21,21,Black Brick Coffee,300 Bedford Ave,4.5,342,40.714191,-73.961655,0.534634,0_gmaps,,"(40.7141914, -73.9616546)"


In [544]:
# manual fix 
# venues_df.loc[venues_df['address']=='212b Pacific St', 'address'] = '212 Pacific St'
# venues_df

In [545]:
# Run the dedupe algorithm: name and address are compared as default text
# fields, latlong as a 'LatLong' field. The result carries a 'cluster id'
# column, which the next cell copies onto venues_df.
venues_df2 = pandas_dedupe.dedupe_dataframe(venues_df, ['name', 'address', ('latlong', 'LatLong')])


Importing data ...
Reading from dedupe_dataframe_learned_settings
Clustering...
# duplicate sets 73


In [546]:
# Attach each venue's cluster id, then list the venues cluster by cluster
venues_df['cluster'] = venues_df2['cluster id']
venues_df = (
    venues_df[['cluster', 'name', 'address', 'rating', 'nratings', 'lat', 'lng', 'source']]
    .sort_values(['cluster', 'source'])
)
venues_df

Unnamed: 0,cluster,name,address,rating,nratings,lat,lng,source
9,0,Charter Coffeehouse,309 Graham Ave #1,4.7,178,40.7133662,-73.94438269999999,0_gmaps
86,0,Charter Coffeehouse,309 Graham Ave,8.6,139,40.71327271843125,-73.94433975219727,2_foursquare
12,1,Sweatshop,232 Metropolitan Ave,4.6,302,40.7149294,-73.95978629999999,0_gmaps
52,1,Sweatshop,232 Metropolitan Ave,4.5,351,40.7149542040034,-73.959786,1_yelp
13,2,Café Beit,158 Bedford Ave,4.6,249,40.7186663,-73.9570576,0_gmaps
57,2,Café Beit,158 Bedford Ave,4.5,88,40.718678,-73.9571094,1_yelp
17,3,Hardwater Coffee Co.,340 Bedford Ave,4.6,50,40.7128151,-73.9625743,0_gmaps
91,3,Hardwater Coffee,340 Bedford Ave,7.7,23,40.712761,-73.962435,2_foursquare
20,4,Partners Coffee,125 N 6th St,4.5,1070,40.7180107,-73.9591725,0_gmaps
62,4,Partners Coffee,125 N 6th St,4.0,470,40.71796,-73.95921,1_yelp


In [547]:
# One row per cluster: take the first venue's name, address, position and source
cluster_df = (
    venues_df
    .groupby('cluster')[['name', 'address', 'lat', 'lng', 'source']]
    .first()
    .reset_index()
)
cluster_df

Unnamed: 0,cluster,name,address,lat,lng,source
0,0,Charter Coffeehouse,309 Graham Ave #1,40.7133662,-73.94438269999999,0_gmaps
1,1,Sweatshop,232 Metropolitan Ave,40.7149294,-73.95978629999999,0_gmaps
2,2,Café Beit,158 Bedford Ave,40.7186663,-73.9570576,0_gmaps
3,3,Hardwater Coffee Co.,340 Bedford Ave,40.7128151,-73.9625743,0_gmaps
4,4,Partners Coffee,125 N 6th St,40.7180107,-73.9591725,0_gmaps
5,5,Black Brick Coffee,300 Bedford Ave,40.7141914,-73.9616546,0_gmaps
6,6,Abracadabra Brooklyn,347 Bedford Ave,40.7121258,-73.9626135,0_gmaps
7,7,Black Star Bakery&Cafe,595 Metropolitan Ave,40.7143016,-73.9490829,0_gmaps
8,8,Weekends,155 S 4th St,40.7116441,-73.961944,0_gmaps
9,9,Variety Coffee Roasters,368 Graham Ave,40.7154317,-73.94438629999999,0_gmaps


In [548]:
# make markers on clusters
# add ratings on all rows

markers = [(float(row.lat), float(row.lng)) for row in cluster_df.itertuples()]
marker_hover = ["%s" % (row.name) for row in cluster_df.itertuples()]

# Make a dict keyed by cluster id, initializing the rating string to ''.
# Keying by the id rather than the row position keeps the lookups below
# correct even when cluster ids are not exactly 0..n-1.
marker_dict = {row['cluster']: {'name': row['name'],
                                'address': row['address'],
                                'lat': row['lat'],
                                'lng': row['lng'],
                                'rate_str': '',
                               } for i, row in cluster_df.iterrows()}

# display label per source, keyed by the source tag without its "N_" prefix
source_labels = {'gmaps': 'Google', 'yelp': 'Yelp', 'foursquare': 'Foursquare'}

# add all rating strings
for i, row in venues_df.iterrows():
    cluster = row['cluster']
    source_label = source_labels.get(row.source[2:])
    if source_label is not None:
        marker_dict[cluster]['rate_str'] += "<dt>%s rating</dt><dd>%s (%s reviews)</dd>\n" % (source_label, row.rating, row.nratings)

info_box_template = """
<dl>
<dt>Name</dt><dd>{name}</dd>
<dt>Address</dt><dd>{address}</dd>
{rate_str}

</dl>
"""

# dicts preserve insertion order, so marker_info lines up with markers
marker_info = [info_box_template.format(**d_item) for i, d_item in marker_dict.items()]

marker_layer = gmaps.marker_layer(markers, hover_text=marker_hover, info_box_content=marker_info)

figure_layout = {
    'width': '800px',
    'height': '800px',
    'border': '1px solid black',
    'padding': '1px'
}

fig = gmaps.figure(layout=figure_layout, center=location_coords, zoom_level=14)
fig.add_layer(marker_layer)
fig


Figure(layout=FigureLayout(border='1px solid black', height='800px', padding='1px', width='800px'))

In [549]:
# Cluster markers on a folium map: one blue circle per cluster, info box as tooltip
folium_markers = [(point[0], point[1], info) for point, info in zip(markers, marker_info)]

venues_map = folium.Map(zoom_start=14, location=list(location_coords))
for lat, lng, label in folium_markers:
    folium.CircleMarker(
        location=[lat, lng],
        tooltip=label,
        radius=8,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.5,
    ).add_to(venues_map)
venues_map


In [550]:
# Show the clustered venue table again for reference
venues_df

Unnamed: 0,cluster,name,address,rating,nratings,lat,lng,source
9,0,Charter Coffeehouse,309 Graham Ave #1,4.7,178,40.7133662,-73.94438269999999,0_gmaps
86,0,Charter Coffeehouse,309 Graham Ave,8.6,139,40.71327271843125,-73.94433975219727,2_foursquare
12,1,Sweatshop,232 Metropolitan Ave,4.6,302,40.7149294,-73.95978629999999,0_gmaps
52,1,Sweatshop,232 Metropolitan Ave,4.5,351,40.7149542040034,-73.959786,1_yelp
13,2,Café Beit,158 Bedford Ave,4.6,249,40.7186663,-73.9570576,0_gmaps
57,2,Café Beit,158 Bedford Ave,4.5,88,40.718678,-73.9571094,1_yelp
17,3,Hardwater Coffee Co.,340 Bedford Ave,4.6,50,40.7128151,-73.9625743,0_gmaps
91,3,Hardwater Coffee,340 Bedford Ave,7.7,23,40.712761,-73.962435,2_foursquare
20,4,Partners Coffee,125 N 6th St,4.5,1070,40.7180107,-73.9591725,0_gmaps
62,4,Partners Coffee,125 N 6th St,4.0,470,40.71796,-73.95921,1_yelp


In [551]:
# For each source in turn, join its rating and number of ratings onto the
# cluster table and add a standardized version of the rating column.
merge_df = cluster_df
for source_tag, rating_col, nratings_col, std_col in (
    ('0_gmaps', 'gmaps_rating', 'gmaps_nratings', 'gmaps_rating_std'),
    ('1_yelp', 'yelp_rating', 'yelp_nratings', 'yelp_rating_std'),
    ('2_foursquare', 'foursquare_rating', 'foursquare_nratings', 'foursquare_rating_std'),
):
    source_ratings = venues_df.loc[venues_df['source'] == source_tag, ['cluster', 'rating', 'nratings']]
    merge_df = (
        merge_df
        .merge(source_ratings, on='cluster', how='outer')
        .rename(columns={'rating': rating_col, 'nratings': nratings_col})
    )
    merge_df[std_col] = StandardScaler().fit_transform(merge_df[[rating_col]])

merge_df

Unnamed: 0,cluster,name,address,lat,lng,source,gmaps_rating,gmaps_nratings,gmaps_rating_std,yelp_rating,yelp_nratings,yelp_rating_std,foursquare_rating,foursquare_nratings,foursquare_rating_std
0,0,Charter Coffeehouse,309 Graham Ave #1,40.7133662,-73.94438269999999,0_gmaps,4.7,178.0,0.767366,,,,8.6,139.0,0.494309
1,1,Sweatshop,232 Metropolitan Ave,40.7149294,-73.95978629999999,0_gmaps,4.6,302.0,0.472225,4.5,351.0,0.897942,,,
2,2,Café Beit,158 Bedford Ave,40.7186663,-73.9570576,0_gmaps,4.6,249.0,0.472225,4.5,88.0,0.897942,,,
3,3,Hardwater Coffee Co.,340 Bedford Ave,40.7128151,-73.9625743,0_gmaps,4.6,50.0,0.472225,,,,7.7,23.0,-0.666243
4,4,Partners Coffee,125 N 6th St,40.7180107,-73.9591725,0_gmaps,4.5,1070.0,0.177084,4.0,470.0,0.028061,8.9,2085.0,0.88116
5,5,Black Brick Coffee,300 Bedford Ave,40.7141914,-73.9616546,0_gmaps,4.5,342.0,0.177084,4.0,317.0,0.028061,,,
6,6,Abracadabra Brooklyn,347 Bedford Ave,40.7121258,-73.9626135,0_gmaps,4.5,255.0,0.177084,4.0,151.0,0.028061,,,
7,7,Black Star Bakery&Cafe,595 Metropolitan Ave,40.7143016,-73.9490829,0_gmaps,4.5,136.0,0.177084,4.5,106.0,0.897942,,,
8,8,Weekends,155 S 4th St,40.7116441,-73.961944,0_gmaps,4.5,64.0,0.177084,4.5,38.0,0.897942,,,
9,9,Variety Coffee Roasters,368 Graham Ave,40.7154317,-73.94438629999999,0_gmaps,4.4,226.0,-0.118056,,,,8.6,592.0,0.494309


In [552]:
# Distance in km from the chosen location to each cluster's position
merge_df['distance'] = merge_df[['lat', 'lng']].apply(
    lambda point: distance((point['lat'], point['lng']), location_coords).km,
    axis=1,
)
merge_df


Unnamed: 0,cluster,name,address,lat,lng,source,gmaps_rating,gmaps_nratings,gmaps_rating_std,yelp_rating,yelp_nratings,yelp_rating_std,foursquare_rating,foursquare_nratings,foursquare_rating_std,distance
0,0,Charter Coffeehouse,309 Graham Ave #1,40.7133662,-73.94438269999999,0_gmaps,4.7,178.0,0.767366,,,,8.6,139.0,0.494309,0.933589
1,1,Sweatshop,232 Metropolitan Ave,40.7149294,-73.95978629999999,0_gmaps,4.6,302.0,0.472225,4.5,351.0,0.897942,,,,0.37951
2,2,Café Beit,158 Bedford Ave,40.7186663,-73.9570576,0_gmaps,4.6,249.0,0.472225,4.5,88.0,0.897942,,,,0.489101
3,3,Hardwater Coffee Co.,340 Bedford Ave,40.7128151,-73.9625743,0_gmaps,4.6,50.0,0.472225,,,,7.7,23.0,-0.666243,0.638241
4,4,Partners Coffee,125 N 6th St,40.7180107,-73.9591725,0_gmaps,4.5,1070.0,0.177084,4.0,470.0,0.028061,8.9,2085.0,0.88116,0.510299
5,5,Black Brick Coffee,300 Bedford Ave,40.7141914,-73.9616546,0_gmaps,4.5,342.0,0.177084,4.0,317.0,0.028061,,,,0.534634
6,6,Abracadabra Brooklyn,347 Bedford Ave,40.7121258,-73.9626135,0_gmaps,4.5,255.0,0.177084,4.0,151.0,0.028061,,,,0.667275
7,7,Black Star Bakery&Cafe,595 Metropolitan Ave,40.7143016,-73.9490829,0_gmaps,4.5,136.0,0.177084,4.5,106.0,0.897942,,,,0.528776
8,8,Weekends,155 S 4th St,40.7116441,-73.961944,0_gmaps,4.5,64.0,0.177084,4.5,38.0,0.897942,,,,0.639921
9,9,Variety Coffee Roasters,368 Graham Ave,40.7154317,-73.94438629999999,0_gmaps,4.4,226.0,-0.118056,,,,8.6,592.0,0.494309,0.931581


In [553]:
# Simple average score: mean of the standardized ratings that are available
# for each cluster (missing sources are ignored)
merge_df['meanrating'] = np.nanmean(
    merge_df[['gmaps_rating_std', 'yelp_rating_std', 'foursquare_rating_std']].to_numpy(),
    axis=1,
)
(
    merge_df[['name', 'address', 'gmaps_rating', 'yelp_rating', 'foursquare_rating', 'meanrating']]
    .sort_values('meanrating', ascending=False)
)


Unnamed: 0,name,address,gmaps_rating,yelp_rating,foursquare_rating,meanrating
51,Ceremonia Bakeshop,743 Driggs Ave,,5.0,,1.767823
18,Copper Mug Coffee,131 N 4th St,5.0,,,1.652788
19,Pueblo Querido Coffee Roasters Cafe de Colombia,34 N 6th St,5.0,,,1.652788
22,Eleva Coffee Grand Street,649 Grand St,4.9,,,1.357647
23,Homecoming,92 Berry St,4.9,,,1.357647
21,Crema BK,182 Driggs Ave,4.9,,,1.357647
20,Lella Alimentari,325 Manhattan Ave,4.9,,,1.357647
69,Blue Bottle Coffee,76 N 4 St,,,9.1,1.139061
25,COTTER BARBER/COTTER COFFEE,321 Graham Ave,4.8,,,1.062506
24,19 Café,84 Manhattan Ave,4.8,,,1.062506


In [554]:
# Bayes score: blend each cluster's mean standardized rating (R) with the
# overall average, weighting R by w = n / (n + mean n), where n is the number
# of sources that rated the cluster.
rating_cols = ['gmaps_rating_std', 'yelp_rating_std', 'foursquare_rating_std']
merge_df['nratings'] = merge_df[rating_cols].count(axis=1)
nratings_mean = merge_df['nratings'].mean()
rating_avg = np.nanmean(merge_df[rating_cols])
merge_df['w'] = merge_df['nratings'] / (nratings_mean + merge_df['nratings'])
merge_df['R'] = merge_df[rating_cols].mean(axis=1)
print('mean number of ratings', nratings_mean)
print('average rating', rating_avg)
merge_df['bayes_score'] = (1 - merge_df['w']) * rating_avg + merge_df['w'] * merge_df['R']

show_grid(
    merge_df
    .sort_values('bayes_score', ascending=False)
    [['name', 'address', 'distance', 'gmaps_rating', 'yelp_rating', 'foursquare_rating', 'nratings', 'bayes_score']]
)

mean number of ratings 1.273972602739726
average rating -6.303201688194437e-16


QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [572]:
def dedupe(dedupe_list, source_names=('gmaps', 'yelp', 'foursquare')):
    """Cluster duplicate venues across sources and rank them by a Bayesian score.

    Args:
        dedupe_list: per-source venue DataFrames. The position in the list
            identifies the source (with the default ``source_names``:
            0 = gmaps, 1 = yelp, 2 = foursquare). A ``None`` entry is skipped
            without shifting the positions of the frames after it. Each frame
            must provide the columns 'name', 'address', 'lat', 'lng',
            'rating', 'nratings' and 'distance'.
        source_names: column prefix used for each position of ``dedupe_list``.

    Returns:
        DataFrame of clustered venues holding, for every source prefix,
        ``<prefix>_rating``, ``<prefix>_nratings`` and ``<prefix>_rating_std``,
        plus 'nratings' (number of sources with a rating), 'w', 'R' and
        'bayes_score'. Rows are sorted by 'bayes_score', best first.

    Raises:
        ValueError: if ``dedupe_list`` holds no DataFrame at all.

    The input frames are left unmodified.
    """
    # tag copies with their list position so the caller's frames stay untouched
    tagged = [source_df.assign(source=i)
              for i, source_df in enumerate(dedupe_list)
              if source_df is not None]
    if not tagged:
        raise ValueError('dedupe_list contains no DataFrames to dedupe')

    venues_df = pd.concat(tagged).reset_index()
    venues_df['latlong'] = venues_df[['lat', 'lng']].apply(tuple, axis=1)
    # .str slicing tolerates a missing name where n[:25] would raise
    venues_df['shortname'] = venues_df['name'].str[:25]

    # dedupe and assign cluster id
    venues_df2 = pandas_dedupe.dedupe_dataframe(venues_df, ['shortname', 'address', ('latlong', 'LatLong')])
    venues_df['cluster'] = venues_df2['cluster id']
    venues_df = venues_df.sort_values(['cluster', 'source'])[['cluster', 'name', 'address', 'rating', 'nratings', 'lat', 'lng', 'distance', 'source']]

    # group by clusters, uniquify name (first row of each cluster wins)
    # NOTE(review): 'distance' is selected above but not carried into the
    # per-cluster frame -- confirm whether that is intended
    cluster_df = venues_df.groupby('cluster')[['name', 'address', 'lat', 'lng', 'source']] \
                          .first() \
                          .reset_index()

    # pandas_dedupe appears to convert the frame's columns to strings in place
    # (the previous comparison against '0'/'1'/'2' relied on that); comparing
    # as strings selects the right rows whether or not that happens
    source_key = venues_df['source'].astype(str)

    # merge ratings by source: one rating / nratings / standardized column set each
    merge_df = cluster_df
    for source_id, prefix in enumerate(source_names):
        source_ratings = venues_df.loc[source_key == str(source_id), ['cluster', 'rating', 'nratings']]
        merge_df = merge_df \
            .merge(source_ratings, on='cluster', how='outer') \
            .rename(columns={'rating': prefix + '_rating', 'nratings': prefix + '_nratings'})
        # standardize within each source so different rating scales are comparable
        merge_df[prefix + '_rating_std'] = StandardScaler().fit_transform(merge_df[[prefix + '_rating']])

    # bayes score: shrink each venue's mean standardized rating (R) toward the
    # overall average, weighting by how many sources rated the venue
    rating_cols = [prefix + '_rating_std' for prefix in source_names]
    merge_df['nratings'] = merge_df[rating_cols].count(axis=1)
    nratings_mean = np.mean(merge_df['nratings'])
    rating_avg = np.nanmean(merge_df[rating_cols])
    merge_df['w'] = merge_df['nratings'] / (merge_df['nratings'] + nratings_mean)
    # DataFrame.mean skips missing ratings, so R averages only the sources present
    merge_df['R'] = merge_df[rating_cols].mean(axis=1)
    merge_df['bayes_score'] = merge_df['w'] * merge_df['R'] + (1 - merge_df['w']) * rating_avg
    merge_df = merge_df.sort_values('bayes_score', ascending=False)
    return merge_df

    
# keep only the sources that actually produced a frame
# NOTE: dedupe() identifies sources by list position, so dropping a missing
# frame here shifts the sources after it onto the previous position
dedupe_list = [df for df in (gmaps_df, yelp_df, foursquare_df) if df is not None]

# cluster duplicates across sources and display the ranked result
dedupe_df = dedupe(dedupe_list)
dedupe_df

Importing data ...
Reading from dedupe_dataframe_learned_settings
Clustering...
# duplicate sets 73


Unnamed: 0,cluster,name,address,lat,lng,source,gmaps_rating,gmaps_nratings,gmaps_rating_std,yelp_rating,yelp_nratings,yelp_rating_std,foursquare_rating,foursquare_nratings,foursquare_rating_std,nratings,w,R,bayes_score
51,51,Ceremonia Bakeshop,743 Driggs Ave,40.7125,-73.96056,1,,,,5.0,66.0,1.767823,,,,1,0.439759,1.767823,0.777416
19,19,Pueblo Querido Coffee Roasters Cafe de Colombia,34 N 6th St,40.719994,-73.9629401,0,5.0,33.0,1.652788,,,,,,,1,0.439759,1.652788,0.726828
18,18,Copper Mug Coffee,131 N 4th St,40.7165423,-73.9597969,0,5.0,88.0,1.652788,,,,,,,1,0.439759,1.652788,0.726828
20,20,Lella Alimentari,325 Manhattan Ave,40.7151571,-73.9465147,0,4.9,285.0,1.357647,,,,,,,1,0.439759,1.357647,0.597038
22,22,Eleva Coffee Grand Street,649 Grand St,40.71157729999999,-73.9463311,0,4.9,65.0,1.357647,,,,,,,1,0.439759,1.357647,0.597038
23,23,Homecoming,92 Berry St,40.7194665,-73.9583651,0,4.9,29.0,1.357647,,,,,,,1,0.439759,1.357647,0.597038
21,21,Crema BK,182 Driggs Ave,40.723104,-73.945775,0,4.9,74.0,1.357647,,,,,,,1,0.439759,1.357647,0.597038
69,69,Blue Bottle Coffee,76 N 4 St,40.716913,-73.961039,2,,,,,,,9.1,83.0,1.139061,1,0.439759,1.139061,0.500912
25,25,COTTER BARBER/COTTER COFFEE,321 Graham Ave,40.7139015,-73.94449329999999,0,4.8,60.0,1.062506,,,,,,,1,0.439759,1.062506,0.467247
24,24,19 Café,84 Manhattan Ave,40.705206,-73.94444639999999,0,4.8,136.0,1.062506,,,,,,,1,0.439759,1.062506,0.467247


In [556]:
# round-trip check: write the deduplicated table to test_pickle_filename,
# then read it straight back so the cell displays what was stored
dedupe_df.to_pickle(test_pickle_filename)
pd.read_pickle(test_pickle_filename)

Unnamed: 0,cluster,name,address,lat,lng,source,gmaps_rating,gmaps_nratings,gmaps_rating_std,yelp_rating,yelp_nratings,yelp_rating_std,foursquare_rating,foursquare_nratings,foursquare_rating_std,nratings,w,R,bayes_score
51,51,Ceremonia Bakeshop,743 Driggs Ave,40.7125,-73.96056,1,,,,5.0,66.0,1.767823,,,,1,0.439759,1.767823,0.777416
19,19,Pueblo Querido Coffee Roasters Cafe de Colombia,34 N 6th St,40.719994,-73.9629401,0,5.0,33.0,1.652788,,,,,,,1,0.439759,1.652788,0.726828
18,18,Copper Mug Coffee,131 N 4th St,40.7165423,-73.9597969,0,5.0,88.0,1.652788,,,,,,,1,0.439759,1.652788,0.726828
20,20,Lella Alimentari,325 Manhattan Ave,40.7151571,-73.9465147,0,4.9,285.0,1.357647,,,,,,,1,0.439759,1.357647,0.597038
22,22,Eleva Coffee Grand Street,649 Grand St,40.71157729999999,-73.9463311,0,4.9,65.0,1.357647,,,,,,,1,0.439759,1.357647,0.597038
23,23,Homecoming,92 Berry St,40.7194665,-73.9583651,0,4.9,29.0,1.357647,,,,,,,1,0.439759,1.357647,0.597038
21,21,Crema BK,182 Driggs Ave,40.723104,-73.945775,0,4.9,74.0,1.357647,,,,,,,1,0.439759,1.357647,0.597038
69,69,Blue Bottle Coffee,76 N 4 St,40.716913,-73.961039,2,,,,,,,9.1,83.0,1.139061,1,0.439759,1.139061,0.500912
25,25,COTTER BARBER/COTTER COFFEE,321 Graham Ave,40.7139015,-73.94449329999999,0,4.8,60.0,1.062506,,,,,,,1,0.439759,1.062506,0.467247
24,24,19 Café,84 Manhattan Ave,40.705206,-73.94444639999999,0,4.8,136.0,1.062506,,,,,,,1,0.439759,1.062506,0.467247


In [569]:
# add a 'distance' column (km from location_coords) to every saved
# <keyword>_<location>.pkl result file, rewriting each file in place
# NOTE(review): the same location_coords is used for every neighborhood
# file -- confirm that a single reference point is intended
for k in ['pizza', 'icecream']:
    for l in ['midtown', 'downtown', 'uppereastside', 'upperwestside',
              'brooklynheights', 'grandarmyplaza', 'bayridge', 'williamsburg']:
        filename = "%s_%s.pkl" % (k, l)
        z = pd.read_pickle(filename)
        # geopy distance between each venue and the reference point, in km
        z['distance'] = z.apply(
            lambda venue: distance((venue['lat'], venue['lng']), location_coords).km,
            axis=1,
        )
        z.to_pickle(filename)



In [568]:
# display the frame currently bound to z
z

Unnamed: 0,cluster,name,address,lat,lng,source,gmaps_rating,gmaps_nratings,gmaps_rating_std,yelp_rating,yelp_nratings,yelp_rating_std,foursquare_rating,foursquare_nratings,foursquare_rating_std,nratings,w,R,bayes_score,distance
0,0,Tipsy Scoop,270 Metropolitan Ave,40.71455,-73.95855879999999,0,4.8,150.0,1.229837,4.5,55.0,0.842927,,,,2,0.566667,1.036382,0.587283,0.272387
1,1,Taiyaki NYC - Williamsburg,294 Bedford Ave,40.7144025,-73.9615795,0,4.6,253.0,0.559017,4.5,200.0,0.842927,,,,2,0.566667,0.700972,0.397218,0.527489
7,7,Gelateria Gentile - Williamsburg,253 Wythe Ave,40.7161839,-73.9637758,0,4.7,374.0,0.894427,,,,,,,1,0.395349,0.894427,0.353611,0.738247
9,9,Wowfulls,90 Kent Ave,40.721231,-73.962162,1,,,,4.5,327.0,0.842927,,,,1,0.395349,0.842927,0.33325,0.947486
10,10,Kitsby,186 Grand St,40.71411,-73.960977,1,,,,4.5,145.0,0.842927,,,,1,0.395349,0.842927,0.33325,0.478131
11,11,Roll'n Chill,90 Kent Ave,40.7213689,-73.9607664,1,,,,4.5,44.0,0.842927,,,,1,0.395349,0.842927,0.33325,0.893813
4,4,Van Leeuwen Ice Cream,204 Wythe Ave,40.7183814,-73.9618455,0,4.4,707.0,-0.111803,4.0,229.0,0.234146,9.1,458.0,1.38675,3,0.662338,0.503031,0.333176,0.701386
2,2,Van Leeuwen Ice Cream,620 Manhattan Ave,40.723595,-73.9505166,0,4.5,349.0,0.223607,4.0,342.0,0.234146,,,,2,0.566667,0.228877,0.129697,1.093051
6,6,Davey's Ice Cream,201 Bedford Ave,40.7169737,-73.9583533,0,4.3,151.0,-0.447214,,,,8.1,140.0,0.46225,2,0.566667,0.007518,0.00426,0.377899
3,3,Blossom ice cream and the poke bowl,54 N 6th St,40.71947530000001,-73.9622244,0,4.5,130.0,0.223607,4.0,88.0,0.234146,6.9,45.0,-0.64715,3,0.662338,-0.063132,-0.041815,0.80542
