In [1]:
import Google_business_search
import os
import pandas as pd

# a table copied from [https://www.baruch.cuny.edu/nycdata/population-geography/neighborhoods.htm]
def get_list():
    '''Load the neighborhood table and return "neighborhood,column-name" strings.

    Reads NYC_neighborhood.csv from the current working directory. Each cell
    is suffixed with a comma and the header of the column it sits in (the
    columns are expected to be boroughs). Blank cells, which pad the shorter
    columns, are dropped. The result is a flat list in row-major order.
    '''
    csv_path = os.getcwd()+'/NYC_neighborhood.csv'
    # keep_default_na=False keeps blank cells as '' instead of NaN so the
    # string concatenation below works on every cell
    boroughs = pd.read_csv(csv_path, keep_default_na=False)

    # each column is a Series whose .name is the column header
    labelled = boroughs.apply(lambda column: column+','+column.name)

    # a blank cell has become ',<header>' -- those are the ones to discard
    return [entry for entry in labelled.values.flatten() if not entry.startswith(',')]

# will be used later to calculate how many of the search results overlap
def count_duplicates(str_list):
    '''Return how many entries of str_list repeat an earlier entry.

    Computed as the total number of elements minus the number of distinct
    elements, so a value that appears k times contributes k - 1.
    '''
    total = len(str_list)
    distinct = len(set(str_list))
    return total - distinct

In [2]:
# variables to query
import math
import random

neighborhood_list = get_list()
# search radii to sweep over (units are whatever Google_business_search
# expects -- presumably meters; confirm against that module)
radii = [50, 200, 500, 1000, 2500, 5000]

# randomly choose 10% of neighborhoods so this doesn't take forever.
# A dedicated, seeded generator keeps the sample identical across kernel
# restarts (so the results below can be reproduced) without touching the
# global `random` state.
SAMPLE_SEED = 42
sample_size = math.ceil(len(neighborhood_list)*.1)
test_list = random.Random(SAMPLE_SEED).sample(neighborhood_list, sample_size)

In [None]:
# Sweep every radius over the sampled neighborhoods and record, per radius,
# how many distinct names come back and what fraction of names are repeats.
result_list = []      # not populated in this cell; kept so the name stays defined
names_list = []       # one list of names per query, in query order (all radii)
percent_overlap = []  # per radius: duplicates / total names (a 0-1 fraction)
unique_names = []     # per radius: number of distinct names
for radius in radii:
    # names gathered for THIS radius only; statistics must not mix in the
    # results of previously processed radii
    radius_names = []
    for neighborhood in test_list:
        search_results = Google_business_search.get_google_places_by_location('restaurant', 'halal', location_name=neighborhood, radius= str(radius))
        print('Done: query with radius='+str(radius)+' in '+neighborhood)
        # assumes each result is a sequence whose first element is the
        # place name -- TODO confirm against Google_business_search
        names = [item[0] for item in search_results]
        names_list.append(names)
        radius_names.extend(names)
    if radius_names:
        percent_overlap.append(count_duplicates(radius_names)/len(radius_names))
    else:
        # no results at all for this radius: report zero overlap instead of
        # dividing by zero
        percent_overlap.append(0.0)
    unique_names.append(len(set(radius_names)))

Done: query with radius=50 in Murray Hill,Manhattan
Done: query with radius=50 in Sandy Ground,Staten Island
Done: query with radius=50 in Astoria,Queens
Done: query with radius=50 in Rockaway Park,Queens
Done: query with radius=50 in Elm Park,Staten Island
Done: query with radius=50 in Fordham,Bronx
Done: query with radius=50 in Steinway,Queens
Done: query with radius=50 in Farragut,Brooklyn
Done: query with radius=50 in New Springville,Staten Island
Done: query with radius=50 in Arlington,Staten Island
Done: query with radius=50 in Ditmas Park,Brooklyn
Done: query with radius=50 in Forest Hills,Queens
Done: query with radius=50 in Grymes Hill,Staten Island
Done: query with radius=50 in Homecrest,Brooklyn
Done: query with radius=50 in Wingate,Brooklyn
Done: query with radius=50 in Flatlands,Brooklyn
Done: query with radius=50 in Inwood,Manhattan
Done: query with radius=50 in Downtown,Brooklyn
Done: query with radius=50 in South Side,Brooklyn
Done: query with radius=50 in Bloomfield,St

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns
# plot: distinct-name count (left axis) and duplicate fraction (right axis)
# against search radius, sharing one x axis
sns.set(style="white", rc={"lines.linewidth": 3})
fig, ax1 = plt.subplots(figsize=(10,10))
ax2 = ax1.twinx()
sns.lineplot(x=radii,
            y=unique_names, 
            color='#004488',
            ax=ax1)
sns.lineplot(x=radii, 
             y=percent_overlap,
             color='r',
             marker="o",
             ax=ax2)
# label both y axes in the colour of their line so the two series can be
# told apart; the radius unit is left off because it is not defined here
ax1.set_xlabel('Search radius')
ax1.set_ylabel('Number of unique names', color='#004488')
ax2.set_ylabel('Fraction of names that are duplicates', color='r')
ax1.set_title('Unique results and overlap vs. search radius')
plt.show()
# restore the default seaborn theme for any later plots
sns.set()