In [1]:
import json
import math
import os
import sys
import typing as T

import dotenv
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from geopy.geocoders import Nominatim
from geopy.distance import geodesic

# Resolve project paths relative to the notebook's working directory.
# os.getcwd() is used instead of the IPython-only `%pwd` magic so this cell is
# plain Python and also works when the notebook is exported to a script.
CURRENT_DIR = os.getcwd()
ROOT_DIR = os.path.dirname(CURRENT_DIR)

# Make the project's `src` package importable; guard against appending the
# same entry again every time this cell is re-run.
SRC_DIR = os.path.join(ROOT_DIR, 'src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

DATA_DIR = os.path.join(CURRENT_DIR, "data")

# Pull API keys from a local .env file into the process environment.
dotenv.load_dotenv()

API_KEY = os.getenv("GOOGLE_MAPS_PLACES_API_KEY")

# Fail fast: nothing below can query the Places API without a key.
assert API_KEY, "API_KEY not found in environment variables"

# Unit conversions and search-budget settings used by later cells.
METERS_PER_MILE = 1609.34
METERS_PER_KILOMETER = 1000.0
COST_PER_SEARCH = 0.04
MAX_COST_PER_CITY = 700.0

TGTG_DATA_CSV = os.path.join(DATA_DIR, "tgtg_data.csv")

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
from search_context.google_places import GooglePlacesAPI
from search_context.util import (
    calculate_cost_from_results,
    extract_city,
    get_city_center_coordinates,
    get_search_grid_details,
    get_viewport,
)

# Places API client shared by every search cell below (verbose mode enabled).
google_places = GooglePlacesAPI(API_KEY, verbose=True)

# Register the Mapbox token with plotly express; the token is read from the
# MAPBOX_API_KEY environment variable.
px.set_mapbox_access_token(os.getenv("MAPBOX_API_KEY"))

In [3]:
# Named search centers, keyed by city name; each value holds the coordinates
# under the "latitude" / "longitude" keys.
LOCATIONS = {
    "Portland": dict(latitude=45.5152, longitude=-122.676483),
    "New York": dict(latitude=40.71, longitude=-74.01),
}

# Place attributes requested from the search calls. Every entry shares the
# "places." prefix, so only the attribute names are spelled out here; the
# order of the resulting list is the order of this tuple.
_PLACE_ATTRIBUTES = (
    "formattedAddress",
    "displayName",
    "nationalPhoneNumber",
    "location",
    "rating",
    "googleMapsUri",
    "websiteUri",
    "regularOpeningHours",
    "businessStatus",
    "priceLevel",
    "userRatingCount",
    "takeout",
    "delivery",
    "dineIn",
    "servesBreakfast",
    "primaryTypeDisplayName",
    "primaryType",
    "editorialSummary",
    "outdoorSeating",
    "servesCoffee",
    "paymentOptions",
    "accessibilityOptions",
)

ADVANCED_FIELDS = ["places." + attribute for attribute in _PLACE_ATTRIBUTES]

# Place types used to build the free-text search prompt below.
TYPES = [
    "bakery",
    "sandwich_shop",
    "coffee_shop",
    "cafe",
    "fast_food_restaurant",
    "store",
    "restaurant",
    "food",
    "point_of_interest",
    "establishment",
]

# Pluralise every type by appending "s". The suffix is added per item rather
# than being used as the join separator, which would leave the final type
# ("establishment") without its "s". The pluralisation is deliberately naive
# (e.g. "bakerys"); only the missing trailing suffix is corrected here.
ADVANCED_PROMPT = f"All {', '.join(f'{place_type}s' for place_type in TYPES)}"

In [4]:
def save_json(filename, prompt: str, fields: T.List[str], data: T.Dict[str, T.Any]):
    """Write a search response to ``DATA_DIR/filename`` as indented JSON.

    The saved document has three keys: ``"prompt"``, ``"fields"`` and
    ``"results"`` (the ``data`` argument, unchanged).

    Args:
        filename: File name, joined onto ``DATA_DIR``.
        prompt: The search prompt that produced ``data``.
        fields: The field mask that was requested.
        data: The search response. If it has no ``"places"`` key, the response
            is printed for inspection and nothing is written.

    Returns:
        None.
    """
    if "places" not in data:
        print("No places found")
        print(f"Data: {json.dumps(data, indent=2)}")
        return

    # Report on the response that was passed in, not on whatever the
    # notebook-level `results` variable happens to hold at call time.
    print(f"Number of results: {len(data['places'])}")

    # Ensure the target directory exists so open() does not fail on a fresh checkout.
    os.makedirs(DATA_DIR, exist_ok=True)
    filename = os.path.join(DATA_DIR, filename)

    print(f"Saving to {filename}")

    payload = {"prompt": prompt, "fields": fields, "results": data}
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)

In [5]:
radius_miles = 20.0
radius_meters = radius_miles * METERS_PER_MILE
scale_factor = 0.5

# Load the data from the CSV
df = pd.read_csv(TGTG_DATA_CSV)

# Create a summary of the stores: one row per store name with the number of
# rows for that store and the first pickup coordinates recorded for it.
store_summary = (
    df.groupby("store_name")
    .agg(
        count=("store_name", "size"),
        latitude=("pickup_location:location:latitude", "first"),
        longitude=("pickup_location:location:longitude", "first"),
    )
    .reset_index()
)
# Without at least one store the lookup loop below never runs and the prints
# after it would fail with a NameError; fail here with a clearer message.
assert not store_summary.empty, f"No stores found in {TGTG_DATA_CSV}"

# Add a marker size based on the count (square-root scaled, never below 1)
# and a normalized count in (0, 1] for color scaling
store_summary["marker_size"] = store_summary["count"].apply(
    lambda count: max(math.sqrt(count) * scale_factor, 1)
)
store_summary["normalized_count"] = store_summary["count"] / store_summary["count"].max()


# Walk the stores in order until one reverse-geocodes to a usable city, to
# dynamically determine the city we are working with.
# The geocoder is created once instead of on every loop iteration.
geolocator = Nominatim(user_agent="tgtg")
city = None
store_index = 0
while not city and store_index < len(store_summary):
    first_store_latitude = store_summary["latitude"].iloc[store_index]
    first_store_longitude = store_summary["longitude"].iloc[store_index]

    location = geolocator.reverse((first_store_latitude, first_store_longitude), exactly_one=True)
    # reverse() yields None when nothing is found for the coordinates; treat
    # that as "no city" and move on to the next store instead of raising
    # AttributeError on `.address`.
    city = extract_city(location.address) if location is not None else None
    store_index += 1

# These are the coordinates of the last store tried, i.e. the one that
# produced `city` (or the final store if none did).
print(f"Latitude: {first_store_latitude}")
print(f"Longitude: {first_store_longitude}")
print(f"City: {city}")

920, Providence Highway, Dedham, Norfolk County, Massachusetts, 02026, United States
Latitude: 42.2338356
Longitude: -71.1784924
City: Dedham


In [6]:
# The city is pinned explicitly for this run.
city = "Boston"

city_center_coordinates = get_city_center_coordinates(city)
assert city_center_coordinates, f"Location not found for {city}"

center_lat, center_lon = city_center_coordinates

# The Places API caps each search at 20 results regardless of radius, so we
# look for the widest viewport that still gives good resolution.
print(f"City center: {city_center_coordinates}")
print("Finding maximum viewpoint width")
max_grid_resolution_width_meters = google_places.find_maximum_viewpoint_width(
    center_lat, center_lon, "All restaurants"
)

grid_df, city_center_coordinates, num_grid_squares, total_cost = get_search_grid_details(
    city, max_grid_resolution_width_meters, radius_meters, MAX_COST_PER_CITY, COST_PER_SEARCH
)

# Caption shown under the title: search radius, block width and block count.
subtext = (
    f"{radius_miles:.1f}mi radius, "
    f"{max_grid_resolution_width_meters}m blocks, {num_grid_squares} blocks"
)

# Overlay the search grid (small blue dots) with the stores (sized and colored
# by how often they appear) to visually inspect the coverage.
fig = go.Figure(
    data=[
        go.Scattermapbox(
            lat=grid_df["latitude"],
            lon=grid_df["longitude"],
            mode="markers",
            marker=dict(size=2, color="blue"),
        ),
        go.Scattermapbox(
            lat=store_summary["latitude"],
            lon=store_summary["longitude"],
            mode="markers",
            marker=dict(
                size=store_summary["marker_size"],
                color=store_summary["normalized_count"],
                colorscale="viridis",
                showscale=True,
                cmin=0,
                cmax=1,
            ),
            # Hovering a marker shows the store name.
            text=store_summary["store_name"],
        ),
    ]
)

fig.update_layout(
    autosize=True,
    hovermode="closest",
    mapbox=dict(
        accesstoken=os.getenv("MAPBOX_API_KEY"),
        bearing=0,
        center=dict(lat=center_lat, lon=center_lon),
        pitch=0,
        zoom=9,
    ),
    height=800,
    width=1000,
    title_text=f"Search Grid of {city} [~${total_cost:.2f}]",
    title_x=0.5,
    annotations=[
        {
            "text": subtext,
            "showarrow": False,
            "xref": "paper",
            "yref": "paper",
            "x": 0.5,
            "y": 0.98,
            "xanchor": "center",
            "yanchor": "top",
            "font": {"size": 14, "color": "red"},
        }
    ],
)

fig.show()

City center: (42.3554334, -71.060511)
Finding maximum viewpoint width


Found 20 results with viewpoint width 200.0 meters


Found 20 results with viewpoint width 200.0 meters


City center: (42.3554334, -71.060511)


City center: (42.3554334, -71.060511)


In [None]:
# Text search restricted to a rectangular viewport around a named location.
location = "New York"
latitude, longitude = (LOCATIONS[location][axis] for axis in ("latitude", "longitude"))

prompt, fields = "All restaurants", ADVANCED_FIELDS

# The viewport size is fixed at 150 m here rather than derived from
# radius_miles; radius_miles only appears in the output file name.
radius_miles = 0.5
radius_meters = 150.0

rect_viewpoint = get_viewport(latitude, longitude, radius_meters)
data = {"locationRestriction": {"rectangle": rect_viewpoint}}

results = google_places.text_search(prompt, fields, data)
if "places" in results:
    filename = f"{latitude}_{longitude}_{radius_miles}_location_rect_viewpoint_search.json"
    save_json(filename, prompt, fields, results)
else:
    print("No results found")

In [None]:
# Radius search around a fixed coordinate using the advanced prompt and fields.
latitude, longitude = 45.54, -122.98
radius_miles = 0.5
prompt, fields = ADVANCED_PROMPT, ADVANCED_FIELDS

results = google_places.search_location_radius(latitude, longitude, radius_miles, prompt, fields)

# Persist the response under a name that records the center and the radius.
filename = f"{latitude}_{longitude}_{radius_miles}_location_radius_search.json"
save_json(filename, prompt, fields, results)

In [None]:
# Free-text search where the place and distance live in the prompt itself;
# the "*" field mask is passed as the only requested field.
location, radius_miles = "San Francisco", 1
fields = ["*"]
prompt = f"All bakeries within {radius_miles} miles of the center of {location}"

results = google_places.text_search(prompt, fields)

filename = f"{location}_{radius_miles}_text_search.json"
save_json(filename, prompt, fields, results)

In [None]:
# Same free-text San Francisco bakery search as the wildcard-field variant,
# but requesting only ADVANCED_FIELDS.
radius_miles = 1
location = "San Francisco"
prompt = f"All bakeries within {radius_miles} miles of the center of {location}"
fields = ADVANCED_FIELDS

results = google_places.text_search(prompt, fields)
# Use a distinct file name: the wildcard-field cell writes
# "{location}_{radius_miles}_text_search.json", and reusing that name here
# would overwrite its output whenever both cells are run.
filename = f"{location}_{radius_miles}_advanced_fields_text_search.json"
save_json(filename, prompt, fields, results)

In [None]:
# Text search biased toward a circle around a fixed coordinate.
prompt = "All restaurants"
fields = ADVANCED_FIELDS

# Define the center and radius in this cell and use them for BOTH the request
# and the file name. Previously the file name was built from latitude,
# longitude and radius_miles left over from whichever earlier cell ran last,
# so it could describe a different search than the one actually sent.
latitude = 45.54
longitude = -122.98
bias_radius_meters = 10.0  # NOTE(review): circle radius is assumed to be in meters - confirm against the Places API docs

data = {
    "locationBias": {
        "circle": {
            "center": {"latitude": latitude, "longitude": longitude},
            "radius": bias_radius_meters,
        }
    }
}
results = google_places.text_search(prompt, fields, data)
# Labelled as a location-bias search so it is not confused with the output of
# the radius-search cell, whose file name pattern was copied here.
filename = f"{latitude}_{longitude}_{bias_radius_meters}m_location_bias_search.json"
save_json(filename, prompt, fields, results)