In [1]:
import csv
import os

import geopandas as gpd
import pandas as pd
import requests
from geopy.distance import geodesic
from shapely.geometry import Point

In [None]:
# Google Maps Platform key, read from the GOOGLE_MAPS_API_KEY environment
# variable so the secret never has to be stored in the notebook.
# Falls back to "" (the previous placeholder value) when the variable is unset.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")

def get_place_types(query, latitude, longitude, radius=5000):
    """
    Fetch Google Place types for a specific business query (e.g., Walmart).

    Sends a Nearby Search request with `query` as the keyword, prints the raw
    response plus the name and types of every result, and returns the results.

    Args:
        query: Keyword to search for.
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.
        radius: Search radius sent to the API (default 5000).

    Returns:
        list: The "results" entries of the response, or [] when the API
        reports no results or a non-OK status.
    """
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    params = {
        "location": f"{latitude},{longitude}",
        "radius": radius,
        "keyword": query,
        "key": API_KEY
    }

    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()

    # Print API response for debugging
    print("API Response:", data)

    status = data.get("status")
    if status == "ZERO_RESULTS":
        # Not an error: the search simply matched nothing.
        print("No places found.")
        return []
    if status != "OK":
        print(f"Error: {data.get('error_message', 'Unknown error')}")
        return []

    results = data.get("results", [])
    for place in results:
        print(f"Name: {place.get('name', 'N/A')}")
        print(f"Types: {place.get('types', [])}\n")

    # Previously the success path fell off the end and returned None while the
    # error path returned []; always hand back a list.
    return results

# Example usage: Find Walmart types in Atlanta


In [20]:
# Example lookup: which places (and place types) match "community_center" near this point?
get_place_types(query="community_center", latitude=33.78, longitude=-84.39)

# 33.7490, -84.3880
# {'location': {'lat': 33.7374337, 'lng': -84.4330913},

API Response: {'html_attributions': [], 'next_page_token': 'AeeoHcJ5rVYyN5L4o5VzpMoSedAiisC8H0pKtgrqfKQ7mp_vffC790IJOZQlpONnAG0l0-Cy1LPPz-L_VeD9jci_AHcIjxXjk45Kr9bpRKbChnI_JKq0BkSib2FTTH2rhB0cfriBtnrUt-5RgBsbwziRPF18sB1KBfSITi_l6v5KZUYfE87cB8CAKRBZQY5va58t3xTUa-Ot0oDU95fu1_2v3UBCmeHz40rjaYBAJxuRJuGwClfTFjCQ4c9XTpVElXPL3eGvmIL-fBRNaIJsZ6KCgp68mGAUziDsvIom9ngG7VXRhfsdmVq_kFZb1EDHj59tXbMn0pDsWEZSMr9FqT-zGfHFiSX1al3cCReWLHFSK7cc_ycb8qU1bnB8uMgWKej0nb3QAipI7W3ixU-hSvDEfbp9skbtC7j2DFqinTaRgvT5LuTANyGofWyoCDzX69Qgv1Qc', 'results': [{'geometry': {'location': {'lat': 33.7501275, 'lng': -84.3885209}, 'viewport': {'northeast': {'lat': 33.88682297372161, 'lng': -84.28956005348273}, 'southwest': {'lat': 33.64794596773328, 'lng': -84.55085397572094}}}, 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/geocode-71.png', 'icon_background_color': '#7B9EB0', 'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/generic_pinlet', 'name': 'Atlanta', 'photos': [{'heig

In [24]:

def get_place_text(query, latitude, longitude, radius=5000):
    """
    Run a Places API v1 (New) text search biased towards a circle.

    Posts `query` to the `places:searchText` endpoint with a `locationBias`
    circle centred on (latitude, longitude), prints the raw response and a
    short summary of each place, and returns the list under "places".
    Returns [] when the response has no "places" key.
    """
    endpoint = "https://places.googleapis.com/v1/places:searchText"

    # Only these fields are requested from the API.
    field_mask = ",".join([
        "places.displayName",
        "places.formattedAddress",
        "places.location",
        "places.types",
    ])
    request_headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": API_KEY,
        "X-Goog-FieldMask": field_mask,
    }

    bias_circle = {
        "center": {"latitude": latitude, "longitude": longitude},
        "radius": radius,
    }
    payload = {
        "locationBias": {"circle": bias_circle},
        "textQuery": query,
        "maxResultCount": 20,  # Optional, max allowed per request
    }

    data = requests.post(endpoint, headers=request_headers, json=payload).json()

    # Print API response for debugging
    print("API Response:", data)

    if "places" in data:
        found = data["places"]
        for entry in found:
            display_name = entry.get("displayName", {}).get("text", "N/A")
            print(f"Name: {display_name}")
            print(f"Address: {entry.get('formattedAddress', 'N/A')}")
            print(f"Types: {entry.get('types', [])}\n")
        return found

    print("No places found or error occurred.")
    return []

In [25]:
# Example text search: places matching "YMCA" near this point.
get_place_text(query="YMCA", latitude=33.78, longitude=-84.39)


API Response: {'places': [{'types': ['gym', 'government_office', 'sports_activity_location', 'health', 'point_of_interest', 'establishment'], 'formattedAddress': '1160 Moores Mill Rd NW, Atlanta, GA 30327, USA', 'location': {'latitude': 33.8312738, 'longitude': -84.4251855}, 'displayName': {'text': 'Carl E. Sanders Family YMCA at Buckhead', 'languageCode': 'en'}}, {'types': ['preschool', 'child_care_agency', 'government_office', 'school', 'health', 'point_of_interest', 'establishment'], 'formattedAddress': '555 Luckie St NW, Atlanta, GA 30313, USA', 'location': {'latitude': 33.770347199999996, 'longitude': -84.3954736}, 'displayName': {'text': 'Arthur M. Blank Family Youth YMCA', 'languageCode': 'en'}}, {'types': ['gym', 'government_office', 'sports_activity_location', 'health', 'point_of_interest', 'establishment'], 'formattedAddress': '275 Eva Davis Way SE, Atlanta, GA 30317, USA', 'location': {'latitude': 33.7461736, 'longitude': -84.3150162}, 'displayName': {'text': 'East Lake Fami

[{'types': ['gym',
   'government_office',
   'sports_activity_location',
   'health',
   'point_of_interest',
   'establishment'],
  'formattedAddress': '1160 Moores Mill Rd NW, Atlanta, GA 30327, USA',
  'location': {'latitude': 33.8312738, 'longitude': -84.4251855},
  'displayName': {'text': 'Carl E. Sanders Family YMCA at Buckhead',
   'languageCode': 'en'}},
 {'types': ['preschool',
   'child_care_agency',
   'government_office',
   'school',
   'health',
   'point_of_interest',
   'establishment'],
  'formattedAddress': '555 Luckie St NW, Atlanta, GA 30313, USA',
  'location': {'latitude': 33.770347199999996, 'longitude': -84.3954736},
  'displayName': {'text': 'Arthur M. Blank Family Youth YMCA',
   'languageCode': 'en'}},
 {'types': ['gym',
   'government_office',
   'sports_activity_location',
   'health',
   'point_of_interest',
   'establishment'],
  'formattedAddress': '275 Eva Davis Way SE, Atlanta, GA 30317, USA',
  'location': {'latitude': 33.7461736, 'longitude': -84.31

In [12]:
def get_place_types_v1_by_type(place_type, latitude, longitude, radius=5000):
    """
    Search the Places API v1 `places:searchNearby` endpoint for one place type.

    The search is restricted to a circle centred on (latitude, longitude).
    Prints the raw response and a summary of every place, then returns the
    list under "places" (or [] when that key is absent from the response).
    """
    endpoint = "https://places.googleapis.com/v1/places:searchNearby"

    # Only these fields are requested from the API.
    field_mask = ",".join([
        "places.displayName",
        "places.formattedAddress",
        "places.location",
        "places.types",
    ])
    request_headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": API_KEY,
        "X-Goog-FieldMask": field_mask,
    }

    search_circle = {
        "center": {"latitude": latitude, "longitude": longitude},
        "radius": radius,
    }
    payload = {
        "includedTypes": [place_type],  # Must be a valid place type
        "locationRestriction": {"circle": search_circle},
        "maxResultCount": 20,
    }

    data = requests.post(endpoint, headers=request_headers, json=payload).json()

    print("API Response:", data)

    if "places" in data:
        found = data["places"]
        for entry in found:
            display_name = entry.get("displayName", {}).get("text", "N/A")
            print(f"Name: {display_name}")
            print(f"Address: {entry.get('formattedAddress', 'N/A')}")
            print(f"Types: {entry.get('types', [])}\n")
        return found

    print("No places found or error occurred.")
    return []

In [27]:
# Example type search: what does Google tag as "child_care_agency" near this point?
get_place_types_v1_by_type(place_type="child_care_agency", latitude=33.78, longitude=-84.39)

API Response: {'places': [{'types': ['department_store', 'shoe_store', 'child_care_agency', 'electronics_store', 'clothing_store', 'furniture_store', 'home_improvement_store', 'home_goods_store', 'store', 'point_of_interest', 'establishment'], 'formattedAddress': '375 18th St NW, Atlanta, GA 30363, USA', 'location': {'latitude': 33.7932856, 'longitude': -84.39933669999999}, 'displayName': {'text': 'Target', 'languageCode': 'en'}}, {'types': ['department_store', 'shoe_store', 'child_care_agency', 'electronics_store', 'clothing_store', 'furniture_store', 'home_improvement_store', 'home_goods_store', 'store', 'point_of_interest', 'establishment'], 'formattedAddress': '1275 Caroline St NE, Atlanta, GA 30307, USA', 'location': {'latitude': 33.7569541, 'longitude': -84.3463134}, 'displayName': {'text': 'Target', 'languageCode': 'en'}}, {'types': ['preschool', 'child_care_agency', 'church', 'place_of_worship', 'school', 'point_of_interest', 'establishment'], 'formattedAddress': '1328 Peachtre

[{'types': ['department_store',
   'shoe_store',
   'child_care_agency',
   'electronics_store',
   'clothing_store',
   'furniture_store',
   'home_improvement_store',
   'home_goods_store',
   'store',
   'point_of_interest',
   'establishment'],
  'formattedAddress': '375 18th St NW, Atlanta, GA 30363, USA',
  'location': {'latitude': 33.7932856, 'longitude': -84.39933669999999},
  'displayName': {'text': 'Target', 'languageCode': 'en'}},
 {'types': ['department_store',
   'shoe_store',
   'child_care_agency',
   'electronics_store',
   'clothing_store',
   'furniture_store',
   'home_improvement_store',
   'home_goods_store',
   'store',
   'point_of_interest',
   'establishment'],
  'formattedAddress': '1275 Caroline St NE, Atlanta, GA 30307, USA',
  'location': {'latitude': 33.7569541, 'longitude': -84.3463134},
  'displayName': {'text': 'Target', 'languageCode': 'en'}},
 {'types': ['preschool',
   'child_care_agency',
   'church',
   'place_of_worship',
   'school',
   'point_of

In [None]:
# Scoring configuration for desirable amenities, keyed by amenity name.
#   group              -> scoring tier handed to assign_points (1 or 2)
#   google_type        -> Google place type, or list of types, for the amenity
#   name_contains      -> name substrings a place is expected to match
#   name_not_contains  -> name substrings that disqualify a place
#   type_not_contains  -> place types that disqualify a place
# NOTE(review): the name/type filter keys are presumably consumed by the
# filtering step; confirm the key names match what that code reads.
# Entries without "google_type" (licensed_childcare, technical_college) have
# no Google type assigned here.
DESIRABLE_AMENITIES = {
    "national_big_box_store": {"group": 1, "google_type": "department_store", "name_contains": ["Walmart", "Target", "Costco", "BJ's", "Sam's Club"]},
    "retail_store": {"group": 2, "google_type": ["clothing_store", "home_goods_store"]},
    "grocery_store": {"group": 1, "google_type": ["grocery_or_supermarket", "supermarket"], "type_not_contains": ["convenience_store"]},
    "restaurant": {"group": 2, "google_type": "restaurant"},
    "hospital": {"group": 1, "google_type": "hospital", "name_not_contains": ["Outpatient"]},
    "medical_clinic": {"group": 1, "google_type": "doctor", "name_contains": ["urgent care", "medical clinic", "immediate care", "physicians", "dentist"]},
    "pharmacy": {"group": 1, "google_type": "pharmacy"},
    "licensed_childcare": {"group": 1},
    "technical_college": {"group": 2},
    "school": {"group": 1, "google_type": ["primary_school", "secondary_school", "school"]},
    "town_square": {"group": 1, "google_type": ["city_hall", "courthouse"]},
    "community_center": {"group": 1, "google_type": ["community_center", "gym", "pool"]},
    "large_public_park": {"group": 1, "google_type": "park"},
    "small_public_park": {"group": 2, "google_type": "park"},
    "library": {"group": 1, "google_type": "library"},
    "fire_police_station": {"group": 2, "google_type": ["fire_station", "police"]},
    "bank": {"group": 2, "google_type": "bank"},
    "place_of_worship": {"group": 2, "google_type": "place_of_worship"},
    "post_office": {"group": 2, "google_type": "post_office"},
}

# Undesirable nearby activities, keyed by activity name; each entry carries
# the Google place type (a string) or types (a list) under "google_type".
UNDESIRABLE_ACTIVITIES = dict(
    auto_repair_station={"google_type": "car_repair"},
    commercial_livestock={"google_type": "farm"},
    excessive_light={"google_type": ["casino", "stadium", "night_club"]},
    excessive_noise={"google_type": ["airport"]},
    laundry_facility={"google_type": "laundry"},
    gas_station={"google_type": "gas_station"},
)


# Junk yards
# dumps
# landfill
# materials storage areas
# commercial livestock operations 
# Odor producing facilities 
# Potential or existing environmental hazards: 
#     - chemical activities
#     - heavy manufacturing activities
#     - Industrial development 
#     - Facilities listed in federal or state hazardous inventory databases
#     - Gas stations with history of leaking underground storage tanks
#     - dry cleaners with history of contamination
# Abandoned houses or buildings 
# Deteriorating houses or buildings
# Food deserts 

# """
# 2020_cdr_consumer_and_commercial_use_information
# - TRI Facility Name
# - Year
# - Census Tract
# - Latitude
# - Longitude 
# - Releases (lb)
# - Waste Managed (lb)
# - RSEI Hazard
# - # of TRI Facilities


# 2020_cdr_industrial_processing_and_use_information
# - CHEMICAL NAME
# - DOMESTIC PARENT COMPANY NAME
# - SITE NAME
# - SITE LATITUDE
# - SITE LONGITUDE

# food_access_research_atlas
# - CensusTract
# - Urban 
# - 

# Facilities_GA
# - Latitude
# - Longitude
# - Ope USTs


# """


# """
# Have
# EPA TRI Dataset 



# chemical_factory 
# industrial_facility
# waste_management
# hazardous_site

# food desert - USDA Food Access Research Atlas


# Dont have yet

# EPA RCRAInfo
#     junkyard
#     dump
#     landfill

# municipal code violation databases or tax assessor data
#     abandoned_building
#     deteriorated_housing 

# EPA FRS Database
#     gas stations with underground leakage 
#     dry cleaner contamination

#     odor producing facilities

#     Floodplain, Wetlands, Soil Unsuitability - FEMA Flood Maps and USGS Soil Data
# """



In [44]:
def find_places_nearby(lat, lon, place_type, radius_meters=10000):
    """
    Query the Places Nearby Search endpoint for places of one type around a point.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        place_type: Value sent as the request's "type" parameter.
        radius_meters: Value sent as the request's "radius" parameter.

    Returns:
        list[dict]: One dict per entry of the response's "results", with keys
        "name", "lat", "lon", "types" and "place_id". Empty when the response
        has no "results". Only the first response page is read; any
        next_page_token is ignored.
    """
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    params = {
        "location": f"{lat},{lon}",
        "radius": radius_meters,
        "type": place_type,
        # Use the notebook-wide API_KEY; the previous MY_API_KEY name is not
        # defined anywhere in this notebook and raised NameError on a fresh kernel.
        "key": API_KEY
    }

    response = requests.get(url, params=params).json()
    results = response.get("results", [])

    return [
        {
            "name": r["name"],
            "lat": r["geometry"]["location"]["lat"],
            "lon": r["geometry"]["location"]["lng"],
            "types": r.get("types", []),
            "place_id": r.get("place_id", ""),
        }
        for r in results
    ]

In [45]:
def filter_places(places, amenity_info):
    """
    Filter places by case-insensitive substring rules on their names.

    Include rules (each present key is applied on its own; a place must match
    at least one term of every include rule that is present):
      - "store_names", "contains"               (original key names)
      - "name_contains", "name_contain"         (key names used by the
        amenity configuration dictionaries in this notebook)
    Exclude rules (a place is dropped if its name contains any term):
      - "not_contains"                          (original key name)
      - "name_not_contains"                     (amenity configuration name)

    Previously only the original key names were read, so rules written with
    the configuration's key names were silently ignored.

    Args:
        places: List of dicts; each place's "name" is matched (a missing or
            None name is treated as an empty string).
        amenity_info: Dict holding any of the rule keys above. A rule value
            may be a list of strings or a single string.

    Returns:
        The filtered list, in the original order. When no rule is present the
        input list itself is returned.
    """
    include_keys = ("store_names", "name_contain", "name_contains", "contains")
    exclude_keys = ("not_contains", "name_not_contains")

    def terms_for(key):
        # Normalise a rule value to a list of lowercase substrings.
        raw = amenity_info.get(key) or []
        if isinstance(raw, str):
            raw = [raw]
        return [term.lower() for term in raw]

    def name_of(place):
        return (place.get("name") or "").lower()

    filtered = places

    for key in include_keys:
        wanted = terms_for(key)
        if wanted:
            filtered = [p for p in filtered if any(t in name_of(p) for t in wanted)]

    for key in exclude_keys:
        banned = terms_for(key)
        if banned:
            filtered = [p for p in filtered if not any(t in name_of(p) for t in banned)]

    return filtered

In [46]:
def get_distance_matrix_distance(
    origin_lat, origin_lon,
    dest_lat, dest_lon,
    travel_mode="driving",
    api_key=None
):
    """
    Returns the distance in meters for the specified travel mode (driving, walking, bicycling, transit).

    Args:
        origin_lat, origin_lon: Coordinates of the origin.
        dest_lat, dest_lon: Coordinates of the destination.
        travel_mode: Value sent as the request's "mode" parameter.
        api_key: Key sent with the request. Defaults to the notebook-wide
            API_KEY, looked up when the function is called. The old default,
            MY_API_KEY, was evaluated at definition time and raised NameError
            because that name is not defined in this notebook.

    Returns:
        The "value" of the first element's "distance", or None when the
        response does not have that structure.
    """
    if api_key is None:
        api_key = API_KEY

    url = "https://maps.googleapis.com/maps/api/distancematrix/json"
    params = {
        "origins": f"{origin_lat},{origin_lon}",
        "destinations": f"{dest_lat},{dest_lon}",
        "mode": travel_mode,    # "driving" or "walking", etc.
        "key": api_key
    }

    response = requests.get(url, params=params).json()
    print(response)
    try:
        # Distance in meters
        distance_meters = response["rows"][0]["elements"][0]["distance"]["value"]
        return distance_meters
    except (KeyError, IndexError):
        # Handle the case where the response doesn't have the expected structure
        return None

In [48]:
if __name__ == "__main__":
    # Demo: driving vs. walking distance between two fixed points.
    # Uses the notebook-wide API_KEY; MY_API_KEY is not defined in this notebook.
    YOUR_API_KEY = API_KEY

    origin_lat, origin_lon = 33.7490, -84.3880
    dest_lat, dest_lon = 33.7374337, -84.4330913

    driving_distance_m = get_distance_matrix_distance(
        origin_lat, origin_lon,
        dest_lat, dest_lon,
        travel_mode="driving",
        api_key=YOUR_API_KEY
    )

    walking_distance_m = get_distance_matrix_distance(
        origin_lat, origin_lon,
        dest_lat, dest_lon,
        travel_mode="walking",
        api_key=YOUR_API_KEY
    )

    # 1609.34 converts meters to miles for the printed summary.
    if driving_distance_m is not None:
        print(f"Driving distance: {driving_distance_m} meters (~{driving_distance_m/1609.34:.2f} miles)")
    else:
        print("Could not retrieve driving distance.")

    if walking_distance_m is not None:
        print(f"Walking distance: {walking_distance_m} meters (~{walking_distance_m/1609.34:.2f} miles)")
    else:
        print("Could not retrieve walking distance.")

{'destination_addresses': ['590 Cascade Ave SW, Atlanta, GA 30310, USA'], 'origin_addresses': ['1530 Ridgewood Ln SW, Atlanta, GA 30311, USA'], 'rows': [{'elements': [{'distance': {'text': '5.9 km', 'value': 5888}, 'duration': {'text': '8 mins', 'value': 495}, 'status': 'OK'}]}], 'status': 'OK'}
{'destination_addresses': ['590 Cascade Ave SW, Atlanta, GA 30310, USA'], 'origin_addresses': ['1530 Ridgewood Ln SW, Atlanta, GA 30311, USA'], 'rows': [{'elements': [{'distance': {'text': '5.2 km', 'value': 5157}, 'duration': {'text': '1 hour 15 mins', 'value': 4481}, 'status': 'OK'}]}], 'status': 'OK'}
Driving distance: 5888 meters (~3.66 miles)
Walking distance: 5157 meters (~3.20 miles)


In [None]:
def assign_points(distance, group):
    """
    Look up the points earned by an amenity at `distance` for its `group`.

    Each group has three (max distance, points) tiers checked from nearest to
    farthest; the first tier whose limit is >= distance wins. Returns 0 when
    the distance exceeds every tier or the group is neither 1 nor 2.
    """
    if group == 1:
        tiers = ((0.5, 2.5), (1, 2), (1.5, 1.5))
    elif group == 2:
        tiers = ((0.5, 2), (1, 1.5), (2.5, 1))
    else:
        return 0

    for limit, points in tiers:
        if distance <= limit:
            return points
    return 0

In [None]:
def get_distance(lat1, lon1, lat2, lon2):
    """Return the geodesic distance, in miles, between (lat1, lon1) and (lat2, lon2)."""
    start_point = (lat1, lon1)
    end_point = (lat2, lon2)
    return geodesic(start_point, end_point).miles

def calculate_site_score(site_lat, site_lon, amenities_df, undesirable_df):
    """
    Score a site from nearby desirable amenities and undesirable activities.

    Every row of `amenities_df` whose "amenity_type" is a key of
    DESIRABLE_AMENITIES earns assign_points(distance, group) points, using the
    row's "latitude"/"longitude". Every row of `undesirable_df` within 0.25
    (get_distance units) of the site deducts 2 points.

    Returns a dict with "total_points", "deductions" and "final_score", where
    the final score is the difference floored at 0.
    """
    earned = 0
    for _, amenity in amenities_df.iterrows():
        kind = amenity["amenity_type"]
        a_lat = amenity["latitude"]
        a_lon = amenity["longitude"]

        # Rows with an amenity type that has no scoring rule contribute nothing.
        if kind not in DESIRABLE_AMENITIES:
            continue

        miles_away = get_distance(site_lat, site_lon, a_lat, a_lon)
        tier = DESIRABLE_AMENITIES[kind]["group"]
        earned += assign_points(miles_away, tier)

    penalty = 0
    for _, hazard in undesirable_df.iterrows():
        h_lat = hazard["latitude"]
        h_lon = hazard["longitude"]
        if get_distance(site_lat, site_lon, h_lat, h_lon) <= 0.25:
            penalty += 2

    return {
        "total_points": earned,
        "deductions": penalty,
        "final_score": max(0, earned - penalty),
    }

# Coordinates of the example site to score
site_latitude, site_longitude = 33.7490, -84.3880

# Read the amenity / undesirable-activity tables
amenities_df = pd.read_csv("../../data/processed/desirable_amenities.csv")
undesirable_df = pd.read_csv("../../data/processed/undesirable_activities.csv")

# Score the example site against both tables
score_info = calculate_site_score(
    site_latitude,
    site_longitude,
    amenities_df,
    undesirable_df,
)

print("Total Points (Desirable):", score_info["total_points"])
print("Deductions (Undesirable):", score_info["deductions"])
print("Final Score:", score_info["final_score"])

In [31]:
# Free-text search phrases. main() runs one text search per phrase at every
# grid point and stores every hit under the "community_center" amenity key.
QUERIES = [
    "community center",
    "recreation center",
    "YMCA",
    "Boys & Girls Club",
    "public pool",
    "senior center",
    "multipurpose facility"
]

In [32]:
# Latitude / longitude limits of the search grid
MIN_LAT, MAX_LAT = 30.35, 35.00
MIN_LON, MAX_LON = -85.60, -80.75

# Spacing between neighbouring grid points (used for both lat and lon)
STEP = 0.15

# Radius in meters sent with each text search
RADIUS = 10000

In [35]:
import time

In [33]:
def text_search(query, lat, lon, api_key):
    """
    Run a Places Text Search for `query` around (lat, lon) using RADIUS.

    Follows next_page_token links until none is returned and gives back the
    concatenated "results" of every page. Stops early, keeping what was
    collected so far, when a response status is neither OK nor ZERO_RESULTS.
    """
    endpoint = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    request_params = {
        "query": query,
        "location": f"{lat},{lon}",
        "radius": RADIUS,
        "key": api_key
    }

    collected = []
    has_more_pages = True
    while has_more_pages:
        payload = requests.get(endpoint, params=request_params).json()

        status = payload.get("status")
        if status != "OK" and status != "ZERO_RESULTS":
            print(f"Warning: text_search got status={status}. "
                  f"Error message: {payload.get('error_message', '')}")
            break

        collected.extend(payload.get("results", []))

        page_token = payload.get("next_page_token")
        has_more_pages = bool(page_token)
        if has_more_pages:
            # Google requires a short pause before a next_page_token can be used
            time.sleep(2)
            request_params["pagetoken"] = page_token

    return collected

In [None]:
def main():
    """
    Sweep the lat/lon grid for every phrase in QUERIES and write the unique
    places to a CSV file.

    Places are de-duplicated by place_id across all queries and grid points.
    The CSV is written only after the whole sweep finishes, so interrupting
    the run loses everything collected so far.
    """
    # This dictionary will hold places keyed by place_id
    # to avoid duplicates across multiple queries and grid points.
    places_dict = {}

    # Loop over each query
    for query in QUERIES:
        print(f"\n=== Searching for: '{query}' ===\n")
        # Loop over lat/lon grid. The small epsilon keeps accumulated float
        # error in `+= STEP` from dropping the last row/column of the grid.
        lat = MIN_LAT
        while lat <= MAX_LAT + 0.000001:
            lon = MIN_LON
            while lon <= MAX_LON + 0.000001:
                print(f" Searching around lat={lat:.2f}, lon={lon:.2f}")
                results = text_search(query, lat, lon, API_KEY)
                print(f"  -> Found {len(results)} results")

                for place in results:
                    pid = place["place_id"]
                    if pid not in places_dict:
                        # Extract relevant data
                        name = place.get("name")
                        geometry = place.get("geometry", {})
                        loc = geometry.get("location", {})
                        place_lat = loc.get("lat")
                        place_lon = loc.get("lng")

                        types = place.get("types", [])
                        vicinity = place.get("formatted_address") or place.get("vicinity", "")
                        business_status = place.get("business_status", "")

                        places_dict[pid] = {
                            "amenity_key": "community_center",
                            "place_id": pid,
                            "name": name,
                            "lat": place_lat,
                            "lon": place_lon,
                            "types": "|".join(types),  # store as pipe-separated
                            "vicinity": vicinity,
                            "business_status": business_status
                        }

                lon += STEP
            lat += STEP

    # Write out to CSV
    output_filename = "../../data/preprocessed/scoring_indicators/DesirableUndesirableActivities/community_centers_georgia.csv"
    fieldnames = [
        "amenity_key",
        "place_id",
        "name",
        "lat",
        "lon",
        "types",
        "vicinity",
        "business_status"
    ]

    # Make sure the target directory exists so the sweep's results are not
    # lost to a FileNotFoundError at the very end.
    os.makedirs(os.path.dirname(output_filename), exist_ok=True)

    with open(output_filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(places_dict.values())

    print(f"\nDone! Wrote {len(places_dict)} unique places to '{output_filename}'.\n")


if __name__ == "__main__":
    main()


=== Searching for: 'community center' ===

 Searching around lat=30.35, lon=-85.60
  -> Found 60 results
 Searching around lat=30.35, lon=-85.45
  -> Found 60 results
 Searching around lat=30.35, lon=-85.30


KeyboardInterrupt: 

In [None]:
import os
import csv
import json
import time
import requests
import json
import os
from json import JSONDecodeError

# Text queries for "community or recreation centers"
QUERIES = [
    "community center",
    "recreation center",
    "YMCA",
    "Boys & Girls Club",
    "public pool",
    "senior center",
    "multipurpose facility"
]

# Places API key, read from the GOOGLE_MAPS_API_KEY environment variable so
# the secret is never stored in the notebook. Falls back to "" (the previous
# placeholder value) when the variable is unset.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")

# Approximate bounding box for Georgia
MIN_LAT = 30.35
MAX_LAT = 35.00
MIN_LON = -85.60
MAX_LON = -80.75

# Step size for lat/lon grid
STEP = 0.15

# Radius in meters for each text search
RADIUS = 10000  # ~10 km

# Filenames for our outputs/checkpoints
OUTPUT_CSV = "../../data/preprocessed/scoring_indicators/DesirableUndesirableActivities/community_centers_georgia.csv"
CHECKPOINT_JSON = "../../data/preprocessed/scoring_indicators/DesirableUndesirableActivities/community_centers_checkpoint.json"

In [44]:
def ensure_output_directory_exists(csv_path):
    """
    Make sure the parent directory of `csv_path` exists, creating any missing
    intermediate directories. Does nothing for a bare filename.
    """
    parent_dir = os.path.dirname(csv_path)
    if not parent_dir:
        return
    if os.path.exists(parent_dir):
        return
    os.makedirs(parent_dir, exist_ok=True)

def initialize_csv(csv_filename):
    """
    Create `csv_filename` containing only the header row, unless the file is
    already present, in which case it is left untouched. The parent
    directory is created first if needed.
    """
    ensure_output_directory_exists(csv_filename)

    if os.path.exists(csv_filename):
        return

    header = ["amenity_key", "place_id", "name", "lat", "lon", "types", "vicinity", "business_status"]
    with open(csv_filename, "w", newline="", encoding="utf-8") as handle:
        csv.writer(handle).writerow(header)


In [6]:


def load_checkpoint():
    """
    Read the crawl checkpoint from CHECKPOINT_JSON.

    Returns the parsed JSON when the file exists and decodes cleanly.
    Otherwise (file missing, empty, or invalid JSON) returns a fresh
    checkpoint:
        {
           "places": {},
           "progress": {
               "query_index": 0,
               "lat": MIN_LAT,
               "lon": MIN_LON
           }
        }
    """
    def fresh_checkpoint():
        # Starting state: nothing discovered, first query, first grid point.
        return {
            "places": {},
            "progress": {
                "query_index": 0,
                "lat": MIN_LAT,
                "lon": MIN_LON
            }
        }

    if not os.path.exists(CHECKPOINT_JSON):
        return fresh_checkpoint()

    try:
        with open(CHECKPOINT_JSON, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except JSONDecodeError:
        print(f"Warning: {CHECKPOINT_JSON} is empty or invalid JSON. Using fresh checkpoint.")
        return fresh_checkpoint()

def save_checkpoint(checkpoint_data):
    """
    Save the checkpoint dictionary to JSON, so we can resume if script is interrupted.

    The data is written to a temporary sibling file and then moved over
    CHECKPOINT_JSON with os.replace. An interrupt in the middle of the dump
    therefore leaves the previous checkpoint intact instead of a truncated
    file, which load_checkpoint would discard as invalid JSON.
    """
    tmp_path = CHECKPOINT_JSON + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(checkpoint_data, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, CHECKPOINT_JSON)

def initialize_csv(csv_filename):
    """
    If the CSV file does not exist, create it and write the header.
    Otherwise, do nothing.

    Missing parent directories are created first. This definition replaces
    the earlier initialize_csv in the notebook, which created them too;
    without that step a fresh checkout fails with FileNotFoundError.
    """
    directory = os.path.dirname(csv_filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    if not os.path.exists(csv_filename):
        with open(csv_filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            # Write header
            writer.writerow(["amenity_key", "place_id", "name", "lat", "lon", "types", "vicinity", "business_status"])

def append_row_to_csv(csv_filename, row_dict):
    """
    Append one place record to the end of `csv_filename`.

    The values are written in the fixed column order below; `row_dict` must
    contain every one of those keys.
    """
    column_order = (
        "amenity_key",
        "place_id",
        "name",
        "lat",
        "lon",
        "types",
        "vicinity",
        "business_status",
    )
    with open(csv_filename, "a", newline="", encoding="utf-8") as handle:
        record = [row_dict[column] for column in column_order]
        csv.writer(handle).writerow(record)

def text_search(query, lat, lon, api_key):
    """
    Perform a Places Text Search for a given query, centered at (lat, lon),
    with a fixed radius. This function returns ALL pages (up to 60 results).

    A page request that comes back INVALID_REQUEST (the page token may not be
    valid yet) is retried up to three times before giving up. Any other
    status that is neither OK nor ZERO_RESULTS stops paging and returns what
    was collected so far.
    """
    all_results = []
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"

    # Initial parameters
    params = {
        "query": query,
        "location": f"{lat},{lon}",
        "radius": RADIUS,
        "key": api_key
    }

    max_token_retries = 3
    token_retries = 0

    while True:
        # A timeout keeps a stalled connection from hanging the whole crawl.
        resp = requests.get(url, params=params, timeout=30)
        data = resp.json()
        status = data.get("status")

        # A freshly issued next_page_token is not valid immediately; using it
        # too early yields INVALID_REQUEST. Wait and retry instead of silently
        # dropping the remaining pages.
        if (
            status == "INVALID_REQUEST"
            and "pagetoken" in params
            and token_retries < max_token_retries
        ):
            token_retries += 1
            time.sleep(2)
            continue

        if status not in ("OK", "ZERO_RESULTS"):
            print(f"Warning: text_search got status={status}. "
                  f"Error message: {data.get('error_message', '')}")
            break

        results = data.get("results", [])
        all_results.extend(results)

        # Check if there's more results to page through
        next_page_token = data.get("next_page_token")
        if not next_page_token:
            break

        # Google requires a short pause before using next_page_token
        time.sleep(2)
        # Set next_page_token for next iteration
        params["pagetoken"] = next_page_token
        token_retries = 0

    return all_results

def main():
    """
    Resumable sweep of the lat/lon grid for every phrase in QUERIES.

    New places (by place_id) are appended to OUTPUT_CSV as soon as they are
    found, and a checkpoint is saved after every grid point so an interrupted
    run can continue where it stopped.

    The checkpoint records the NEXT grid point to search. Previously it
    recorded the point just completed, so every resume repeated that point's
    requests. Checkpoints written by the old code still load; they only cause
    one repeated grid point.

    Known limitation: rows are appended to the CSV before the checkpoint is
    saved, so an interrupt between the two can duplicate those rows on resume.
    """
    # 1. Load checkpoint (discovered places + progress)
    checkpoint = load_checkpoint()
    discovered_places = checkpoint["places"]  # dict: { place_id: {...} }
    progress = checkpoint["progress"]         # dict: { "query_index": int, "lat": float, "lon": float }

    # 2. Initialize CSV (creates file with header if it doesn't exist)
    initialize_csv(OUTPUT_CSV)

    # 3. Start from the progress checkpoint
    current_query_idx = progress["query_index"]
    start_lat = progress["lat"]
    start_lon = progress["lon"]

    # Loop over queries starting from current_query_idx
    for q_idx in range(current_query_idx, len(QUERIES)):
        query = QUERIES[q_idx]
        print(f"\n=== Searching for: '{query}' === (index: {q_idx})\n")

        lat = start_lat
        while lat <= MAX_LAT + 0.000001:  # small epsilon to ensure coverage
            lon = start_lon
            while lon <= MAX_LON + 0.000001:
                print(f"  Searching around lat={lat:.4f}, lon={lon:.4f} for '{query}'")
                results = text_search(query, lat, lon, API_KEY)
                print(f"   -> Found {len(results)} results")

                # Process results
                new_count = 0
                for place in results:
                    pid = place["place_id"]

                    if pid not in discovered_places:
                        # Extract relevant data
                        name = place.get("name", "")
                        geometry = place.get("geometry", {})
                        loc = geometry.get("location", {})
                        place_lat = loc.get("lat")
                        place_lon = loc.get("lng")

                        types = place.get("types", [])
                        vicinity = place.get("formatted_address") or place.get("vicinity", "")
                        business_status = place.get("business_status", "")

                        row_dict = {
                            "amenity_key": "community_center",
                            "place_id": pid,
                            "name": name,
                            "lat": place_lat,
                            "lon": place_lon,
                            "types": "|".join(types),
                            "vicinity": vicinity,
                            "business_status": business_status
                        }

                        # Add to our in-memory dictionary
                        discovered_places[pid] = row_dict
                        new_count += 1

                        # Append row to CSV immediately
                        append_row_to_csv(OUTPUT_CSV, row_dict)

                print(f"   -> {new_count} new places added.\n")

                # Advance first, then checkpoint the NEXT point to search.
                # If lon is now past MAX_LON, a resumed run skips the inner
                # loop for this row and moves on to the next latitude.
                lon += STEP

                progress["query_index"] = q_idx
                progress["lat"] = lat
                progress["lon"] = lon
                checkpoint["places"] = discovered_places
                checkpoint["progress"] = progress
                save_checkpoint(checkpoint)

            # Once we've finished the row of longitudes, reset to the MIN_LON for next row
            start_lon = MIN_LON
            lat += STEP

        # Once we finish a query, reset lat/lon to the bounding-box minimum for the next query
        start_lat = MIN_LAT
        start_lon = MIN_LON

    print("\nAll queries and bounding-box points processed!")
    print(f"Total unique places found: {len(discovered_places)}")
    print(f"Results in: {OUTPUT_CSV}")
    print(f"Checkpoint in: {CHECKPOINT_JSON}")

if __name__ == "__main__":
    main()


=== Searching for: 'community center' === (index: 0)

  Searching around lat=30.3500, lon=-85.6000 for 'community center'
   -> Found 60 results
   -> 60 new places added.

  Searching around lat=30.3500, lon=-85.4500 for 'community center'
   -> Found 60 results
   -> 21 new places added.

  Searching around lat=30.3500, lon=-85.3000 for 'community center'
   -> Found 60 results
   -> 10 new places added.

  Searching around lat=30.3500, lon=-85.1500 for 'community center'
   -> Found 60 results
   -> 13 new places added.

  Searching around lat=30.3500, lon=-85.0000 for 'community center'
   -> Found 60 results
   -> 5 new places added.

  Searching around lat=30.3500, lon=-84.8500 for 'community center'
   -> Found 60 results
   -> 8 new places added.

  Searching around lat=30.3500, lon=-84.7000 for 'community center'
   -> Found 60 results
   -> 17 new places added.

  Searching around lat=30.3500, lon=-84.5500 for 'community center'
   -> Found 60 results
   -> 15 new places add

KeyboardInterrupt: 