In [67]:
1+1 # Quickly check the kernel is live

2

In [68]:
import requests
import time
from dotenv import load_dotenv
import os

load_dotenv()

user = os.getenv("MYSQL_USER")
password = os.getenv("MYSQL_PASSWORD")
host = os.getenv("MYSQL_HOST")
database = os.getenv("MYSQL_DATABASE")
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')



In [69]:


# Base URL for Nearby Search
base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
default_radius = 2000        # 4km radius around the location.  We are allowed up to 60 dentists per circle
# Dictionary for memoization
places_cache = {}


In [70]:


# Memoized version of fetch_places_with_pagination
def fetch_places_with_pagination(lat, lon, radius=None):
    if radius is None:
        radius = default_radius

    # Check if the result is already cached
    cache_key = (lat, lon, radius)
    if cache_key in places_cache:
        print(f"Using cached results for {lat}, {lon}")
        return places_cache[cache_key]

    # If not cached, fetch from API
    all_places = []
    next_page_token = None

    params = {
        'location': f'{lat},{lon}',
        'radius': radius,
        'type': 'dentist',
        'key': GOOGLE_API_KEY
    }

    while True:
        if next_page_token:
            params['pagetoken'] = next_page_token

        response = requests.get(base_url, params=params)
        data = response.json()

        # Error handling: stop if over the query limit
        if data['status'] == 'OVER_QUERY_LIMIT':
            print("Hit query limit, stopping.")
            return None

        if 'error_message' in data:
            print(f"Error: {data['error_message']}")
            return None

        all_places.extend(data.get('results', []))

        next_page_token = data.get('next_page_token')

        if not next_page_token:
            break

        time.sleep(2)  # Delay to prevent hitting API rate limits

    # Cache the results before returning
    places_cache[cache_key] = all_places
    return all_places


In [71]:
from math import radians, cos, sin, asin, sqrt

# Haversine formula to calculate distance between two lat/lon points
def haversine(lat1, lon1, lat2, lon2):
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of the Earth in kilometers
    return c * r


In [72]:
def generate_grid(center_lat, center_lon, radius, grid_size_km):
    grid_points = []
    lat_steps = grid_size_km / 110.574  # Latitude degrees per km
    lon_steps = grid_size_km / (111.320 * cos(radians(center_lat)))  # Longitude degrees per km

    # Ensure radius and grid_size_km are treated as integers in the range function
    radius_int = int(radius)
    grid_size_int = int(grid_size_km)

    # Generate points in a grid
    for i in range(-radius_int // grid_size_int, radius_int // grid_size_int + 1):
        for j in range(-radius_int // grid_size_int, radius_int // grid_size_int + 1):
            lat = center_lat + i * lat_steps
            lon = center_lon + j * lon_steps
            grid_points.append((lat, lon))
    
    return grid_points


In [73]:
def deduplicate_places(places):
    seen_place_ids = set()
    unique_places = []

    for place in places:
        place_id = place.get('place_id')
        if place_id and place_id not in seen_place_ids:
            unique_places.append(place)
            seen_place_ids.add(place_id)

    return unique_places

In [74]:
def perform_grid_search(center_lat, center_lon, overall_radius_km=None, grid_size_km=None):
    if grid_size_km is None:
        grid_size_km = default_radius / 1000
    grid_points = generate_grid(center_lat, center_lon, overall_radius_km, grid_size_km)
    all_places = []

    for lat, lon in grid_points:
        print(f"Searching for places at {lat}, {lon}...")
        places = fetch_places_with_pagination(lat, lon)
        all_places.extend(places)

    return deduplicate_places(all_places)


In [None]:
center_lat = -36.9443906
center_lon = 174.6553629
overall_radius_km = 25
grid_size_km = default_radius / 1000

all_dentists = perform_grid_search(center_lat, center_lon, overall_radius_km, grid_size_km)
# Perform the grid search with smaller radius (5km grid cells)




Searching for places at -37.179527250568846, 174.36312655849048...
Searching for places at -37.179527250568846, 174.38560627706815...
Searching for places at -37.179527250568846, 174.4080859956458...
Searching for places at -37.179527250568846, 174.43056571422346...
Searching for places at -37.179527250568846, 174.4530454328011...
Searching for places at -37.179527250568846, 174.47552515137878...
Searching for places at -37.179527250568846, 174.49800486995642...
Searching for places at -37.179527250568846, 174.52048458853406...
Searching for places at -37.179527250568846, 174.54296430711173...
Searching for places at -37.179527250568846, 174.56544402568937...
Searching for places at -37.179527250568846, 174.58792374426704...
Searching for places at -37.179527250568846, 174.6104034628447...
Searching for places at -37.179527250568846, 174.63288318142236...
Searching for places at -37.179527250568846, 174.6553629...
Searching for places at -37.179527250568846, 174.67784261857764...
Searc

In [35]:
len(all_dentists)

417

In [36]:
sorted_names = sorted([place['name'] for place in all_dentists])
for practice in sorted_names:
    print(practice)


.Ortho
.Ortho Takapuna
A+ Dentists
Absolute Dental
Accent Dentists - Cosmetic Dentist
Advanced Dental Care Manurewa | Lumino The Dentists
Aevitas Dentistry
Affordable Dentists
Affordable Dentists Ellerslie
Airport Dental Services
Airport Oaks Dental Centre
Albany Central Dental
Albany Dental Surgery
Albany Village Dental
All Dentures Professional Denture Clinic
All Smiles Dental
Alpers Dental
Anaki Denture Services
Andrew Grayson
Andrew w Whillans
Anna Meyer Orthodontist
Apex Dental
Auckland Central | Lumino The Dentists
Auckland City Dentist
Auckland City Orthodontics CBD
Auckland City Orthodontics Grey Lynn
Auckland City Orthodontics Highbrook
Auckland City Orthodontics Remuera
Auckland Dental
Auckland Dental Care
Auckland Denture Clinic
Auckland Family Dental: Milford
Auckland Family Dental: New Lynn Dental Care
Auckland Fixed Prosthodontic
Auckland Orthodontics
Auckland Prosthodontic Clinic
AucklandPerio
Avondale Dental Centre
BISC
Bachmann Orthodontics
Baker Allen T
Balmoral Denta

In [39]:
open_dentists = [dentist for dentist in all_dentists if dentist.get('business_status') == 'OPERATIONAL' and dentist.get('opening_hours', {}).get('open_now')]
sorted_open_names = sorted([place['name'] for place in open_dentists])

In [40]:
sorted_open_names


['.Ortho',
 '.Ortho Takapuna',
 'A+ Dentists',
 'Accent Dentists - Cosmetic Dentist',
 'Advanced Dental Care Manurewa | Lumino The Dentists',
 'Aevitas Dentistry',
 'Affordable Dentists',
 'Affordable Dentists Ellerslie',
 'Airport Oaks Dental Centre',
 'Albany Dental Surgery',
 'Albany Village Dental',
 'All Smiles Dental',
 'Alpers Dental',
 'Andrew Grayson',
 'Auckland Central | Lumino The Dentists',
 'Auckland City Dentist',
 'Auckland City Orthodontics CBD',
 'Auckland City Orthodontics Highbrook',
 'Auckland City Orthodontics Remuera',
 'Auckland Dental Care',
 'Auckland Denture Clinic',
 'Auckland Family Dental: Milford',
 'Auckland Family Dental: New Lynn Dental Care',
 'Auckland Orthodontics',
 'AucklandPerio',
 'Avondale Dental Centre',
 'Balmoral Dental Centre',
 "Balmoral Intermediate Children's Community Dental Clinic",
 'Bays Dental',
 'Bayside Dental Centre',
 'Belich Dental',
 'Blockhouse Bay Dental Centre',
 'Botany Dental Practice | Lumino The Dentists',
 'Botany Orth

In [60]:
open_dentists_with_coords = [
    {
        'place_id': dentist['place_id'],
        'name': dentist['name'],
        'lat': dentist['geometry']['location']['lat'],
        'lon': dentist['geometry']['location']['lng'],
        'reviews': dentist.get('user_ratings_total', 0)  # Use .get() to avoid KeyError
    }
    for dentist in open_dentists if 'geometry' in dentist and 'location' in dentist['geometry']
]


In [61]:
open_dentists_with_coords[0]

{'place_id': 'ChIJn2iTgIGqEm0R3tcUFgH1VDY',
 'name': 'Mint Dental',
 'lat': -37.1915878,
 'lon': 174.9036523,
 'reviews': 39}

In [62]:
# Deduplicate open_dentists_with_coords by excluding entries within 50 meters of each other

deduplicated_dentists = []
added_places = set()
threshold_distance = 0.05  # 50 meters in kilometers

for i in range(len(open_dentists_with_coords)):
    dentist = open_dentists_with_coords[i]
    skip = False

    # Check if any already added dentists are within 50 meters
    for added_dentist in deduplicated_dentists:
        dist_km = haversine(dentist['lat'], dentist['lon'], added_dentist['lat'], added_dentist['lon'])
        if dist_km < threshold_distance:
            skip = True
            break
    
    if not skip:
        deduplicated_dentists.append(dentist)
        added_places.add(dentist['place_id'])

# Display deduplicated dentists
deduplicated_dentists


[{'place_id': 'ChIJn2iTgIGqEm0R3tcUFgH1VDY',
  'name': 'Mint Dental',
  'lat': -37.1915878,
  'lon': 174.9036523,
  'reviews': 39},
 {'place_id': 'ChIJZ1N2dWKqEm0RZV_x3mljNxU',
  'name': 'Pukekohe Orthodontists',
  'lat': -37.0743459,
  'lon': 174.922603,
  'reviews': 3},
 {'place_id': 'ChIJPx17MdWscm0RaYxnKNKfAcg',
  'name': 'Vanessa Wright Dental',
  'lat': -37.0629084,
  'lon': 174.9404866,
  'reviews': 11},
 {'place_id': 'ChIJiQWVRdSscm0RgB5b7NnhmNM',
  'name': 'The Denture Man',
  'lat': -37.0646526,
  'lon': 174.9435377,
  'reviews': 9},
 {'place_id': 'ChIJz13Dp9Wscm0Ru9TUvS4DslY',
  'name': 'Dental World Papakura',
  'lat': -37.06330099999999,
  'lon': 174.9435272,
  'reviews': 7},
 {'place_id': 'ChIJ3xYNwCqtcm0RrM0YQqtMEao',
  'name': 'Nova Dental Care',
  'lat': -37.0597673,
  'lon': 174.9413571,
  'reviews': 78},
 {'place_id': 'ChIJn1DCKhhSDW0R7taak_5NXnw',
  'name': 'Clendon Dental Centre',
  'lat': -37.0340319,
  'lon': 174.8679907,
  'reviews': 73},
 {'place_id': 'ChIJnwzw

In [52]:
deduplicated_dentists

[{'place_id': 'ChIJn2iTgIGqEm0R3tcUFgH1VDY',
  'name': 'Mint Dental',
  'lat': -37.1915878,
  'lon': 174.9036523},
 {'place_id': 'ChIJZ1N2dWKqEm0RZV_x3mljNxU',
  'name': 'Pukekohe Orthodontists',
  'lat': -37.0743459,
  'lon': 174.922603},
 {'place_id': 'ChIJPx17MdWscm0RaYxnKNKfAcg',
  'name': 'Vanessa Wright Dental',
  'lat': -37.0629084,
  'lon': 174.9404866},
 {'place_id': 'ChIJiQWVRdSscm0RgB5b7NnhmNM',
  'name': 'The Denture Man',
  'lat': -37.0646526,
  'lon': 174.9435377},
 {'place_id': 'ChIJz13Dp9Wscm0Ru9TUvS4DslY',
  'name': 'Dental World Papakura',
  'lat': -37.06330099999999,
  'lon': 174.9435272},
 {'place_id': 'ChIJ3xYNwCqtcm0RrM0YQqtMEao',
  'name': 'Nova Dental Care',
  'lat': -37.0597673,
  'lon': 174.9413571},
 {'place_id': 'ChIJn1DCKhhSDW0R7taak_5NXnw',
  'name': 'Clendon Dental Centre',
  'lat': -37.0340319,
  'lon': 174.8679907},
 {'place_id': 'ChIJnwzw4n1SDW0RQPQOiBxCIZY',
  'name': 'Loudon Dental',
  'lat': -37.0220447,
  'lon': 174.8977959},
 {'place_id': 'ChIJmdG

In [63]:
deduplicated_dentist_names = sorted([dentist['name'] for dentist in deduplicated_dentists])
for name in deduplicated_dentist_names:
    print(name)
    

.Ortho
A+ Dentists
Accent Dentists - Cosmetic Dentist
Advanced Dental Care Manurewa | Lumino The Dentists
Aevitas Dentistry
Affordable Dentists
Affordable Dentists Ellerslie
Airport Oaks Dental Centre
Albany Dental Surgery
Albany Village Dental
All Smiles Dental
Alpers Dental
Auckland Central | Lumino The Dentists
Auckland City Dentist
Auckland City Orthodontics Highbrook
Auckland City Orthodontics Remuera
Auckland Dental Care
Auckland Denture Clinic
Auckland Family Dental: Milford
Auckland Family Dental: New Lynn Dental Care
Auckland Orthodontics
Balmoral Dental Centre
Balmoral Intermediate Children's Community Dental Clinic
Bays Dental
Bayside Dental Centre
Belich Dental
Blockhouse Bay Dental Centre
Botany Dental Practice | Lumino The Dentists
Botany Orthodontist - Coreen Loke
Botany South Dental
Bunyan Dr TR Specialist Orthodontist
CM Dental Ltd.
Caring 4 Smiles Dental Group
Casley, Wong & Tam Dental
Catherine Porter Orthodontist
Chris Waalkens
Clean Teeth Dental
Clendon Dental Cent

In [66]:
import pickle

# Saving deduplicated_dentists to a pickle file
with open('data/deduplicated_dentists.pkl', 'wb') as f:
    pickle.dump(deduplicated_dentists, f)


In [65]:
deduplicated_dentists[4]

{'place_id': 'ChIJz13Dp9Wscm0Ru9TUvS4DslY',
 'name': 'Dental World Papakura',
 'lat': -37.06330099999999,
 'lon': 174.9435272,
 'reviews': 7}