In [113]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import sys

In [114]:
# UPDATE THIS
csvIndex = 3
edge_driver_path = "E:/Downloads/edgedriver_win64/msedgedriver.exe"

In [115]:
hospitals = pd.read_csv(f'hospitals_{csvIndex}.csv')
hospitals.drop(columns=['PPN / NON PPN', 'Address'], inplace=True)

In [116]:
hospitals['Rating'] = [None] * len(hospitals)
hospitals['Number of Reviews'] = [None] * len(hospitals)
hospitals['Plus Code'] = [None] * len(hospitals)

In [117]:
def getFullAddress(rowIndex):
    print("Generating full address for row", rowIndex)
    return ', '.join(hospitals.iloc[rowIndex].dropna()).replace('\n', '').replace('\r', '')

In [118]:
def searchForAddress(address, driver):
    print("Searching for address:", address)
    search_box = driver.find_element(By.ID, 'searchboxinput')
    search_box.clear() 
    search_box.send_keys(address)
    search_box.send_keys(Keys.RETURN)         

    print("Searched")

In [119]:
"""
EXAMPLE HTML
<div class="F7nice " jslog="76333;mutable:true;">
    <span>
        <span aria-hidden="true">4.8</span>
        <span class="ceNzKf" role="img" aria-label="4.8 stars ">
            <span class="rFrJzc"></span>
            <span class="rFrJzc"></span>
            <span class="rFrJzc"></span>
            <span class="rFrJzc"></span>
            <span class="rFrJzc"></span>
        </span>
    </span>
    <span>
        <span>
            <span aria-label="1,168 reviews">(1,168)</span>
        </span>
    </span>
</div>
"""

def scrape_data(driver):
    time.sleep(2)
    print("Scraping data")
    if driver.current_url.startswith("https://www.google.com/maps/search/"):
        print("In search page")
        try:
            WebDriverWait(driver, 4).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "a.hfpxzc"))
            ).click()
        except Exception as e:
            print("No search results found:", e)
            return None, None, None


    def get_element_text(selector, attribute=None):
        try:
            element = WebDriverWait(driver, 4).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector))
            )
            return element.get_attribute(attribute) if attribute else element.text
        except Exception as e:
            print(f"Error scraping {selector}:", e)
            return None

    rating = get_element_text("div.F7nice span[aria-hidden='true']")
    num_reviews = get_element_text("div.F7nice span > span > span[aria-label]", "aria-label")
    if num_reviews:
        num_reviews = int(num_reviews.strip('()').replace(',', '').split(' ')[0])
    plus_code = get_element_text("button.CsEnBe[aria-label*='Plus code: ']", "aria-label")
    if plus_code:
        plus_code = plus_code.split(': ')[1]
    
    print("Scraped data:", rating, num_reviews, plus_code)
    return rating, num_reviews, plus_code




In [120]:
service = Service(edge_driver_path)
driver = webdriver.Edge(service=service)

driver.get('https://www.google.com/maps')
with open(f"error_log{csvIndex}.txt", "w") as f:
    for i in range(len(hospitals)):
        try:
            print(f"Processing row {i}")
            address = getFullAddress(i)
            searchForAddress(address, driver)


            rating, num_reviews, plus_code = scrape_data(driver)

            hospitals.at[i, 'Rating'] = rating
            hospitals.at[i, 'Number of Reviews'] = num_reviews
            hospitals.at[i, 'Plus Code'] = plus_code

            try:
                close_button = WebDriverWait(driver, 4).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "button[aria-label='Close']"))
                )
            except:
                close_button = WebDriverWait(driver, 4).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "button[aria-label='Close directions']"))
                )
            close_button.click()

            if i % 10 == 0:
                hospitals.to_csv(f'hospitals_{csvIndex}_ratings.csv', index=False)

        except Exception as e:
            f.write(f"Error at index {i}: {str(e)}\n")
            f.flush()
            continue
hospitals.to_csv(f'hospitals_{csvIndex}_ratings.csv', index=False)
        
driver.quit()

Processing row 0
Generating full address for row 0
Searching for address: Prashanth Multispeciality Hospital, CHENNAI, TAMILNADU
Searched
Scraping data
Scraped data: 4.6 6627 X6HC+CG Chennai, Tamil Nadu
Processing row 1
Generating full address for row 1
Searching for address: Apollo Spectra Hospital Alwarpet, CHENNAI, TAMILNADU
Searched
Scraping data
Scraped data: 4.8 3223 27P4+CV Chennai, Tamil Nadu
Processing row 2
Generating full address for row 2
Searching for address: Sivam Hospital, CHENNAI, TAMILNADU
Searched
Scraping data
In search page
No search results found: Message: element not interactable
  (Session info: MicrosoftEdge=130.0.2849.56)
Stacktrace:
	GetHandleVerifier [0x00007FF66594DC75+12853]
	Microsoft::Applications::Events::EventProperty::empty [0x00007FF665BF83C4+2250276]
	(No symbol) [0x00007FF6657054FA]
	(No symbol) [0x00007FF66574AB11]
	(No symbol) [0x00007FF665740D4F]
	(No symbol) [0x00007FF6657684FA]
	(No symbol) [0x00007FF665740617]
	(No symbol) [0x00007FF6657404DD