In [93]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import sys

In [94]:
# UPDATE THIS before each run.
# csvIndex selects the input file hospitals_{csvIndex}.csv and is also embedded in the
# names of the error log (error_log{csvIndex}.txt) and the ratings output CSV.
csvIndex = 0
# Path to the msedgedriver executable handed to Selenium's Service when the browser starts.
# NOTE(review): this is an absolute, machine-specific path — adjust it for your machine.
edge_driver_path = "E:/Downloads/edgedriver_win64/msedgedriver.exe"

In [95]:
# Read the hospital list selected by csvIndex and discard the two columns
# that are not used when building the search query.
unused_columns = ['PPN / NON PPN', 'Address']
hospitals = pd.read_csv(f'hospitals_{csvIndex}.csv').drop(columns=unused_columns)

In [96]:
# Create the three result columns, filled with None, that the scraping loop writes into.
for result_column in ('Rating', 'Number of Reviews', 'Plus Code'):
    hospitals[result_column] = [None] * len(hospitals)

In [97]:
# Columns that hold scraped results; they must never become part of a search query.
_SCRAPED_COLUMNS = ('Rating', 'Number of Reviews', 'Plus Code')


def getFullAddress(rowIndex):
    """Build the search query for one hospital from its row in the global ``hospitals`` frame.

    The non-null cells of the row are joined with ``', '``. The scraped result
    columns (``Rating``, ``Number of Reviews``, ``Plus Code``) are excluded, so
    re-running the notebook on a row that has already been filled in does not
    leak earlier results into the query or fail on the integer review count.

    Args:
        rowIndex: Positional (``iloc``) index of the row in ``hospitals``.

    Returns:
        str: The query text, with line breaks and runs of whitespace inside each
        cell collapsed to a single space.
    """
    print("Generating full address for row", rowIndex)
    row = hospitals.iloc[rowIndex].drop(labels=list(_SCRAPED_COLUMNS), errors='ignore').dropna()
    # str() guards against non-string cells; split()/join turns embedded "\r\n"
    # into a single space instead of gluing the neighbouring words together.
    parts = (' '.join(str(value).split()) for value in row)
    return ', '.join(part for part in parts if part)

In [98]:
def searchForAddress(address, driver, timeout=10):
    """Type ``address`` into the search box (element id ``searchboxinput``) and submit it.

    The search box is obtained through an explicit wait rather than a bare
    ``find_element`` call, so the lookup does not fail immediately when the
    input has not been rendered yet.

    Args:
        address: Query text to search for.
        driver: Selenium WebDriver showing the page that contains the search box.
        timeout: Maximum number of seconds to wait for the search box to become
            clickable.

    Raises:
        Whatever ``WebDriverWait.until`` raises when the search box does not
        become clickable within ``timeout`` seconds.
    """
    print("Searching for address:", address)
    search_box = WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((By.ID, 'searchboxinput'))
    )
    # Remove the previous query before typing the new one.
    search_box.clear()
    search_box.send_keys(address)
    search_box.send_keys(Keys.RETURN)

    print("Searched")

In [99]:
"""
EXAMPLE HTML
<div class="F7nice " jslog="76333;mutable:true;">
    <span>
        <span aria-hidden="true">4.8</span>
        <span class="ceNzKf" role="img" aria-label="4.8 stars ">
            <span class="rFrJzc"></span>
            <span class="rFrJzc"></span>
            <span class="rFrJzc"></span>
            <span class="rFrJzc"></span>
            <span class="rFrJzc"></span>
        </span>
    </span>
    <span>
        <span>
            <span aria-label="1,168 reviews">(1,168)</span>
        </span>
    </span>
</div>
"""

def _get_element_text(driver, selector, attribute=None, timeout=10):
    """Return the text (or one attribute) of the first element matching ``selector``, or None on failure."""
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, selector))
        )
        return element.get_attribute(attribute) if attribute else element.text
    except Exception as e:
        # Best effort: a missing element is reported and becomes None.
        print(f"Error scraping {selector}:", e)
        return None


def _parse_review_count(label):
    """Turn a label such as '1,168 reviews' or '(1,168)' into an int, or None if it is not numeric."""
    token = label.strip('()').replace(',', '').split(' ')[0]
    try:
        return int(token)
    except ValueError:
        print("Could not parse review count from:", label)
        return None


def _parse_plus_code(label):
    """Return the part of an aria-label after the first ': ', or None if there is no such separator."""
    parts = label.split(': ', 1)
    return parts[1] if len(parts) == 2 else None


def scrape_data(driver, settle_seconds=2, timeout=10):
    """Read rating, review count and plus code from the page currently shown by ``driver``.

    If the current URL is a ``/maps/search/`` results page, the first result
    link (``a.hfpxzc``) is clicked before the fields are read. The rating and
    review-count selectors match the markup shown in the EXAMPLE HTML string
    that precedes this function.

    Args:
        driver: Selenium WebDriver on which a search has just been submitted.
        settle_seconds: Fixed pause before the page is inspected.
        timeout: Maximum number of seconds to wait for each element.

    Returns:
        tuple: ``(rating, num_reviews, plus_code)`` where ``rating`` is the
        rating text (e.g. ``"4.8"``), ``num_reviews`` is an ``int`` and
        ``plus_code`` is the text after ``"Plus code: "``. A field that is
        missing or cannot be parsed is ``None``; all three are ``None`` when
        the first search result cannot be clicked.
    """
    time.sleep(settle_seconds)
    print("Scraping data")
    if driver.current_url.startswith("https://www.google.com/maps/search/"):
        print("In search page")
        try:
            # Wait until the link can actually be clicked, not merely until it
            # exists in the DOM.
            WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "a.hfpxzc"))
            ).click()
        except Exception as e:
            print("No search results found:", e)
            return None, None, None

    rating = _get_element_text(driver, "div.F7nice span[aria-hidden='true']", timeout=timeout)

    reviews_label = _get_element_text(
        driver, "div.F7nice span > span > span[aria-label]", "aria-label", timeout
    )
    # An unparsable label yields None for this field instead of raising and
    # discarding the rating and plus code of the same row.
    num_reviews = _parse_review_count(reviews_label) if reviews_label else None

    plus_code_label = _get_element_text(
        driver, "button.CsEnBe[aria-label*='Plus code: ']", "aria-label", timeout
    )
    plus_code = _parse_plus_code(plus_code_label) if plus_code_label else None

    print("Scraped data:", rating, num_reviews, plus_code)
    return rating, num_reviews, plus_code




In [100]:
# Scrape every hospital row: search for it, store rating / review count / plus code
# in `hospitals`, and log per-row failures to error_log{csvIndex}.txt.
service = Service(edge_driver_path)
driver = webdriver.Edge(service=service)

ratings_csv_path = f'hospitals_{csvIndex}_ratings.csv'
checkpoint_every = 10  # number of rows between intermediate saves

try:
    driver.get('https://www.google.com/maps')
    with open(f"error_log{csvIndex}.txt", "w") as f:
        for i in range(len(hospitals)):
            try:
                print(f"Processing row {i}")
                address = getFullAddress(i)
                searchForAddress(address, driver)

                rating, num_reviews, plus_code = scrape_data(driver)

                # getFullAddress addresses rows by position, so translate the
                # position into the matching index label before writing.
                row_label = hospitals.index[i]
                hospitals.at[row_label, 'Rating'] = rating
                hospitals.at[row_label, 'Number of Reviews'] = num_reviews
                hospitals.at[row_label, 'Plus Code'] = plus_code

                # Click the "Close" button before the next search
                # (presumably this dismisses the open place panel — TODO confirm).
                close_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "button[aria-label='Close']"))
                )
                close_button.click()
                time.sleep(2)

            except Exception as e:
                # Best effort: record the failure and carry on with the next row.
                f.write(f"Error at index {i}: {str(e)}\n")
                f.flush()

            # Periodic checkpoint. It sits outside the try block so a failing row
            # does not skip it, and it no longer ends the loop after the first save.
            if i % checkpoint_every == 0 and i != 0:
                hospitals.to_csv(ratings_csv_path, index=False)
finally:
    # Always write whatever has been collected and close the browser, even when
    # the run is interrupted or fails outside the per-row handler.
    try:
        hospitals.to_csv(ratings_csv_path, index=False)
    finally:
        driver.quit()

Processing row 0
Generating full address for row 0
Searching for address: SV Hospital (A Unit Of Healthcare Alliance), ANANTPUR, ANDHRA PRADESH
Searched
Scraping data
Scraped data: 2.8 74 MHJX+MW Anantapur, Andhra Pradesh
Processing row 1
Generating full address for row 1
Searching for address: Kims Saveera Hospital Pvt Ltd, ANANTPUR, ANDHRA PRADESH
Searched
Scraping data
Scraped data: 4.9 19172 MH4M+5J Anantapur, Andhra Pradesh
Processing row 2
Generating full address for row 2
Searching for address: Aashraya Multi Specialty Hospital, CHITOOR, ANDHRA PRADESH
Searched
Scraping data
In search page
Scraped data: 3.9 139 64G5+PC Chittoor, Andhra Pradesh
Processing row 3
Generating full address for row 3
Searching for address: Aragonda Apollo Hospitals, Chittoor, ANDHRA PRADESH
Searched
Scraping data
In search page
Scraped data: 4.0 92 7XM4+37 Aragonda, Andhra Pradesh
Processing row 4
Generating full address for row 4
Searching for address: Apollo Samudra Hospitals(Kakinada), East Godavari

In [102]:
# Preview the first ten rows of the saved ratings file.
pd.read_csv(f'hospitals_{csvIndex}_ratings.csv').head(10)

Unnamed: 0,Hospital Name,CITY,STATE,Rating,Number of Reviews,Plus Code
0,SV Hospital (A Unit Of Healthcare \r\nAlliance),ANANTPUR,ANDHRA PRADESH,2.8,74.0,"MHJX+MW Anantapur, Andhra Pradesh"
1,Kims Saveera Hospital Pvt Ltd,ANANTPUR,ANDHRA PRADESH,4.9,19172.0,"MH4M+5J Anantapur, Andhra Pradesh"
2,Aashraya Multi Specialty Hospital,CHITOOR,ANDHRA PRADESH,3.9,139.0,"64G5+PC Chittoor, Andhra Pradesh"
3,Aragonda Apollo Hospitals,Chittoor,ANDHRA PRADESH,4.0,92.0,"7XM4+37 Aragonda, Andhra Pradesh"
4,Apollo Samudra Hospitals(Kakinada),East Godavari,ANDHRA PRADESH,4.8,8090.0,"X66P+68 Kakinada, Andhra Pradesh"
5,Swatantra Hospitals Pvt Ltd,East Godavari,ANDHRA PRADESH,4.4,467.0,"2Q7H+P2 Rajamahendravaram, Andhra Pradesh"
6,Prof D Ramakotaiah Children Hospital,GUNTUR,ANDHRA PRADESH,4.8,1121.0,"7CVX+RM Guntur, Andhra Pradesh"
7,Padmavathy Superspeciality Hospital,GUNTUR,ANDHRA PRADESH,4.8,97.0,"7FX2+Q7 Guntur, Andhra Pradesh"
8,Sri Datta Super Specialty Hospital,GUNTUR,ANDHRA PRADESH,4.6,940.0,"62JW+79 Narasaraopeta, Andhra Pradesh"
9,Sri Vasudha Hospitals,GUNTUR,ANDHRA PRADESH,4.1,35.0,"7FX2+78 Guntur, Andhra Pradesh"
