## Google Maps Reviews Scraping

In [6]:
import time
import json
import re
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# ========== CONFIG ==========
# Listing to scrape, location of the local ChromeDriver binary, and the
# maximum number of reviews to collect.
REVIEWS_TO_SCRAPE = 100
maps_url = "https://maps.app.goo.gl/WxhAxP3hhBcnf6wcA"
chromedriver_path = r"E:\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"

# ========== SELENIUM SETUP ==========
# Command-line switches handed to Chrome, applied in the order listed.
# NOTE(review): the blink-features switch and the excluded "enable-automation"
# switch look intended to make the session appear less automated - confirm.
chrome_options = Options()
for _chrome_flag in (
    "--disable-blink-features=AutomationControlled",
    "--disable-gpu",
    "--window-size=1920,1080",
    "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
):
    chrome_options.add_argument(_chrome_flag)
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])

# One shared browser session plus a 20-second explicit wait used by the script.
service = Service(chromedriver_path)
driver = webdriver.Chrome(options=chrome_options, service=service)
wait = WebDriverWait(driver, 20)


def extract_all_reviews(driver):
    """Extract every review currently present in the page via one JS call.

    The embedded script walks each ``div.jftiEf`` wrapper and collects, per
    review, the review id, reviewer name, review text, rating and date.
    The rating is looked up with several strategies in order: an ``X/5``
    pattern anywhere in the wrapper text, a span whose whole text is
    ``X/5``, a div whose whole text is ``X/5``, and finally a star
    ``aria-label``. The date is taken from a list of known class names, or
    from a span whose text looks like a relative date ("3 days ago").

    Args:
        driver: Object exposing ``execute_script(script)`` (a Selenium
            WebDriver in this script).

    Returns:
        Whatever ``driver.execute_script`` returns for the script: one entry
        per wrapper with keys ``review_id``, ``reviewer``, ``rating``,
        ``review_text`` and ``date``. ``rating`` stays ``null`` on the
        JavaScript side when no strategy matched.
    """
    # Raw string: the JavaScript regex literals below contain backslash
    # sequences (\d, \s, \., \/) that are not valid Python escapes and would
    # otherwise trigger an "invalid escape sequence" warning at compile time.
    reviews = driver.execute_script(r"""
        function extractReviews() {
            const allReviews = [];
            const wrappers = document.querySelectorAll('div.jftiEf');
            
            wrappers.forEach((wrapper, index) => {
                const review = {
                    review_id: '',
                    reviewer: '',
                    rating: null,
                    review_text: '',
                    date: ''
                };
                
                // Review ID
                try {
                    const idEl = wrapper.querySelector('[data-review-id]');
                    if (idEl) review.review_id = idEl.getAttribute('data-review-id');
                } catch(e) {}
                
                // Reviewer Name
                try {
                    const nameEl = wrapper.querySelector('.d4r55');
                    if (nameEl) review.reviewer = nameEl.textContent.trim();
                } catch(e) {}
                
                // Review Text
                try {
                    const textEl = wrapper.querySelector('.wiI7pd');
                    if (textEl) review.review_text = textEl.textContent.trim();
                } catch(e) {}
                
                // === EXTRACT RATING AND DATE TOGETHER ===
                // Rating appears next to date in format "X/5"
                
                try {
                    // Get all text in wrapper
                    const allText = wrapper.textContent;
                    
                    // Look for pattern "X/5" where X is 1-5 or decimal
                    const ratingMatch = allText.match(/(\d+(?:\.\d+)?)\s*\/\s*5/);
                    if (ratingMatch) {
                        review.rating = parseFloat(ratingMatch[1]);
                    }
                } catch(e) {}
                
                // Try alternative: Look for rating in spans near date
                if (review.rating === null) {
                    try {
                        const allSpans = wrapper.querySelectorAll('span');
                        for (let span of allSpans) {
                            const text = span.textContent.trim();
                            // Match "2/5", "3/5", "4.5/5" etc
                            const match = text.match(/^(\d+(?:\.\d+)?)\s*\/\s*5$/);
                            if (match) {
                                review.rating = parseFloat(match[1]);
                                break;
                            }
                        }
                    } catch(e) {}
                }
                
                // Try: Check divs for rating
                if (review.rating === null) {
                    try {
                        const allDivs = wrapper.querySelectorAll('div');
                        for (let div of allDivs) {
                            const text = div.textContent.trim();
                            if (text.match(/^\d+(?:\.\d+)?\s*\/\s*5$/)) {
                                const match = text.match(/^(\d+(?:\.\d+)?)/);
                                if (match) {
                                    review.rating = parseFloat(match[1]);
                                    break;
                                }
                            }
                        }
                    } catch(e) {}
                }
                
                // Try: aria-label with star (fallback)
                if (review.rating === null) {
                    try {
                        const starEl = wrapper.querySelector('span[role="img"][aria-label*="star"]');
                        if (starEl) {
                            const aria = starEl.getAttribute('aria-label');
                            const match = aria.match(/(\d+(?:\.\d+)?)/);
                            if (match) {
                                const num = parseFloat(match[1]);
                                if (num >= 1 && num <= 5) {
                                    review.rating = num;
                                }
                            }
                        }
                    } catch(e) {}
                }
                
                // === EXTRACT DATE ===
                try {
                    // Method 1: Common date classes
                    const dateClasses = ['rsqaWe', 'DZSIDd', 'xRkPPb', 'dehysf', 'lqhpac'];
                    for (let cls of dateClasses) {
                        const dateEl = wrapper.querySelector('.' + cls);
                        if (dateEl && dateEl.textContent.trim()) {
                            review.date = dateEl.textContent.trim();
                            break;
                        }
                    }
                    
                    // Method 2: Search for text with "ago"
                    if (!review.date) {
                        const allSpans = wrapper.querySelectorAll('span');
                        for (let span of allSpans) {
                            const text = span.textContent.trim();
                            if (text.match(/\d+\s*(second|minute|hour|day|week|month|year)s?\s*ago/i) ||
                                text.match(/^a\s+(day|week|month|year)\s+ago$/i) ||
                                text.match(/on Google$/i)) {
                                review.date = text;
                                break;
                            }
                        }
                    }
                } catch(e) {}
                
                // Debug first review if no rating found
                if (index === 0 && review.rating === null) {
                    console.log('=== DEBUG FIRST REVIEW ===');
                    console.log('Searching for X/5 pattern in text:', wrapper.textContent.substring(0, 500));
                    
                    // List all spans with short text
                    const spans = wrapper.querySelectorAll('span');
                    console.log('All short spans:');
                    spans.forEach(span => {
                        const text = span.textContent.trim();
                        if (text.length < 20 && text.length > 0) {
                            console.log('  - "' + text + '"');
                        }
                    });
                }
                
                allReviews.push(review);
            });
            
            return allReviews;
        }
        
        return extractReviews();
    """)

    return reviews


def clean_phone(phone):
    """Reduce a raw phone value to its digits and ``+`` signs.

    Args:
        phone: Raw text such as a button label or a ``tel:`` href; may be
            ``None`` or empty.

    Returns:
        The input with every character other than ``0-9`` and ``+`` removed,
        or ``""`` when the input is falsy.
    """
    if not phone:
        return ""
    # The character class already discards whitespace, icon glyphs and any
    # "tel:" prefix, so no separate replace/strip pass is needed.
    return re.sub(r"[^0-9+]", "", phone)


# ========== MAIN ==========
try:
    print("üåç Opening Google Maps...")
    driver.get(maps_url)
    time.sleep(5)  # fixed pause after navigation before touching the page

    # Dismiss the cookie dialog when present; its absence is not an error.
    try:
        reject_btn = driver.find_element(By.XPATH, "//button[contains(., 'Reject all')]")
        reject_btn.click()
        time.sleep(1)
    except Exception:
        pass

    # Get business details
    print("üè¢ Extracting business details...")
    try:
        name_el = wait.until(EC.visibility_of_element_located((By.XPATH, '//h1[contains(@class,"DUwDvf")]')))
        company_name = name_el.text.strip()
    except Exception:
        company_name = "Unknown"

    # Try each phone locator in turn and keep the first one that actually
    # yields a value; an element with neither text nor href is skipped so a
    # later locator still gets its chance.
    phone_number = ""
    for sel in ['//button[contains(@aria-label,"Phone")]', '//button[contains(@data-item-id,"phone:tel")]', '//a[contains(@href,"tel:")]']:
        try:
            el = driver.find_element(By.XPATH, sel)
            candidate = el.text or el.get_attribute("href")
        except Exception:
            continue
        if candidate:
            phone_number = candidate
            break
    phone_number = clean_phone(phone_number)

    print(f"‚úî Company: {company_name}")
    print(f"‚úî Phone: {phone_number}")

    # Open reviews
    print("üü¶ Opening reviews section...")
    try:
        review_tab = wait.until(EC.element_to_be_clickable((By.XPATH, '//button[contains(@aria-label,"reviews")]')))
        # Click through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", review_tab)
        time.sleep(4)
    except Exception as e:
        print("‚ö† Could not open reviews:", e)

    # Scroll to load reviews
    print("üîÅ Scrolling to load reviews...")
    scroll_box = wait.until(EC.presence_of_element_located((By.XPATH, '//div[contains(@class,"m6QErb") and contains(@class,"DxyBCb")]')))

    previous_count = 0
    stale_count = 0

    # At most 80 scrolls; stop early once enough reviews are loaded or the
    # count has not grown for 5 consecutive scrolls.
    for scroll_attempts in range(80):
        driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", scroll_box)
        time.sleep(1.2)

        current_count = len(driver.find_elements(By.XPATH, '//div[contains(@class,"jftiEf")]'))
        print(f"Scroll {scroll_attempts + 1}: {current_count} reviews loaded...")

        if current_count >= REVIEWS_TO_SCRAPE:
            break

        if current_count == previous_count:
            stale_count += 1
            if stale_count >= 5:
                break
        else:
            stale_count = 0

        previous_count = current_count

    # Extract all reviews
    print("üìù Extracting reviews...")
    reviews_data = extract_all_reviews(driver)
    reviews_data = reviews_data[:REVIEWS_TO_SCRAPE]

    # Convert ratings to strings
    for review in reviews_data:
        if review['rating'] is not None:
            review['rating'] = str(review['rating'])
        else:
            review['rating'] = "No rating"

    # Save output
    os.makedirs("output", exist_ok=True)
    file_name = re.sub(r'[^A-Za-z0-9 ]+', '', company_name).replace(" ", "_") + "_reviews.json"
    path = os.path.join("output", file_name)

    with open(path, "w", encoding="utf-8") as f:
        json.dump({
            "company_name": company_name,
            "phone_number": phone_number,
            "total_reviews": len(reviews_data),
            "reviews": reviews_data
        }, f, indent=4, ensure_ascii=False)

    # Statistics
    total = len(reviews_data)
    ratings_found = sum(1 for r in reviews_data if r['rating'] != 'No rating')
    dates_found = sum(1 for r in reviews_data if r['date'])
    # Guard the percentages: with zero reviews the division would raise
    # ZeroDivisionError and the run would be reported as failed.
    ratings_pct = ratings_found / total * 100 if total else 0.0
    dates_pct = dates_found / total * 100 if total else 0.0

    print(f"\nüéâ DONE! ‚Üí {path}")
    print("\nüìä Statistics:")
    print(f"   Total: {total}")
    print(f"   Ratings: {ratings_found}/{total} ({ratings_pct:.1f}%)")
    print(f"   Dates: {dates_found}/{total} ({dates_pct:.1f}%)")

    if reviews_data:
        print("\nüìù First 3 Reviews:")
        for i, sample in enumerate(reviews_data[:3], start=1):
            print(f"\n   [{i}] {sample['reviewer']}")
            print(f"       Rating: {sample['rating']}")
            print(f"       Date: {sample['date']}")

except Exception as e:
    print("‚ùå ERROR:", e)
    import traceback
    traceback.print_exc()

finally:
    # Always close the browser, whether the run succeeded or failed.
    driver.quit()

  reviews = driver.execute_script("""


üåç Opening Google Maps...
üè¢ Extracting business details...
‚úî Company: Premier Inn Dubai Al Jaddaf Hotel
‚úî Phone: 
üü¶ Opening reviews section...
üîÅ Scrolling to load reviews...
Scroll 1: 20 reviews loaded...
Scroll 2: 30 reviews loaded...
Scroll 3: 40 reviews loaded...
Scroll 4: 50 reviews loaded...
Scroll 5: 50 reviews loaded...
Scroll 6: 60 reviews loaded...
Scroll 7: 70 reviews loaded...
Scroll 8: 70 reviews loaded...
Scroll 9: 80 reviews loaded...
Scroll 10: 90 reviews loaded...
Scroll 11: 100 reviews loaded...
üìù Extracting reviews...

üéâ DONE! ‚Üí output\Premier_Inn_Dubai_Al_Jaddaf_Hotel_reviews.json

üìä Statistics:
   Total: 100
   Ratings: 100/100 (100.0%)
   Dates: 100/100 (100.0%)

üìù First 3 Reviews:

   [1] Raju Singh
       Rating: 5
       Date: 6 days ago

   [2] Asel T
       Rating: 5
       Date: 3 days ago

   [3] Joanne Katherine
       Rating: 5
       Date: a week ago
