In [6]:
from playwright.async_api import async_playwright
import asyncio
import time

class GoogleMapsSearcher:
    async def __init__(self):
        # Initialize Playwright
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch(headless=False)
        self.page = await self.browser.new_page()
        self.base_url = "https://www.google.com/maps"
        
    async def search_place(self, query):
        """
        Search for a place on Google Maps
        Args:
            query (str): Place to search for
        Returns:
            bool: True if search was successful, False otherwise
        """
        try:
            # Navigate to Google Maps
            await self.page.goto(self.base_url)
            
            # Wait for search box and enter query
            await self.page.fill('input#searchboxinput', query)
            await self.page.keyboard.press('Enter')
            
            # Wait for results to load (using proper async wait)
            await asyncio.sleep(3)  # We'll improve this later with proper wait conditions
            
            return True
            
        except Exception as e:
            print(f"Error during search: {str(e)}")
            return False
        
    async def get_page_content(self):
        """
        Get the raw HTML content from the current page
        Returns:
            tuple: (bool, str) - (success status, HTML content or error message)
        """
        try:
            # Get the entire page HTML
            content = await self.page.content()
            
            # Check if we have actual content
            if not content:
                return False, "No content found"
                
            return True, content
            
        except Exception as e:
            return False, f"Error extracting content: {str(e)}"
    
    async def close(self):
        """Close the browser"""
        await self.browser.close()
        await self.playwright.stop()

# Example usage
async def main():
    searcher = await GoogleMapsSearcher()
    search_query = "Eiffel Tower, Paris"
    
    if await searcher.search_place(search_query):
        print(f"Successfully searched for: {search_query}")
        
        # Get the content
        success, content = await searcher.get_page_content()
        if success:
            print("\nFound HTML content:")
            print("-" * 50)
            print(content[:1000])  # Print first 1000 characters as preview
            print("-" * 50)
        else:
            print(f"Failed to get content: {content}")
    else:
        print("Search failed")
    
    await searcher.close()

await main()

  searcher = await GoogleMapsSearcher()


TypeError: __init__() should return None, not 'coroutine'

In [10]:
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
import asyncio

class GoogleMapsSearcher:
    def __init__(self):
        self.playwright = None
        self.browser = None
        self.page = None
        self.base_url = "https://www.google.com/maps"
        
    async def initialize(self):
        """Initialize Playwright browser and page"""
        self.playwright = await async_playwright().start()
        # Increase viewport size for better rendering
        self.browser = await self.playwright.chromium.launch(headless=False)
        self.page = await self.browser.new_page(viewport={'width': 1920, 'height': 1080})
        
    async def search_place(self, query):
        """
        Search for a place on Google Maps
        Args:
            query (str): Place to search for
        Returns:
            bool: True if search was successful, False otherwise
        """
        try:
            # Navigate to Google Maps
            await self.page.goto(self.base_url, wait_until="networkidle")
            
            # Wait for and fill search box
            await self.page.fill('input#searchboxinput', query)
            await self.page.keyboard.press('Enter')
            
            # Wait for search results to load
            try:
                # Wait for the main content pane to appear
                await self.page.wait_for_selector('div[role="main"]', timeout=10000)
                
                # Wait for place information to load
                await self.page.wait_for_selector('h1', timeout=10000)
                
                # Additional wait for dynamic content
                await asyncio.sleep(2)
                
                return True
                
            except PlaywrightTimeoutError:
                print("Timeout waiting for search results")
                return False
            
        except Exception as e:
            print(f"Error during search: {str(e)}")
            return False
        
    async def get_page_content(self, output_file="google_maps_content.html"):
        """
        Get the raw HTML content from the current page and save to file
        Args:
            output_file (str): Path to save the HTML content
        Returns:
            tuple: (bool, str) - (success status, HTML content or error message)
        """
        try:
            # Wait for main content to be visible
            await self.page.wait_for_selector('div[role="main"]', state="visible")
            
            # Get both the HTML and the visible text
            content = await self.page.content()
            visible_text = await self.page.evaluate('''
                () => {
                    // Get all text from the main content area
                    const mainContent = document.querySelector('div[role="main"]');
                    return mainContent ? mainContent.innerText : '';
                }
            ''')
            
            # Save HTML content
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(content)
            
            # Save visible text to a separate file
            text_file = output_file.replace('.html', '_text.txt')
            with open(text_file, 'w', encoding='utf-8') as f:
                f.write(visible_text)
            
            print(f"HTML content saved to: {output_file}")
            print(f"Visible text saved to: {text_file}")
            
            return True, visible_text
            
        except Exception as e:
            return False, f"Error extracting content: {str(e)}"
    
    async def close(self):
        """Close the browser"""
        if self.browser:
            await self.browser.close()
        if self.playwright:
            await self.playwright.stop()

async def main():
    searcher = GoogleMapsSearcher()
    await searcher.initialize()
    
    search_query = "Eiffel Tower, Paris"
    output_file = "eiffel_tower_content2.html"
    
    if await searcher.search_place(search_query):
        print(f"Successfully searched for: {search_query}")
        
        # Get and save the content
        success, content = await searcher.get_page_content(output_file)
        if success:
            print("\nVisible text preview:")
            print("-" * 50)
            print(content[:1000])  # Print first 1000 characters as preview
            print("-" * 50)
        else:
            print(f"Failed to get content: {content}")
    else:
        print("Search failed")
    
    await searcher.close()

# For Jupyter notebook
await main()

Successfully searched for: Eiffel Tower, Paris
HTML content saved to: eiffel_tower_content2.html
Visible text saved to: eiffel_tower_content2_text.txt

Visible text preview:
--------------------------------------------------

See photos
Eiffel Tower
Tour Eiffel
4.7
(416,774)
Historical landmark·
Overview
Tickets
Reviews
About

Directions

Save

Nearby

Send to phone

Share
Gustave Eiffel's iconic, wrought-iron 1889 tower, with steps and elevators to observation decks.

 
Sponsored
By Viator
Experience Segway in Paris Small Group 2 Hours
₦92,653
 · 
5.0(56)
Free cancellation
 
Admission
About these results

Gives you entry to this place
Eiffel Tower 

Official site
₦22,465.82

Instant confirmation · Mobile ticket
Thrillophilia
₦17,399.94

Mobile ticket
Get Your Tickets
₦17,403.10

Free cancellation · Mobile ticket
 
More
 
 

Av. Gustave Eiffel, 75007 Paris, France

Located in: Champ de Mars

Open ⋅ Closes 12 am


toureiffel.paris



V75V+8Q Paris, France


 

Su

In [16]:
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
import asyncio

class GoogleMapsSearcher:
    def __init__(self):
        self.playwright = None
        self.browser = None
        self.page = None
        self.base_url = "https://www.google.com/maps"
        
    async def initialize(self):
        """Initialize Playwright browser and page"""
        self.playwright = await async_playwright().start()
        # Increase viewport size for better rendering
        self.browser = await self.playwright.chromium.launch(headless=False)
        self.page = await self.browser.new_page(viewport={'width': 1920, 'height': 1080})    
        
    async def search_place(self, query):
        """
        Search for a place on Google Maps and determine if it's a single result or multiple results
        Args:
            query (str): Place to search for
        Returns:
            tuple: (bool, str) - (success status, 'single' or 'multiple' or error message)
        """
        try:
            # Navigate to Google Maps
            await self.page.goto(self.base_url, wait_until="networkidle")
            
            # Wait for and fill search box
            await self.page.fill('input#searchboxinput', query)
            await self.page.keyboard.press('Enter')
            
            # Wait for either scenario
            try:
                # First, wait for any results to load
                await self.page.wait_for_selector('div[role="main"]', timeout=10000)
                
                # Check for multiple results by looking for the search results list
                is_multiple = await self.page.evaluate('''
                    () => {
                        // Check for elements that indicate a list of results
                        const resultsList = document.querySelector('div[role="feed"]');
                        const multipleResults = document.querySelectorAll('.section-result');
                        return !!(resultsList || (multipleResults && multipleResults.length > 1));
                    }
                ''')
                
                if is_multiple:
                    return True, 'multiple'
                else:
                    # Wait for single result elements
                    await self.page.wait_for_selector('h1', timeout=5000)
                    return True, 'single'
                    
            except PlaywrightTimeoutError:
                return False, "Timeout waiting for search results"
            
        except Exception as e:
            return False, f"Error during search: {str(e)}"

    async def get_page_content(self, output_file="google_maps_content.html"):
        """
        Get the raw HTML content from the current page and save to file
        Args:
            output_file (str): Path to save the HTML content
        Returns:
            tuple: (bool, str) - (success status, content or error message)
        """
        try:
            # Wait for main content to be visible
            await self.page.wait_for_selector('div[role="main"]', state="visible")
            
            # Get both the HTML and the visible text
            content = await self.page.content()
            visible_text = await self.page.evaluate('''
                () => {
                    const mainContent = document.querySelector('div[role="main"]');
                    return mainContent ? mainContent.innerText : '';
                }
            ''')
            
            # Save HTML content
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(content)
            
            # Save visible text to a separate file
            text_file = output_file.replace('.html', '_text.txt')
            with open(text_file, 'w', encoding='utf-8') as f:
                f.write(visible_text)
            
            print(f"HTML content saved to: {output_file}")
            print(f"Visible text saved to: {text_file}")
            
            return True, visible_text
            
        except Exception as e:
            return False, f"Error extracting content: {str(e)}"
        
    async def close(self):
        """Close the browser"""
        if self.browser:
            await self.browser.close()
        if self.playwright:
            await self.playwright.stop()

async def main():
    searcher = GoogleMapsSearcher()
    await searcher.initialize()
    
    search_query = "Eiffel Tower"  # Try with different queries to test both scenarios
    output_file = f"{search_query.replace(' ', '_').lower()}_content.html"
    
    success, result_type = await searcher.search_place(search_query)
    
    if success:
        if result_type == 'multiple':
            print(f"Multiple results found for: {search_query}")
            print("Please refine your search query to get a specific location.")
        elif result_type == 'single':
            print(f"Found exact match for: {search_query}")
            # Get and save the content only for single results
            success, content = await searcher.get_page_content(output_file)
            if success:
                print("\nVisible text preview:")
                print("-" * 50)
                print(content[:1000])
                print("-" * 50)
            else:
                print(f"Failed to get content: {content}")
        else:
            print(f"Unexpected result type: {result_type}")
    else:
        print(f"Search failed: {result_type}")
    
    await searcher.close()

In [17]:
# For Jupyter notebook
await main()

Multiple results found for: Eiffel Tower
Please refine your search query to get a specific location.


In [19]:
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
import asyncio

class GoogleMapsSearcher:
    def __init__(self):
        self.playwright = None
        self.browser = None
        self.page = None
        self.base_url = "https://www.google.com/maps"
        self.can_proceed = False  # Add a flag to track if we can proceed
        
    async def initialize(self):
        """Initialize Playwright browser and page"""
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch(headless=False)
        self.page = await self.browser.new_page(viewport={'width': 1920, 'height': 1080})
        
    async def search_place(self, query):
        """
        Search for a place on Google Maps and determine if it's a single result or multiple results
        Args:
            query (str): Place to search for
        Returns:
            tuple: (bool, str) - (success status, 'single' or 'multiple' or error message)
        """
        try:
            await self.page.goto(self.base_url, wait_until="networkidle")
            await self.page.fill('input#searchboxinput', query)
            await self.page.keyboard.press('Enter')
            
            try:
                await self.page.wait_for_selector('div[role="main"]', timeout=10000)
                
                is_multiple = await self.page.evaluate('''
                    () => {
                        const resultsList = document.querySelector('div[role="feed"]');
                        const multipleResults = document.querySelectorAll('.section-result');
                        return !!(resultsList || (multipleResults && multipleResults.length > 1));
                    }
                ''')
                
                if is_multiple:
                    self.can_proceed = False
                    return True, 'multiple'
                else:
                    await self.page.wait_for_selector('h1', timeout=5000)
                    self.can_proceed = True
                    return True, 'single'
                    
            except PlaywrightTimeoutError:
                self.can_proceed = False
                return False, "Timeout waiting for search results"
            
        except Exception as e:
            self.can_proceed = False
            return False, f"Error during search: {str(e)}"

    async def get_page_content(self, output_file="google_maps_content.html"):
        """
        Get the raw HTML content from the current page and save to file
        Args:
            output_file (str): Path to save the HTML content
        Returns:
            tuple: (bool, str) - (success status, content or error message)
        """
        if not self.can_proceed:
            return False, "Cannot proceed with content extraction due to search results state"
            
        try:
            await self.page.wait_for_selector('div[role="main"]', state="visible")
            
            content = await self.page.content()
            visible_text = await self.page.evaluate('''
                () => {
                    const mainContent = document.querySelector('div[role="main"]');
                    return mainContent ? mainContent.innerText : '';
                }
            ''')
            
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(content)
            
            text_file = output_file.replace('.html', '_text.txt')
            with open(text_file, 'w', encoding='utf-8') as f:
                f.write(visible_text)
            
            print(f"HTML content saved to: {output_file}")
            print(f"Visible text saved to: {text_file}")
            
            return True, visible_text
            
        except Exception as e:
            return False, f"Error extracting content: {str(e)}"
        
    async def close(self):
        """Close the browser"""
        if self.browser:
            await self.browser.close()
        if self.playwright:
            await self.playwright.stop()

async def main():
    searcher = GoogleMapsSearcher()
    await searcher.initialize()
    
    search_query = "Eiffel Tower Paris"  # Try with different queries to test both scenarios
    output_file = f"{search_query.replace(' ', '_').lower()}_content.html"
    
    success, result_type = await searcher.search_place(search_query)
    
    if success:
        if result_type == 'multiple':
            print(f"\n❌ Multiple results found for: {search_query}")
            print("Cannot proceed with data extraction.")
            print("Please refine your search query to get a specific location.")
            print("Suggestions:")
            print("- Add city/country name")
            print("- Use the full business name")
            print("- Add street address if known")
        elif result_type == 'single':
            print(f"\n✅ Found exact match for: {search_query}")
            print("Proceeding with data extraction...")
            
            success, content = await searcher.get_page_content(output_file)
            if success:
                print("\nData extraction completed successfully!")
                print("\nVisible text preview:")
                print("-" * 50)
                print(content[:1000])
                print("-" * 50)
            else:
                print(f"\n❌ Failed to extract content: {content}")
        else:
            print(f"\n❌ Unexpected result type: {result_type}")
            print("Cannot proceed with data extraction.")
    else:
        print(f"\n❌ Search failed: {result_type}")
        print("Cannot proceed with data extraction.")
    
    await searcher.close()

# For Jupyter notebook
await main()


✅ Found exact match for: Eiffel Tower Paris
Proceeding with data extraction...
HTML content saved to: eiffel_tower_paris_content.html
Visible text saved to: eiffel_tower_paris_content_text.txt

Data extraction completed successfully!

Visible text preview:
--------------------------------------------------

See photos
Eiffel Tower
Tour Eiffel
4.7
(416,780)
Historical landmark·
Overview
Tickets
Reviews
About

Directions

Save

Nearby

Send to phone

Share
Gustave Eiffel's iconic, wrought-iron 1889 tower, with steps and elevators to observation decks.

 
Sponsored
By Viator
Eiffel Tower Tour by Elevator with Summit Option
₦85,131
 · 
4.8(960)
Free cancellation
 
Admission
About these results

Gives you entry to this place
Eiffel Tower 

Official site
₦22,465.82

Instant confirmation · Mobile ticket
Thrillophilia
₦17,399.94

Mobile ticket
Get Your Tickets
₦17,403.10

Free cancellation · Mobile ticket
 
More
 
 

Av. Gustave Eiffel, 75007 Paris, France

Located in: Champ de