In [1]:

#  Parent-child web scraping involves navigating through a hierarchy of web elements, where a parent element contains one 
#  or more child elements. This scenario often occurs when you click on a link to access detailed information on a webpage 
#  and then navigate back to the previous page to extract more links or information.

#  Identify the Parent Element: The parent element is typically the one containing the links or information you want to 
#  interact with. This could be a list of links, a table, or any other container element.

#  Click a Link in the Parent Element: Use a web scraping tool like Selenium to locate the parent element and click one of 
#  the links it contains. You can locate elements using various methods such as XPath, CSS selectors, or class names.

#  Extract Child Elements: Once you've clicked a link in the parent element and reached a new page or section containing 
#  more detailed information (the child elements), you can extract the desired information using the same locator methods. 
#  This may involve finding specific elements, text, or attributes within the child elements.

#  Navigate Back to the Parent Page: After extracting the information from the child elements, you typically want to 
#  navigate back to the parent page to continue scraping additional links or information. You can use the browser's 
#  built-in back button or Selenium's back() method to achieve this.
        

In [3]:

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Define a list of state abbreviations. Not every state is listed here; a
# handful is enough to illustrate the parent -> child navigation pattern.
state_abbreviations = ['al', 'az', 'co', 'wy']  # Add more as needed

# Root of the location directory (the "parent" of every state page)
main_page_url = 'https://locations.chipotle.com/'

# Seconds to wait for an expected element before giving up on a page
WAIT_SECONDS = 3

# CSS classes that hold each part of a location's address, in print order
ADDRESS_CLASSES = (
    'c-address-street-1',
    'c-address-city',
    'c-address-state',
    'c-address-postal-code',
)


def find_city_links():
    """Return the city link elements of the state page currently loaded in ``driver``.

    Returns:
        The list of WebElements matching the 'Directory-listLink' class once
        all of them are visible.

    Raises:
        TimeoutException: if the links are not all visible within WAIT_SECONDS.
    """
    return WebDriverWait(driver, WAIT_SECONDS).until(
        EC.visibility_of_all_elements_located(
            (By.XPATH, "//a[contains(@class,'Directory-listLink')]")
        )
    )


def wait_for_address():
    """Block until a 'Core-address' element is present on the current page.

    Raises:
        TimeoutException: if the element does not appear within WAIT_SECONDS.
    """
    WebDriverWait(driver, WAIT_SECONDS).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'Core-address'))
    )


def print_address_details():
    """Parse the current page with BeautifulSoup and print its address.

    If any of the expected address elements is missing, a notice is printed
    and nothing else happens (instead of failing on ``None.get_text``).
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    parts = []
    for css_class in ADDRESS_CLASSES:
        tag = soup.find(class_=css_class)
        if tag is None:
            print(f"Address element '{css_class}' not found at {driver.current_url}; skipping.")
            return
        parts.append(tag.get_text(strip=True))

    street_address, city, state, postal_code = parts
    address_details = f"{street_address}\n{city}, {state} {postal_code}"
    print(f"Address Details: {address_details}")


def scrape_city(city_url):
    """Print the address of every location reachable from one city link.

    The page behind a city link either shows an address directly (a
    'Core-address' element appears) or lists locations as 'Teaser-titleLink'
    anchors, each of which is then visited in turn.

    Args:
        city_url: absolute URL taken from a city link's ``href``.

    Raises:
        WebDriverException: if the browser fails to load ``city_url`` or one
            of the location pages.
    """
    driver.get(city_url)
    try:
        wait_for_address()
    except TimeoutException:
        print("No Core-address found. Looking for LocationName.")
    else:
        print_address_details()
        return

    location_name_links = driver.find_elements(By.XPATH, "//a[@class='Teaser-titleLink']")
    # Snapshot the hrefs before navigating away; dict.fromkeys drops
    # duplicates while keeping the order in which the links appear.
    hrefs = list(dict.fromkeys(
        href
        for href in (link.get_attribute("href") for link in location_name_links)
        if href
    ))

    for href in hrefs:
        driver.get(href)
        try:
            wait_for_address()
        except TimeoutException:
            # One slow or odd location page must not abort the remaining ones.
            print(f"No Core-address found at {href}; skipping.")
            continue
        print_address_details()


# Configure Selenium WebDriver
driver = webdriver.Chrome()
try:
    driver.maximize_window()

    # Open the main page
    driver.get(main_page_url)

    # Iterate through state abbreviations
    for state_abbr in state_abbreviations:
        state_url = f"https://locations.chipotle.com/{state_abbr}"
        print(f"Processing state with abbreviation: {state_abbr}, URL: {state_url}")

        try:
            # Navigate to the state URL
            driver.get(state_url)
            # Record (name, href) pairs now: the WebElements themselves are
            # tied to this page and are not safe to reuse after navigating.
            cities = [
                (link.text.strip(), link.get_attribute("href"))
                for link in find_city_links()
            ]
        except WebDriverException as exc:
            print(f"Could not list cities for state {state_abbr} ({type(exc).__name__}); moving to next state.")
            continue

        for city_name, city_url in cities:
            print(f"City: {city_name}")
            if not city_url:
                print("City link has no href; skipping.")
                continue
            try:
                scrape_city(city_url)
            except WebDriverException as exc:
                # Best effort: report the failure and carry on with the next city.
                print(f"Skipping {city_name} ({type(exc).__name__}); finding next city...")
finally:
    # Close the Selenium WebDriver even if the run is interrupted or fails
    driver.quit()


Processing state with abbreviation: al, URL: https://locations.chipotle.com/al
City: ATHENS
Address Details: 1289 US Highway 72 E
Athens, AL 35611
City: AUBURN
Address Details: 346 W Magnolia Ave
Auburn, AL 36832
City: BIRMINGHAM
No Core-address found. Looking for LocationName.
Address Details: 300 20th St S
Birmingham, AL 35233
Address Details: 3220 Morrow Rd
Birmingham, AL 35235
Address Details: 4719 Highway 280
Birmingham, AL 35242
Address Details: 5342 Highway 280
Birmingham, AL 35242
City: CULLMAN
Address Details: 1821 Cherokee Ave SW
Cullman, AL 35055
City: DAPHNE
Address Details: 914 Van Ave
Daphne, AL 36526
City: DECATUR
Address Details: 1109 Beltline Rd SE
Decatur, AL 35601
City: DOTHAN
Address Details: 3473 Ross Clark Drive
Dothan, AL 36303
City: FOLEY
Address Details: 2862 S. McKenzie Street
Foley, AL 36536
City: GARDENDALE
Address Details: 616 Gayle St
Gardendale, AL 35071
City: HOMEWOOD
Address Details: 273 Lakeshore Pkwy
Homewood, AL 35209
City: HOOVER
No Core-address fou