In [1]:

#  Parent-child web scraping involves navigating through a hierarchy of web elements, where a parent element contains one 
#  or more child elements. This scenario often occurs when you click on a link to access detailed information on a webpage 
#  and then navigate back to the previous page to extract more links or information.

#  Identify the Parent Element: The parent element is typically the one containing the links or information you want to 
#  interact with. This could be a list of links, a table, or any other container element.

#  Click a Link in the Parent Element: Use a browser-automation tool like Selenium to locate the parent element and click 
#  one of the links it contains. You can locate elements using various methods such as XPath, CSS selectors, or class names.

#  Extract Child Elements: Once you've clicked on the parent element and accessed a new page or section containing more 
#  detailed information (the child elements), you can extract the desired information using similar methods as before. 
#  This may involve finding specific elements, text, or attributes within the child elements.

#  Navigate Back to the Parent Page: After extracting the information from the child elements, you typically want to 
#  navigate back to the parent page to continue scraping additional links or information. You can use the browser's 
#  built-in back button or Selenium's back() method to achieve this.
        

In [26]:

import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Define the function to find city links
def find_city_links():
    """Return the city links shown on the currently loaded directory page.

    Uses the module-level ``driver`` and waits up to 10 seconds for the
    anchors whose class contains ``Directory-listLink`` to become visible.
    The result of the wait (the located link elements) is returned as-is.
    """
    city_link_locator = (By.XPATH, "//a[contains(@class,'Directory-listLink')]")
    all_links_visible = EC.visibility_of_all_elements_located(city_link_locator)
    waiter = WebDriverWait(driver, 10)
    return waiter.until(all_links_visible)

# Pause (seconds) after each navigation so the page can settle before it is queried
PAGE_SETTLE_SECONDS = 2
# Maximum time (seconds) to wait for an expected element to show up on a page
ELEMENT_WAIT_SECONDS = 3


def print_location_address(driver):
    """Wait for the address block on the current page and print the address.

    Args:
        driver: Selenium WebDriver currently showing a single-location page.

    Raises:
        TimeoutException: if no ``Core-address`` element appears within
            ``ELEMENT_WAIT_SECONDS`` seconds.
    """
    WebDriverWait(driver, ELEMENT_WAIT_SECONDS).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'Core-address'))
    )

    # Parse a snapshot of the page with BeautifulSoup to pull out the address parts
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    street_address = soup.find(class_='c-address-street-1').get_text(strip=True)
    city = soup.find(class_='c-address-city').get_text(strip=True)
    state = soup.find(class_='c-address-state').get_text(strip=True)
    postal_code = soup.find(class_='c-address-postal-code').get_text(strip=True)
    address_details = f"{street_address}\n{city}, {state} {postal_code}"
    print(f"Address Details: {address_details}")


# Start a new Chrome session
driver = webdriver.Chrome()

# Everything below runs inside try/finally so the browser is always closed,
# even when a wait times out or the page layout changes mid-run.
try:
    # Maximize the browser window
    driver.maximize_window()

    # Navigate to the website
    driver.get("https://locations.chipotle.com/")

    # Fetch state abbreviations
    # You can easily get all the states you want data for. We are just illustrating 4 states here.
    # It takes a long time to do all 49 states! As of this date, there are no Chipotles in Alaska!
    state_abbreviations = ["ct", "id", "ok", "wy"]

    # Iterate through state abbreviations
    for state_abbr in state_abbreviations:
        print(f"State: {state_abbr.upper()}")

        # Construct the state URL
        state_url = f"https://locations.chipotle.com/{state_abbr}"
        print(f"State URL: {state_url}")

        # Navigate to the state URL
        driver.get(state_url)

        # Only the number of city links is taken here; the elements themselves
        # are re-located on every iteration because navigating away and back
        # invalidates previously found elements.
        city_count = len(find_city_links())

        # Iterate through city links by position
        for city_index in range(city_count):
            # Re-locate the city links and pick the one for this iteration
            city_link = find_city_links()[city_index]

            # Extract the city name
            city_name = city_link.text.strip()
            print(f"City: {city_name}")

            # Scroll to the city link
            ActionChains(driver).move_to_element(city_link).perform()

            # Click on the city link to drill down
            city_link.click()

            # Wait for the page to load fully
            time.sleep(PAGE_SETTLE_SECONDS)

            try:
                # The city link led straight to a page with an address block
                print_location_address(driver)
            except TimeoutException:
                # No address block: look for per-location title links instead
                teaser_links = driver.find_elements(By.XPATH, "//a[@class='Teaser-titleLink']")

                # Collect the hrefs before navigating away; dict.fromkeys
                # removes duplicates while keeping the order on the page.
                hrefs = (link.get_attribute("href") for link in teaser_links)
                location_urls = list(dict.fromkeys(href for href in hrefs if href))

                for location_url in location_urls:
                    # Open the individual location page
                    driver.get(location_url)

                    # Wait for the page to load fully
                    time.sleep(PAGE_SETTLE_SECONDS)

                    try:
                        print_location_address(driver)
                    except TimeoutException:
                        # One slow or unusual location page should not abort the whole run
                        print(f"Address Details: not found for {location_url}")

                    # Return to the city page before visiting the next location
                    driver.back()

            # Go back to the state page that lists the cities
            driver.back()

            # Wait for the page to load fully
            time.sleep(PAGE_SETTLE_SECONDS)

            # Wait until the city listing is present again before the next iteration
            WebDriverWait(driver, ELEMENT_WAIT_SECONDS).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'Directory-listLinkText'))
            )
finally:
    # Close the browser session
    driver.quit()


State: CT
State URL: https://locations.chipotle.com/ct
City: BRANFORD
Address Details: 2 Commercial Pkwy
Branford, CT 06405
City: BRIDGEPORT
Address Details: 275 E Main St
Bridgeport, CT 06608
City: BRISTOL
Address Details: 1444 Farmington Ave
Bristol, CT 06010
City: CANTON
Address Details: 110 Albany Tpke
Canton, CT 06019
City: DANBURY
Address Details: 115 Mill Plain Rd
Danbury, CT 06811
Address Details: 7 Backus Ave
Danbury, CT 06810
Address Details: 81 Newtown Rd
Danbury, CT 06810
City: DARIEN
Address Details: 71 Post Rd
Darien, CT 06820
Address Details: I-95 Darien Nb Service Plaza
Darien, CT 06820
City: ENFIELD
Address Details: 7 Hazard Ave
Enfield, CT 06082
City: FAIRFIELD
Address Details: 340 Grasmere Ave
Fairfield, CT 06824
City: GLASTONBURY
Address Details: 140 Glastonbury Blvd
Glastonbury, CT 06033
City: GROTON
Address Details: 258 Route 12
Groton, CT 06340
City: HAMDEN
Address Details: 2165 Dixwell Ave
Hamden, CT 06514
City: MANCHESTER
Address Details: 48 Hale Rd
Manchester,