In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import TimeoutException

driver = webdriver.Chrome()

url = 'https://www.art.com'
driver.get(url)

wait = WebDriverWait(driver, 10)

    
artists_tab = driver.find_element(By.XPATH, '//*[@title="Artists"]')

dropdown_menu = artists_tab.find_element(By.XPATH, '//*[@id="menu-list-:R1iqlblb8rambpH1:"]/div')

artist_links = dropdown_menu.find_elements(By.XPATH, './/a[contains(@class, "chakra-link")]')

artist_urls = [link.get_attribute('href') for link in artist_links]

for url in artist_urls:
    print(url)



https://www.art.com/gallery/id--b24807/american-art-posters.htm
https://www.art.com/gallery/id--a55/andrew-wyeth-posters.htm
https://www.art.com/gallery/id--a76/andy-warhol-posters.htm
https://www.art.com/gallery/id--a1129/currier-ives-posters.htm
https://www.art.com/gallery/id--a50/edward-hopper-posters.htm
https://www.art.com/gallery/id--a514/georgia-o-keeffe-posters.htm
https://www.art.com/gallery/id--a5/jackson-pollock-posters.htm
https://www.art.com/gallery/id--a27866/jean-michel-basquiat-posters.htm
https://www.art.com/gallery/id--a23630/john-james-audubon-posters.htm
https://www.art.com/gallery/id--a82/keith-haring-posters.htm
https://www.art.com/gallery/id--a34/maxfield-parrish-posters.htm
https://www.art.com/gallery/id--a32/norman-rockwell-posters.htm
https://www.art.com/gallery/id--a79/roy-lichtenstein-posters.htm
https://www.art.com/gallery/id--a77/wayne-thiebaud-posters.htm
https://www.art.com/gallery/id--b40341/modern-masters-posters.htm
https://www.art.com/gallery/id--a70

In [21]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time

class ArtComScraper:
    def __init__(self, driver):
        self.driver = driver
        self.wait = WebDriverWait(driver, 20)

    def open_url(self, url):
        self.driver.get(url)

    def hover_and_get_artist_links(self):
        try:
            artists_tab = self.wait.until(
                EC.presence_of_element_located((By.XPATH, '//*[@title="Artists"]'))
            )
            ActionChains(self.driver).move_to_element(artists_tab).perform()

            dropdown_menu = self.wait.until(
                EC.visibility_of_element_located((By.ID, 'menu-list-:R1iqlblb8rambpH1:'))
            )

            artist_links = dropdown_menu.find_elements(By.XPATH, './/a[contains(@class, "chakra-link")]')

            artist_names = []
            artist_urls = []
            for link in artist_links:
                title = link.get_attribute('title')
                href = link.get_attribute('href')
                if href and title and not any(heading in title for heading in ["American Artists", "Modern Masters", "Rising Stars", "Vintage Artists"]):
                    artist_names.append(title)
                    artist_urls.append(href)

            return artist_urls, artist_names

        except Exception as e:
            print(f"Error in hover_and_get_artist_links: {e}")
            return [], []

    def scrape_product_details_from_artist_page(self):
        try:
            product_elements = self.driver.find_elements(By.XPATH, '//div[contains(@class, "product-tile")]')

            product_details_list = []
            for product in product_elements:
                try:
                    title = product.find_element(By.XPATH, './/h2[contains(@class, "chakra-text")]').text
                    artist = product.find_element(By.XPATH, './/a[contains(@class, "chakra-link")]//p[contains(@class, "chakra-text")]').text
                    product_type = product.find_element(By.XPATH, './/p[contains(@class, "chakra-text") and contains(@class, "css-4djx3b")]').text
                    product_size = product.find_element(By.XPATH, './/p[contains(@class, "chakra-text") and contains(@class, "css-8u139p")]').text
                    price = product.find_element(By.XPATH, './/p[contains(@class, "chakra-text") and contains(@class, "css-1ljytki")]').text

                    product_details_list.append({
                        'title': title,
                        'artist': artist,
                        'type': product_type,
                        'size': product_size,
                        'price': price
                    })

                except Exception as e:
                    print(f"Error processing product details: {e}")

            return product_details_list
        except Exception as e:
            print(f"Error scraping product details from artist page: {e}")
            return []

    def scrape_all_pages(self, base_url):
        all_product_details = []
        self.open_url(base_url)

        while True:
            
            product_details = self.scrape_product_details_from_artist_page()
            all_product_details.extend(product_details)

           
            print(f"Current URL: {self.driver.current_url}")

                
            next_button = self.wait.until(
                EC.presence_of_element_located((By.XPATH, '//a[@aria-label="Next Page"]'))
                )

            if next_button:
                print("Next Page button found.")

                     
                if next_button.is_displayed() and next_button.is_enabled():
                    print("Clicking Next Page button...")
                    next_button.click()
                    self.wait.until(EC.staleness_of(next_button)) 
                    time.sleep(2) 
                else:
                    print("Next page button is not enabled or visible. Ending scraping.")
                    break
            else:
                print("Next Page button not found. Ending scraping.")
                break


        return all_product_details

if __name__ == "__main__":
    driver = webdriver.Chrome()
    scraper = ArtComScraper(driver)

    try:
        scraper.open_url('https://www.art.com')
        artist_urls, artist_names = scraper.hover_and_get_artist_links()
        
        if not artist_urls:
            print("No artist URLs found.")
        else:
            for artist_url, artist_name in zip(artist_urls, artist_names):
                print(f"Scraping artist: {artist_name} (URL: {artist_url})")
                print("__________________________________________________________________________")

                product_details = scraper.scrape_all_pages(artist_url)
                for details in product_details:
                    print(f"Title: {details['title']}, Artist: {details['artist']}, Type: {details['type']}, Size: {details['size']}, Price: {details['price']}")

    except Exception as e:
        print(f"An error occurred in the main execution: {e}")


Scraping artist: Andrew Wyeth (URL: https://www.art.com/gallery/id--a55/andrew-wyeth-posters.htm)
__________________________________________________________________________
Current URL: https://www.art.com/gallery/id--a55/andrew-wyeth-posters.htm
An error occurred in the main execution: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7F1159412+29090]
	(No symbol) [0x00007FF7F10CE239]
	(No symbol) [0x00007FF7F0F8B1DA]
	(No symbol) [0x00007FF7F0FDEFE7]
	(No symbol) [0x00007FF7F0FDF23C]
	(No symbol) [0x00007FF7F10297C7]
	(No symbol) [0x00007FF7F100672F]
	(No symbol) [0x00007FF7F10265A2]
	(No symbol) [0x00007FF7F1006493]
	(No symbol) [0x00007FF7F0FD09D1]
	(No symbol) [0x00007FF7F0FD1B31]
	GetHandleVerifier [0x00007FF7F147871D+3302573]
	GetHandleVerifier [0x00007FF7F14C4243+3612627]
	GetHandleVerifier [0x00007FF7F14BA417+3572135]
	GetHandleVerifier [0x00007FF7F1215EB6+801862]
	(No symbol) [0x00007FF7F10D945F]
	(No symbol) [0x00007FF7F10D4FB4]
	(No symbol) [0x00007FF7F10D5140]
	(No symbol