In [14]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
# Scrape every collection listed in the Printmine collections sitemap using
# requests + BeautifulSoup, follow each collection's pagination, and write the
# collected product rows to 'products_info.csv'.
from urllib.parse import urljoin  # stdlib; resolves relative "next page" links

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
}

sitemap_url = 'https://printmine.in/sitemap_collections_1.xml'
request_timeout = 30  # seconds; prevents a stalled connection from hanging the cell

response = requests.get(sitemap_url, headers=headers, timeout=request_timeout)
# Fail loudly on an HTTP error instead of parsing an error page into an empty
# list of collections.
response.raise_for_status()

soup = BeautifulSoup(response.content, 'xml')
loc_elements = soup.find_all('loc')

# Keep only sitemap entries that point at collection pages.
collection_urls = [loc.text for loc in loc_elements if loc.text.startswith('https://printmine.in/collections/')]

products_data = []

for url in collection_urls:
    print(f"Scraping collection: {url}")
    current_url = url
    visited_urls = set()  # guards against a pagination link that loops back
    while current_url and current_url not in visited_urls:
        visited_urls.add(current_url)

        try:
            response_collection = requests.get(current_url, headers=headers, timeout=request_timeout)
            response_collection.raise_for_status()
        except requests.RequestException as exc:
            # Best effort: give up on this collection but keep scraping the others.
            print(f"Request failed for {current_url}: {exc}")
            break

        soup_collection = BeautifulSoup(response_collection.content, 'html.parser')

        for products_info in soup_collection.find_all('div', class_='t4s-product-info'):
            # Each field is optional; a key is only set when its tag is present,
            # so missing values become NaN in the DataFrame.
            product_info = {}

            title_tag = products_info.find('h3', class_='t4s-product-title')
            if title_tag:
                product_info['title'] = title_tag.text.strip()

            price_tag = products_info.find('div', class_='t4s-product-price')
            if price_tag:
                discounted_price_tag = price_tag.find('ins')
                if discounted_price_tag:
                    product_info['discounted_price'] = discounted_price_tag.text.strip()

            badge_tag = products_info.find('span', class_='t4s-badge-sale')
            if badge_tag:
                product_info['discount'] = badge_tag.text.strip()

            rating_tag = products_info.find('div', class_='loox-rating')
            if rating_tag:
                product_info['rating'] = rating_tag.get('data-rating')

            products_data.append(product_info)

        # Follow the *next* arrow. The previous version looked up the "prev"
        # arrow, which does not exist on page 1, so only the first page of each
        # collection was scraped. A CSS selector matches the class regardless
        # of what other classes the anchor carries.
        # NOTE(review): 'pagination__item--next' mirrors the selector used by
        # the Selenium cell in this notebook - confirm against the live markup.
        next_link = soup_collection.select_one('nav.t4s-pagination a.pagination__item--next')
        next_href = next_link.get('href') if next_link else None
        current_url = urljoin(current_url, next_href) if next_href else None

        time.sleep(1)  # be polite to the server between page requests

df = pd.DataFrame(products_data)

# index=False keeps the row index out of the file (no 'Unnamed: 0' column when
# it is read back) and matches the Selenium cell's output format.
df.to_csv('products_info.csv', index=False)

print("Scraping completed. Data saved to 'products_info.csv'.")

Scraping collection: https://printmine.in/collections/personalized-corporate-gifts-for-all-occasions-shop-printmine-in
Scraping collection: https://printmine.in/collections/illusion-lamps
Scraping collection: https://printmine.in/collections/corporate-awards
Scraping collection: https://printmine.in/collections/personalized-photo-frame-wall-hanging-birthday-anniversary-gift
Scraping collection: https://printmine.in/collections/personalized-pens
Scraping collection: https://printmine.in/collections/3d-miniature-table-top-best-birthday-gift-printmine
Scraping collection: https://printmine.in/collections/personalized-sipper-bottle
Scraping collection: https://printmine.in/collections/personalized-desk-organizers
Scraping collection: https://printmine.in/collections/personalized-keychain-keyrings
Scraping collection: https://printmine.in/collections/buy-religious-gifts
Scraping collection: https://printmine.in/collections/gifts-for-boyfriend
Scraping collection: https://printmine.in/collec

In [15]:
# Display the products DataFrame built from `products_data` in the scraping cell.
df

Unnamed: 0,title,discounted_price,rating
0,Personalized Golden Pen and Keychain Set | Bes...,Rs. 549.00,5.0
1,Personalized Photo Engraved 3D Illusion Lamp D...,"Rs. 1,299.00",4.8
2,Personalized Photo Engraved 3D Illusion Lamp D...,"Rs. 1,099.00",4.9
3,Couple Photo LED 3D Illusion Lamp | Valentine'...,"Rs. 1,199.00",5.0
4,Personalized Photo Engraved 3D Illusion Lamp D...,"Rs. 1,299.00",4.7
...,...,...,...
201,Croc Style Men's Wallet | Customized With Name...,Rs. 849.00,4.3
202,Pattern Men's Wallet | Customized With Name & ...,Rs. 899.00,
203,Zipper Men's Wallet | Customized With Name & C...,Rs. 899.00,4.0
204,Textured Men's Wallet | Customized With Name &...,Rs. 899.00,4.5


In [22]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time


# Selenium variant of the scraper: load each collection page in Chrome (so
# JavaScript-rendered content is available), parse the rendered HTML with
# BeautifulSoup, follow pagination, and write the rows to 'products_info.csv'.
from bs4 import BeautifulSoup  # imported here so the cell also runs on a fresh kernel
from selenium.common.exceptions import NoSuchElementException, WebDriverException

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
}

sitemap_url = 'https://printmine.in/sitemap_collections_1.xml'

# WebDriver.get() accepts only a URL, so the User-Agent must be configured on
# the browser itself rather than passed per request.
chrome_options = Options()
chrome_options.add_argument(f"--user-agent={headers['User-Agent']}")
driver = webdriver.Chrome(options=chrome_options)

products_data = []

try:
    driver.get(sitemap_url)
    time.sleep(2)

    # NOTE(review): Chrome may wrap raw XML in its own viewer markup; this
    # relies on the <loc> elements surviving in page_source - confirm.
    soup = BeautifulSoup(driver.page_source, 'xml')
    loc_elements = soup.find_all('loc')
    collection_urls = [loc.text for loc in loc_elements if loc.text.startswith('https://printmine.in/collections/')]

    for url in collection_urls:
        print(f"Scraping collection: {url}")
        driver.get(url)
        visited_urls = {url}  # guards against a pagination link that loops back
        while True:
            try:
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CLASS_NAME, 't4s-product-info'))
                )

                soup_collection = BeautifulSoup(driver.page_source, 'html.parser')

                for products_info in soup_collection.find_all('div', class_='t4s-product-info'):
                    # Each field is optional; a key is only set when its tag exists.
                    product_info = {}

                    title_tag = products_info.find('h3', class_='t4s-product-title')
                    if title_tag:
                        product_info['title'] = title_tag.text.strip()

                    price_tag = products_info.find('div', class_='t4s-product-price')
                    if price_tag:
                        discounted_price_tag = price_tag.find('ins')
                        if discounted_price_tag:
                            product_info['discounted_price'] = discounted_price_tag.text.strip()

                    badge_tag = products_info.find('span', class_='t4s-badge-sale')
                    if badge_tag:
                        product_info['discount'] = badge_tag.text.strip()

                    rating_tag = products_info.find('div', class_='loox-rating')
                    if rating_tag:
                        product_info['rating'] = rating_tag.get('data-rating')

                    products_data.append(product_info)

                # find_element raises when nothing matches (it never returns a
                # falsy value), so a missing "next" link is handled here.
                try:
                    next_button = driver.find_element(By.CSS_SELECTOR, 'a.pagination__item--next')
                except NoSuchElementException:
                    break

                # Navigate by URL instead of clicking: driver.get() blocks until
                # the new page has loaded, so the next iteration cannot re-parse
                # the old page, and overlays cannot intercept the click.
                next_href = next_button.get_attribute('href')
                if not next_href or next_href in visited_urls:
                    break
                visited_urls.add(next_href)
                driver.get(next_href)
                time.sleep(2)

            except WebDriverException as e:
                # Best effort: report the problem (e.g. the wait timing out on a
                # collection without products) and move on to the next collection.
                print(f"An error occurred: {e}")
                break
finally:
    # Always close the browser, even if scraping fails or is interrupted.
    driver.quit()

df = pd.DataFrame(products_data)
df.to_csv('products_info.csv', index=False)

print("Scraping completed. Data saved to 'products_info.csv'.")

Scraping collection: https://printmine.in/collections/personalized-corporate-gifts-for-all-occasions-shop-printmine-in
Scraping collection: https://printmine.in/collections/illusion-lamps
Scraping collection: https://printmine.in/collections/corporate-awards
An error occurred: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF78131B5D2+29090]
	(No symbol) [0x00007FF78128E689]
	(No symbol) [0x00007FF78114B1CA]
	(No symbol) [0x00007FF78119EFD7]
	(No symbol) [0x00007FF78119F22C]
	(No symbol) [0x00007FF7811E97F7]
	(No symbol) [0x00007FF7811C672F]
	(No symbol) [0x00007FF7811E65D9]
	(No symbol) [0x00007FF7811C6493]
	(No symbol) [0x00007FF7811909B1]
	(No symbol) [0x00007FF781191B11]
	GetHandleVerifier [0x00007FF781638C5D+3295277]
	GetHandleVerifier [0x00007FF781684843+3605523]
	GetHandleVerifier [0x00007FF78167A707+3564247]
	GetHandleVerifier [0x00007FF7813D6EB6+797318]
	(No symbol) [0x00007FF78129980F]
	(No symbol) [0x00007FF7812953F4]
	(No symbol) [0x00007FF781295580]
	(No symbol) [0x00007

Unnamed: 0,title,discounted_price,rating
0,Personalized Golden Pen and Keychain Set | Bes...,Rs. 549.00,5.0
1,Personalized Photo Engraved 3D Illusion Lamp D...,"Rs. 1,299.00",4.8
2,Personalized Photo Engraved 3D Illusion Lamp D...,"Rs. 1,099.00",4.9
3,Couple Photo LED 3D Illusion Lamp | Valentine'...,"Rs. 1,199.00",5.0
4,Personalized Photo Engraved 3D Illusion Lamp D...,"Rs. 1,299.00",4.7
...,...,...,...
201,Croc Style Men's Wallet | Customized With Name...,Rs. 849.00,4.3
202,Pattern Men's Wallet | Customized With Name & ...,Rs. 899.00,
203,Zipper Men's Wallet | Customized With Name & C...,Rs. 899.00,4.0
204,Textured Men's Wallet | Customized With Name &...,Rs. 899.00,4.5
