In [1]:
import requests
import pandas as pd
import random
from datetime import datetime
from selenium import webdriver
# from selenium.webdriver.chrome.options import Options
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
from amazoncaptcha import AmazonCaptcha
import time
import re
import os

In [2]:
# Definir la ruta de la carpeta y el archivo
folder_path = "../output"
csv_file = "data.csv"
csv_path = os.path.join(folder_path, csv_file)

# Verificar si la carpeta existe, si no, crearla
if not os.path.exists(folder_path):
    os.makedirs(folder_path)
    print(f"Carpeta creada: {folder_path}")

# Si el archivo CSV no existe, crear un DataFrame vacío con las columnas adecuadas
if not os.path.exists(csv_path):
    data = pd.DataFrame(columns=["Product_Code", "Title", "Vendor_Name", "Rating", "Num_Ratings", "Ranking", "Category", "Price", "Quantity", "Min_Price_Comp", "Max_Price_Comp", "Avg_Price_Comp", "Date"])
    data.to_csv(csv_path, sep='|', index=False) 
else:
    data = pd.read_csv(csv_path, sep='|')

In [None]:
# Configurar las opciones de webdriver
options = Options()
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
options.binary_location = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe'
service = Service('../others/geckodriver.exe')
driver = webdriver.Firefox(service=service, options=options)

driver.maximize_window()

# Abrir la página de Amazon
url = "https://www.amazon.com/"
driver.get(url)
time.sleep(10)

try:
    image = driver.find_element(By.TAG_NAME, "img")
    image_url = image.get_attribute("src")
    print("URL de la imagen:", image_url)

    response = requests.get(image_url, stream=True)
    image_path = "../others/captured_image.jpg"
    with open(image_path, 'wb') as file:
        for chunk in response.iter_content(1024):
            file.write(chunk)
    print(f"Imagen descargada correctamente en: {image_path}")

    captcha_value = AmazonCaptcha(image_path).solve()
    print(f"El texto del captcha es: {captcha_value}")

    captcha_input = driver.find_element(By.ID, "captchacharacters")
    for char in captcha_value:
        captcha_input.send_keys(char)
        time.sleep(random.uniform(0, 1))
    print("Captcha ingresado.")
    captcha_input.send_keys(Keys.RETURN)
except:
    pass

print("Ingresaste a Amazon!")
time.sleep(10)

try:
    location_link = driver.find_element(By.ID, "nav-global-location-popover-link")
    location_link.click()
    time.sleep(5)

    postal_code = "33101"
    postal_code_input = driver.find_element(By.XPATH, "//input[@autocomplete='postal-code']")
    for char in postal_code:
        postal_code_input.send_keys(char)
        time.sleep(random.uniform(0, 1))

    postal_code_button = driver.find_element(By.XPATH, "//span[@id='GLUXZipUpdate']")
    postal_code_button.click()
    print(f"Nos geolocalizamos en Miami.")
    time.sleep(10)
except:
    pass

# Lista de productos con sus atributos
product_list = [
    {"id": "B09G92Y196", "item": "0"},
    {"id": "B0DNPY26QB", "item": "0"},
    {"id": "B0DGGHCK98", "item": "0"},
    {"id": "B0D9PT21MS", "item": "0"},
    {"id": "B08DLWSN75", "item": "1"},
    {"id": "B076JLCN2D", "item": "0"},
    {"id": "B087CBJ465", "item": "0"},
    {"id": "B0BSVJ9T76", "item": "0"},
    {"id": "B09FL6YR9L", "item": "1"},
    {"id": "B0B2RFPJY2", "item": "1"},
    {"id": "B09JG5H9HS", "item": "0"},
]

random.shuffle(product_list)

for product in product_list:
    product_id = product["id"]
    product_item_selector = product["item"]
    print(f"Ingresando al producto: {product_id}")

    try:
        product_url = url + f'gp/product/{product_id}'
        driver.get(product_url)
        time.sleep(20)

        product_code = product_id
        title = vendor_name = rating_avg = num_ratings = ranking = category = price = quantity = min_price = max_price = avg_price = "N/A"
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        driver.execute_script(f"window.scrollBy(0, 250);")
        time.sleep(10)

        try:
            vendor_name_link = driver.find_element(By.ID, "sellerProfileTriggerId")
            vendor_name = vendor_name_link.text
        except:
            vendor_name = 'Amazon.com'
        print(f"El nombre del vendedor es {vendor_name}.")

        rating_div = driver.find_element(By.ID, "averageCustomerReviews")
        rating = rating_div.text.split("\n")
        rating_avg = rating[0]
        num_ratings = rating[1].split(" ")[0]
        print(f"El rating del producto es {rating_avg} y tiene {num_ratings} reviews.")

        sales_ranking_span = driver.find_element(By.XPATH, "//span[starts-with(normalize-space(text()), '#') and contains(text(), 'in')]")
        sales_ranking_text = sales_ranking_span.text
        match_ranking = re.search(r"#([\d,]+)", sales_ranking_text)
        ranking_str = match_ranking.group(1).replace(".", "").replace(",", "").strip()
        ranking = int(ranking_str)
        match_category = re.search(r"in (.*?)(?: \(|$)", sales_ranking_text)
        category = match_category.group(1).strip()
        print(f"El ranking del producto es {ranking} en la categoría {category}.")

        try:
            selector = f"input.a-button-input[name='{product_item_selector}']"
            elemento = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
            elemento.click()
            time.sleep(10)

            current_url = driver.current_url
            match = re.search(r"product/([^/?]+)([/?]|$)", current_url)
            product_code = match.group(1)
            print(f"El subproducto seleccionado es el {product_code}")
        except:
            print(f"No se encontró selector de tamaño con name='{product_item_selector}'")

        title_h1 = driver.find_element(By.ID, "title")
        title = title_h1.text
        print(f"El título de la publicación es {title}.")

        add_to_cart_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//*[contains(@id, 'add-to-cart-button')]"))
        )
        add_to_cart_button.click()
        print("Botón 'Añadir al carrito' encontrado.")

        try:
            dont_add_span = driver.find_element(By.ID, "attachSiNoCoverage-announce")
            print("Sugiere protección de la compra.")
            window_size = driver.get_window_size()
            center_x = window_size['width'] // 4
            center_y = window_size['height'] // 4
            actions = ActionChains(driver)
            actions.move_by_offset(center_x, center_y).click().perform()
            print("Se continua sin aceptar protección de la compra.")
            time.sleep(5)
        except:
            pass

        cart_url = url + 'cart'
        driver.get(cart_url)
        time.sleep(10)

        product_price_span = driver.find_element(By.XPATH, "//span[contains(@class, 'sc-product-price')]")
        product_price = product_price_span.text
        print(f"El precio del producto es {product_price}.")

        while True:
            try:
                quantity_span = driver.find_element(By.XPATH, "//span[@data-a-selector='value']")
                quantity = quantity_span.text

                increment_button = driver.find_element(By.XPATH, "//button[@data-a-selector='increment']")
                increment_button.click()
                time.sleep(4)

                quantity_span = driver.find_element(By.XPATH, "//span[@data-a-selector='value']")
                quantity_after = quantity_span.text

                if quantity == quantity_after:
                    delete_button = driver.find_element(By.XPATH, "//input[@data-feature-id='item-delete-button']")
                    delete_button.click()
                    print("Botón 'Eliminar' clickeado.")
                    print(f"Este producto sólo tiene {quantity} unidades.")
                    break
            except NoSuchElementException:
                try:
                    quantity_input = driver.find_element(By.XPATH, "//input[@type='number']")
                    quantity_input.send_keys("000")
                    print("Se ingresó el valor 1000 en el campo para ingreso de cantidades.")

                    try:
                        update_link = driver.find_element(By.XPATH, "//a[@data-action='update']")
                        update_link.click()
                        print("Botón de actualizar clickeado.")
                        time.sleep(2)
                        cart_h2 = driver.find_element(By.ID, "sc-active-items-header")
                        cart_h2.click()
                        time.sleep(2)
                        quantity_input = driver.find_element(By.XPATH, "//input[@type='number']")
                        quantity = quantity_input.get_attribute("value")

                        delete_button = driver.find_element(By.XPATH, "//input[@data-feature-id='item-delete-button']")
                        delete_button.click()
                        print("Botón 'Eliminar' clickeado.")
                        print(f"Este producto sólo tiene {quantity} unidades.")
                        break
                    except NoSuchElementException:
                        print("No se encontró el botón de actualizar.")
                except NoSuchElementException:
                    print("No se encontró ningún campo para ingreso de cantidades")

        search_box = driver.find_element(By.ID, "twotabsearchtextbox")
        search_box.send_keys(title)
        search_box.send_keys(Keys.RETURN)
        time.sleep(5)

        price_list = []
        price_spans = driver.find_elements(By.CLASS_NAME, "a-price")
        for span in price_spans:
            price = span.text.replace("\n", ".").replace(",", "").replace("US$", "").replace("$", "")
            try:
                price = float(price.strip())
                price_list.append(price)
            except:
                pass

        if price_list:
            min_price = min(price_list)
            max_price = max(price_list)
            avg_price = round(sum(price_list) / len(price_list), 2)

        print(f"El precio mínimo de los productos relacionados es {min_price}.")
        print(f"El precio máximo de los productos relacionados es {max_price}.")
        print(f"El precio promedio de los productos relacionados es {avg_price}.")

        new_row = {
            "Product_Code": product_code,
            "Title": title,
            "Vendor_Name": vendor_name,
            "Rating": rating_avg,
            "Num_Ratings": num_ratings,
            "Ranking": ranking,
            "Category": category,
            "Price": product_price,
            "Quantity": quantity,
            "Min_Price_Comp": min_price,
            "Max_Price_Comp": max_price,
            "Avg_Price_Comp": avg_price,
            "Date": timestamp,
        }

        data = pd.concat([data, pd.DataFrame([new_row])], ignore_index=True)
        data.to_csv(csv_path, sep='|', index=False)
        print(f"Datos del producto {product_id} guardados en el archivo: {csv_path}.")

    except Exception as e:
        print(f"Datos del producto {product_id} no pudieron ser capturados.")
        print(f"Error: {e}")
        continue

# Cerrar el navegador
driver.quit()

URL de la imagen: https://fls-na.amazon.com/1/batch/1/OP/ATVPDKIKX0DER:145-1446480-6509039:49CZWADVKR3KXSC0SVX7$uedata=s:%2Frd%2Fuedata%3Fstaticb%26id%3D49CZWADVKR3KXSC0SVX7:0
Imagen descargada correctamente en: ../others/captured_image.jpg
El texto del captcha es: Not solved
Ingresaste a Amazon!
Nos geolocalizamos en Miami.
Ingresando al producto: B09JG5H9HS
El nombre del vendedor es RAYANCO PRODUCTS.
El rating del producto es 4.5 y tiene 4,705 reviews.
El ranking del producto es 22567 en la categoría Tools & Home Improvement.
No se encontró selector de tamaño con name='0'
El título de la publicación es Star Projector Night Lights,Tiktok Astronaut Nebula Galaxy Lights for Bedroom,Gaming Room Décor Aesthetic,Remote Control Timing and 360°Rotation Magnetic Head.
Botón 'Añadir al carrito' encontrado.
El precio del producto es $19.99.
