In [1]:
import os
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import ddddocr
import time

# Directory that receives the logo screenshots and the CSV index.
# The original Windows location remains the default; set the LOGO_SAVE_DIR
# environment variable to redirect the output on another machine without
# editing the notebook (an empty value falls back to the default).
save_dir = os.environ.get('LOGO_SAVE_DIR') or r'D:\UM\CISC4000\logos'
os.makedirs(save_dir, exist_ok=True)

# Path of the CSV file that indexes the saved logos.
csv_file_path = os.path.join(save_dir, 'logos_info.csv')

# Chrome options.
chrome_options = Options()
# chrome_options.add_argument("--headless")  # uncomment to run without a visible browser window

# Start Chrome through a driver binary resolved by webdriver_manager, then
# open the trademark search page.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)
driver.get('https://www.dsedt.gov.mo/zh_MO/web/public/pg_es_ae_qe_trademark#briefResultPanel')

# Pause for 10 seconds so the user can dismiss the popup by hand.
print("等待 10 秒，请关闭弹窗...")
time.sleep(10)

# Add the figurative-element codes 1 through 29 to the search criteria,
# one code per pass.
for vienna_code in range(1, 30):
    # Look the code field up afresh on every pass, empty it and type the code.
    code_field = driver.find_element(By.ID, "A9158:inputViennaCode")
    code_field.clear()
    code_field.send_keys(f"{vienna_code}")

    # Press the "add" button.
    driver.find_element(By.ID, "A9158:j_idt68").click()

    # Fixed one-second pause to let the page register the added code.
    time.sleep(1)

# Wait until the captcha image is actually displayed (not merely attached to
# the DOM) before taking the element screenshot that is fed to the OCR.
captcha_img_element = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, "//div[@id='A9158:dsecaptcha']//img"))
)
captcha_img = captcha_img_element.screenshot_as_png

# Recognise the captcha text with ddddocr.
ocr = ddddocr.DdddOcr()
captcha_code = ocr.classification(captcha_img)

# Type the recognised text into the captcha field. The field is cleared first
# so that re-running this cell against the same page does not append the new
# guess to a previous one.
captcha_input = driver.find_element(By.ID, "A9158:dsecaptchaText")
captcha_input.clear()
captcha_input.send_keys(captcha_code)

# Submit the query.
search_button = driver.find_element(By.ID, "A9158:j_idt88")
search_button.click()

# Block (up to 60 s) until the top paginator of the result table exists.
# The message makes the resulting TimeoutException point at the most likely
# cause instead of being empty.
WebDriverWait(driver, 60).until(
    EC.presence_of_element_located((By.XPATH, "//div[@id='A9158:briefResultTable_paginator_top']")),
    message="查询结果未在 60 秒内出现，验证码可能识别错误",
)

# Walk every result page, saving one screenshot per trademark and one CSV row
# describing it. The scrape runs inside try/finally so the browser is shut
# down even if an unexpected error aborts the run.
try:
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        # Six headers for the six values written per row. The class-number
        # column ('分类编号') used to be missing, which left the last value
        # without a header. The final column stores the image file stem
        # ("logo_N"), not the full path.
        writer.writerow(['商标编号', '申请人', '商标文字', '状态', '分类编号', '图像路径'])

        # Running counter used to name the saved images; it only advances
        # when a row has been fully processed.
        logo_index = 1

        while True:
            # All data rows of the result table on the current page.
            rows = driver.find_elements(By.XPATH, "//*[@id='A9158:briefResultTable_data']/tr")

            for row in rows:
                try:
                    trademark_number = row.find_element(By.XPATH, "./td[1]").text  # trademark number
                    applicant = row.find_element(By.XPATH, "./td[2]").text  # applicant
                    trademark_text = row.find_element(By.XPATH, "./td[3]").text  # trademark text
                    status = row.find_element(By.XPATH, "./td[4]").text  # status
                    classify = row.find_element(By.XPATH, "./td[5]").text  # class number
                    logo_img_index = f'logo_{logo_index}'
                    logo_path = os.path.join(save_dir, f'logo_{logo_index}.png')
                    logo_img = row.find_element(By.XPATH, ".//img[contains(@id, ':briefResultTable:')]")
                    # WebElement.screenshot signals an I/O failure by returning
                    # False rather than raising; treat that as a failed row so
                    # the CSV never references an image that was not written.
                    if not logo_img.screenshot(logo_path):
                        raise OSError(f"截图未能写入文件: {logo_path}")

                    writer.writerow([trademark_number, applicant, trademark_text, status, classify, logo_img_index])
                    logo_index += 1
                except Exception as e:
                    # Best effort: report the row and carry on with the next one.
                    print(f"无法提取信息或保存图片 {logo_index} - 错误: {e}")

            # Move to the next page, or stop when there is none.
            try:
                # Match on the paginator-next class token instead of the exact
                # class string: the exact string stops matching as soon as an
                # extra state class such as ui-state-disabled is appended,
                # which made the disabled check below unreachable.
                next_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, "//span[contains(@class, 'ui-paginator-next')]"))
                )
                if 'ui-state-disabled' in next_button.get_attribute('class'):
                    print("下一页按钮被禁用，已到达最后一页。")
                    break
                next_button.click()
                time.sleep(3)  # fixed pause while the next page loads
            except Exception as e:
                print("无法找到下一页按钮或已到达最后一页:", str(e))
                break
finally:
    # Always close the browser, whether the scrape finished or failed.
    driver.quit()

print("所有商标图形已保存至:", save_dir)
print("商标信息已保存至:", csv_file_path)

WebDriverException: Message: Can not connect to the Service /Users/xiaoyuanze/.wdm/drivers/chromedriver/mac64/128.0.6613.137/chromedriver-mac-arm64/chromedriver


In [2]:
pip install selenium==3.141.0

Collecting selenium==3.141.0
  Using cached selenium-3.141.0-py2.py3-none-any.whl.metadata (6.6 kB)
Using cached selenium-3.141.0-py2.py3-none-any.whl (904 kB)
Installing collected packages: selenium
  Attempting uninstall: selenium
    Found existing installation: selenium 4.24.0
    Uninstalling selenium-4.24.0:
      Successfully uninstalled selenium-4.24.0
Successfully installed selenium-3.141.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install ddddocr

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install webdriver-manager

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install --upgrade selenium

Collecting selenium
  Using cached selenium-4.24.0-py3-none-any.whl.metadata (7.1 kB)
Using cached selenium-4.24.0-py3-none-any.whl (9.6 MB)
Installing collected packages: selenium
  Attempting uninstall: selenium
    Found existing installation: selenium 3.141.0
    Uninstalling selenium-3.141.0:
      Successfully uninstalled selenium-3.141.0
Successfully installed selenium-4.24.0
Note: you may need to restart the kernel to use updated packages.
