In [8]:
import base64
import os
import time

from fpdf import FPDF
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

# --- Configuration ---
# Everything a reader is likely to change lives here: the Drive viewer link,
# the folder that receives the page images, and the name of the final PDF.
URL = "https://drive.google.com/file/d/1HoPxGnMhIGcyH3FB6bScTeMty_v3U139/view"
IMG_DIR = "images"
PDF_FILE = "diode p4.pdf"

# --- Setup Selenium ---
chrome_options = Options()
for flag in (
    "--start-maximized",
    "--disable-blink-features=AutomationControlled",
):
    chrome_options.add_argument(flag)

# The Service is built from whatever ChromeDriverManager().install() returns.
# Alternative: hand Service the path of a locally installed chromedriver
# binary instead of going through webdriver_manager.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

driver.get(URL)
time.sleep(5)  # fixed pause so the viewer has time to load before the DOM is queried


In [9]:

# --- Step 2: Find divs ---
# Select every <div> whose inline style attribute begins with "padding-bottom:".
# Presumably each of these wraps one rendered page of the document -- verify
# against the live viewer DOM. Class-based selectors ('ndfHFb-...') were tried
# as alternatives and are not used here.
page_div_xpath = "//div[starts-with(@style, 'padding-bottom:')]"
divs_2 = driver.find_elements(By.XPATH, page_div_xpath)

# Later cells iterate over `filtered_divs`; no extra filtering is applied yet.
filtered_divs = divs_2


In [10]:

# --- Step 3: Prepare directory ---
os.makedirs(IMG_DIR, exist_ok=True)
saved_count = 0

# JavaScript run in the page for each <img>: redraw it on an off-screen canvas
# at its natural size and return the pixels as a base64 PNG data URL.
# Defined once here because it is identical for every page.
script = """
    const img = arguments[0];
    const canvas = document.createElement('canvas');
    canvas.width = img.naturalWidth;
    canvas.height = img.naturalHeight;
    const ctx = canvas.getContext('2d');
    ctx.drawImage(img, 0, 0);
    return canvas.toDataURL('image/png');
"""

# --- Step 4: Process each div ---
# `idx` is the position of the div and is used only in log messages.
# Files are numbered by save order (001.png, 002.png, ...) so that the PDF
# step, which reads 001..saved_count, never hits a gap when a div is skipped
# or fails.
for idx, div in enumerate(filtered_divs, start=1):
    driver.execute_script("arguments[0].scrollIntoView();", div)
    time.sleep(0.3)  # give it a moment to load
    try:
        img_tag = div.find_element(By.TAG_NAME, "img")
        src = img_tag.get_attribute("src")

        if not (src and src.startswith("blob:https://drive.google.com/")):
            print(f"[{idx}] No valid blob image found.")
            continue

        img_id = src.split("/")[-1]
        print(f"[{idx}] Found blob image id: {img_id}")

        img_base64 = driver.execute_script(script, img_tag)

        # toDataURL yields "data:," for a zero-sized canvas (image not loaded
        # yet), and the script result may be None; report instead of skipping
        # silently or failing on None.startswith.
        if not (isinstance(img_base64, str) and img_base64.startswith("data:image")):
            print(f"[{idx}] Canvas export returned no image data; page skipped.")
            continue

        # Strip the "data:image/png;base64," header and decode the payload.
        data = base64.b64decode(img_base64.split(",", 1)[1])
        img_path = os.path.join(IMG_DIR, f"{saved_count + 1:03d}.png")
        with open(img_path, "wb") as f:
            f.write(data)
        saved_count += 1
    except Exception as e:
        # Deliberate best-effort: one bad page must not abort the whole run.
        print(f"[{idx}] Error: {e}")

print(f"\n✅ {saved_count} images saved in '{IMG_DIR}' folder.\n")


[1] Found blob image id: 65f8ce2f-a1c0-40ed-af2c-ed7bf1b1b64f
[2] Found blob image id: 48a89d7f-3e96-4cbe-965d-53385e637ad5
[3] Found blob image id: 3c9d37fb-0c98-41e5-bbde-b52dcdcf764c
[4] Found blob image id: c2158e3c-664a-4ad8-90a3-00d3506f313c
[5] Found blob image id: 2b839fc8-b4a1-48be-8394-720a92cdac63
[6] Found blob image id: 5cca1f7f-c8de-4e1f-a7e6-3c5b104c72d4
[7] Found blob image id: 8c6fa643-5425-4d5e-aac0-daa6685f2014
[8] Found blob image id: 708c9579-9de8-4236-a603-48a31d592bd2
[9] Found blob image id: 475b4bd2-d2b5-4cfb-983d-a3132bd8b240
[10] Found blob image id: 32aaf109-258a-48bc-a3a1-0925b4b58d87
[11] Found blob image id: 94446bfe-981d-46f5-9b82-1c25366aaf25
[12] Found blob image id: b8834a1b-1591-4fec-8902-dfd27adb0667
[13] Found blob image id: 730fcdb6-f669-455d-a6b6-9d8f104b4ea3
[14] Found blob image id: ed4828a1-cb39-4f4c-a4f1-e7a4ad7299a7
[15] Found blob image id: 293ff931-03f9-4218-90dd-54e73f1d630d
[16] Found blob image id: 1ef3d46d-93f0-4173-8af3-a1d4e275e169
[

In [12]:

# --- Cleanup ---
# Ends the WebDriver session held by `driver`. The image files already written
# to IMG_DIR are unaffected, so the PDF step does not need the browser.
driver.quit()


In [11]:
# --- Step 5: Create PDF with actual image size ---
# Reads IMG_DIR/001.png .. IMG_DIR/<saved_count>.png and writes one PDF page
# per image, each page sized to its image (1 pixel is treated as 1 point).
# To rebuild the PDF from images already on disk, set this first:
# saved_count = len(os.listdir(IMG_DIR))
pdf = FPDF(unit="pt")  # Use points as unit for precise sizing
pages_added = 0

for i in range(1, saved_count + 1):
    img_path = os.path.join(IMG_DIR, f"{i:03d}.png")
    if not os.path.exists(img_path):
        # Make gaps visible instead of silently producing a shorter PDF.
        print(f"[{i}] Missing image file, page skipped: {img_path}")
        continue

    # Only the dimensions are needed; the context manager closes the file
    # handle right away instead of leaving one open per page.
    with Image.open(img_path) as image:
        w, h = image.size  # size in pixels

    # Create a page with the same size as the image
    pdf.add_page(orientation='P', format=(w, h))
    pdf.image(img_path, 0, 0, w, h)  # keep actual image size
    pages_added += 1

if pages_added > 0:
    pdf.output(PDF_FILE)
    print(f"📄 PDF created successfully: {PDF_FILE}")
else:
    # Covers both saved_count == 0 and the case where no expected file exists.
    print("No images saved. PDF not created.")


📄 PDF created successfully: diode p4.pdf


  pdf.output(PDF_FILE, "F")
