In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import (
    InvalidSessionIdException,
    StaleElementReferenceException,
    WebDriverException,
    TimeoutException,
    NoSuchElementException
)
import time
import random
import requests
import os
import base64
import traceback

# Desktop Chrome user-agent string. It is passed to the Chrome session in
# make_driver() and sent as the User-Agent header by download_images().
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"

def make_driver():
    """Create and return a new Chrome WebDriver instance.

    The browser is started with a fixed 1920x1080 window, the module-level
    USER_AGENT, and the "enable-automation" switch / automation extension
    turned off. The chromedriver binary is resolved through
    ChromeDriverManager.
    """
    opts = webdriver.ChromeOptions()
    launch_flags = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
        f'user-agent={USER_AGENT}',
    )
    for flag in launch_flags:
        opts.add_argument(flag)

    # A few options intended to make blocking less likely.
    opts.add_experimental_option("excludeSwitches", ["enable-automation"])
    opts.add_experimental_option('useAutomationExtension', False)

    driver_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=driver_service, options=opts)

def human_like_scroll(driver, rounds=8):
    """Scroll the current page using a randomly chosen gesture per round.

    Each round picks one of four gestures (Page Down, several small wheel
    scrolls, a burst of Arrow Down presses, or Space), waits a random
    interval, tries the "show more results" button on every even round, and
    pauses a little longer when the document height stopped growing.

    Args:
        driver: WebDriver whose current page is scrolled.
        rounds: Number of scroll rounds to perform.
    """
    page_body = driver.find_element(By.TAG_NAME, 'body')
    previous_height = driver.execute_script("return document.body.scrollHeight")

    def _wheel():
        for _ in range(random.randint(2,5)):
            ActionChains(driver).scroll_by_amount(0, random.randint(150, 300)).perform()
            time.sleep(random.uniform(0.08, 0.2))

    def _arrows():
        for _ in range(random.randint(5,12)):
            page_body.send_keys(Keys.ARROW_DOWN)
            time.sleep(random.uniform(0.04, 0.12))

    def _space():
        page_body.send_keys(Keys.SPACE)

    gestures = {
        'page_down': lambda: page_body.send_keys(Keys.PAGE_DOWN),
        'wheel': _wheel,
        'arrow': _arrows,
    }

    for round_no in range(rounds):
        chosen = random.choice(['page_down','wheel','arrow','space'])
        try:
            # Anything that is not one of the three named gestures is Space.
            gestures.get(chosen, _space)()
        except Exception:
            # A failed gesture is ignored; the round simply continues.
            pass

        # Irregular wait between gestures.
        time.sleep(random.uniform(0.6, 1.6))

        # Try the "show more results" button on even-numbered rounds.
        if round_no % 2 == 0:
            try_click_more_button(driver)

        # Check whether the page actually grew.
        current_height = driver.execute_script("return document.body.scrollHeight")
        stalled = current_height <= previous_height
        if stalled and round_no > 2:
            # No further growth: rest briefly before the next round.
            time.sleep(random.uniform(1.0, 2.0))
        previous_height = current_height

def try_click_more_button(driver):
    """Click the first visible, enabled "show more results" button.

    Several CSS selectors are tried in order. Lookup or click failures for
    one selector are ignored and the next selector is tried.

    Returns:
        True if a button was clicked, False if none of the selectors
        produced a clickable element.
    """
    candidate_css = (
        'input[value="결과 더보기"]',
        '.mye4qd',
        'input[type="button"][value*="더보기"]',
        'button[jsname="UCHLrd"]',
    )
    for css in candidate_css:
        try:
            button = driver.find_element(By.CSS_SELECTOR, css)
            if not (button.is_displayed() and button.is_enabled()):
                continue
            # Bring the button into view, then click it via a mouse move
            # with a short pause.
            driver.execute_script("arguments[0].scrollIntoView({behavior:'smooth',block:'center'});", button)
            time.sleep(random.uniform(0.5, 1.0))
            click_chain = ActionChains(driver).move_to_element(button)
            click_chain = click_chain.pause(random.uniform(0.2,0.4)).click()
            click_chain.perform()
            time.sleep(random.uniform(1.5, 2.5))
            return True
        except Exception:
            continue
    return False

def collect_image_urls_once(driver):
    """Collect image URLs from the page as it is currently rendered.

    Selectors are organised in tiers, from most specific to a bare ``img``.
    After each tier the scan stops if at least 20 URLs were found. For every
    matched element the first attribute (src, data-src, data-original,
    data-lazy-src) holding an ``http...`` or ``data:image...`` value is kept.

    Returns:
        A list of unique URL strings (order not guaranteed).
    """
    css_tiers = (
        ('img.Q4LuWd','img.rg_i','div[data-tbnid] img','img[data-src]'),
        ('img.t0fcAb','img.YQ4gaf','img[src^="http"]'),
        ('img',),
    )
    source_attrs = ('src','data-src','data-original','data-lazy-src')
    accepted_prefixes = ('http', 'data:image')

    found = set()
    for tier in css_tiers:
        for css in tier:
            try:
                for image in driver.find_elements(By.CSS_SELECTOR, css):
                    for attr_name in source_attrs:
                        value = image.get_attribute(attr_name)
                        if not value:
                            continue
                        if value.startswith(accepted_prefixes):
                            found.add(value)
                            break
            except Exception:
                # Errors for one selector are ignored; try the next one.
                continue
        # Enough collected: skip the broader tiers.
        if len(found) >= 20:
            break
    return list(found)

def collect_image_urls(driver, min_rounds=2, extra_scroll=3):
    """Collect image URLs, scrolling further when too few are found.

    Args:
        driver: WebDriver showing the results page.
        min_rounds: Scroll rounds performed after the recovery refresh.
        extra_scroll: Maximum number of additional scroll-and-collect passes.

    Returns:
        A list of unique URL strings.
    """
    gathered = set(collect_image_urls_once(driver))

    if len(gathered) < 10:
        # Too few results: refresh once, then scroll and collect again.
        driver.refresh()
        time.sleep(random.uniform(1.5, 2.5))
        human_like_scroll(driver, rounds=min_rounds)
        gathered.update(collect_image_urls_once(driver))

    # Additional passes, stopping early once 60 URLs are available.
    for _ in range(extra_scroll):
        if len(gathered) < 60:
            human_like_scroll(driver, rounds=random.randint(3,5))
            gathered.update(collect_image_urls_once(driver))
        else:
            break
    return list(gathered)

def _guess_image_ext(hint):
    """Return '.png', '.gif' or '.webp' if named in *hint*, else '.jpg'.

    *hint* is expected to be a short descriptor such as a MIME type or a URL
    path, not a whole URL or data URI payload.
    """
    lowered = (hint or '').lower()
    for token, ext in (('png', '.png'), ('gif', '.gif'), ('webp', '.webp')):
        if token in lowered:
            return ext
    return '.jpg'


def download_images(urls, save_dir, base_name, start_index, target_left):
    """Save images from *urls* into *save_dir*, best-effort.

    Files are named ``{base_name}_{start_index + k}{ext}`` where ``k`` counts
    successful downloads starting at 1. Base64 ``data:image`` URIs are
    decoded locally; everything else is fetched over HTTP with the
    module-level USER_AGENT. A URL that fails for any reason is skipped.

    The extension is taken from the data URI's MIME header or the response's
    ``Content-Type`` (falling back to the URL path). Matching against the
    whole URL would also match the base64 payload or query tokens and
    mislabel files.

    Args:
        urls: Iterable of image URLs or ``data:image`` URIs.
        save_dir: Existing directory to write into.
        base_name: File name prefix.
        start_index: Offset added to the running file number.
        target_left: Stop after this many successful downloads.

    Returns:
        The number of images saved.
    """
    from urllib.parse import urlparse

    headers = {'User-Agent': USER_AGENT}
    downloaded = 0
    for u in urls:
        if downloaded >= target_left:
            break
        fpath = None  # set once we start writing, so failures can clean up
        try:
            stem = f"{base_name}_{start_index + downloaded + 1}"

            if u.startswith('data:image'):
                header, encoded = u.split(',', 1)
                # Only base64 payloads can be decoded here; other encodings
                # would be written out as garbage.
                if ';base64' not in header.lower():
                    continue
                data = base64.b64decode(encoded)
                fpath = os.path.join(save_dir, stem + _guess_image_ext(header))
                with open(fpath, 'wb') as f:
                    f.write(data)
            else:
                # The context manager releases the streamed connection.
                with requests.get(u, headers=headers, timeout=8, stream=True) as resp:
                    if resp.status_code != 200:
                        continue
                    content_type = resp.headers.get('Content-Type') or ''
                    if content_type.lower().startswith('image/'):
                        hint = content_type
                    else:
                        hint = urlparse(u).path
                    fpath = os.path.join(save_dir, stem + _guess_image_ext(hint))
                    with open(fpath, 'wb') as f:
                        for chunk in resp.iter_content(1024):
                            f.write(chunk)

            downloaded += 1
            # Human-like spacing between downloads.
            time.sleep(random.uniform(0.2, 0.7))
        except Exception:
            # Best-effort: skip this URL, but do not leave a truncated file.
            if fpath and os.path.exists(fpath):
                try:
                    os.remove(fpath)
                except OSError:
                    pass
            continue
    return downloaded

def generate_related_keywords(base_keyword):
    """Build the list of search phrases derived from *base_keyword*.

    The first entry is the keyword itself, followed by the keyword combined
    with each fixed Korean/English suffix. Duplicates are dropped while
    keeping first-seen order.

    Returns:
        A list of unique keyword strings.
    """
    suffixes = (
        "사진", "이미지", "그림",
        "디자인", "제품", "종류",
        "인테리어", "세트", "아이디어",
        "카탈로그", "쇼룸", "전시",
        "고화질", "HD", "4K",
        "modern", "classic", "vintage",
        "collection", "gallery", "set",
        "minimal", "premium", "aesthetic",
    )
    candidates = [f"{base_keyword}"]
    candidates.extend(f"{base_keyword} {suffix}" for suffix in suffixes)
    # dict keys keep insertion order, giving an order-preserving de-duplication.
    return list(dict.fromkeys(candidates))

def safe_session_run(keyword, per_keyword_target, save_dir):
    """Search, collect and download images for one keyword in its own session.

    A fresh driver is created for every attempt and always quit afterwards.
    If an attempt downloads nothing (for example because the session became
    invalid), the whole sequence is retried once with a new driver.

    Args:
        keyword: Search phrase; it is URL-encoded before being placed in the
            query string.
        per_keyword_target: Maximum number of images to download.
        save_dir: Existing directory that receives the files.

    Returns:
        The number of images downloaded (0 if both attempts failed).
    """
    import re
    from urllib.parse import quote_plus

    max_attempts = 2
    # Characters that are not valid in file names on common file systems
    # would make every open() fail, so replace them in the file prefix.
    file_prefix = re.sub(r'[\\/:*?"<>|]', '_', keyword.replace(' ', '_'))

    attempt = 0
    total_downloaded = 0
    while attempt < max_attempts and total_downloaded == 0:
        attempt += 1
        driver = None
        try:
            driver = make_driver()
            # Open the image search. quote_plus turns spaces into '+' and
            # escapes characters such as '&' or '#' that would otherwise
            # change the meaning of the URL.
            q = quote_plus(keyword.strip())
            url = f'https://www.google.com/search?tbm=isch&q={q}'
            driver.get(url)
            time.sleep(random.uniform(1.8, 3.2))

            # Scroll like a person would.
            human_like_scroll(driver, rounds=random.randint(6, 10))

            # Collect URLs.
            urls = collect_image_urls(driver, min_rounds=2, extra_scroll=3)
            # Still too few: force one more scroll-and-collect pass.
            if len(urls) < 10:
                human_like_scroll(driver, rounds=random.randint(5, 8))
                urls = list(set(urls) | set(collect_image_urls(driver, min_rounds=2, extra_scroll=2)))

            # Download up to the requested amount.
            downloaded = download_images(urls, save_dir, base_name=file_prefix,
                                         start_index=len(os.listdir(save_dir)),
                                         target_left=per_keyword_target)
            total_downloaded += downloaded
        except (InvalidSessionIdException, WebDriverException) as e:
            # Session problems are retried; note them so failures are visible.
            print(f"⚠️ '{keyword}' 세션 오류 (시도 {attempt}/{max_attempts}): {type(e).__name__}")
        except Exception:
            # Any other error is only logged.
            traceback.print_exc()
        finally:
            try:
                if driver:
                    driver.quit()
            except Exception:
                pass
        if total_downloaded == 0 and attempt < max_attempts:
            # Rest briefly before retrying; pointless after the last attempt.
            time.sleep(random.uniform(2.0, 4.0))
    return total_downloaded

def main():
    """Interactive entry point: prompt for a keyword and a target count.

    The base keyword is searched first (at most 80 images). If the target is
    not reached, related keywords from generate_related_keywords() are tried
    one by one, each in its own browser session, until the target is met or
    the keywords run out. Files go to ``./{keyword}_images``.

    Invalid input (empty keyword, non-integer or non-positive count) prints
    a message and returns without starting a browser.
    """
    base_keyword = input("🔍 기본 검색 키워드 입력: ").strip()
    if not base_keyword:
        print("⚠️ 검색 키워드를 입력해야 합니다.")
        return
    try:
        target_total = int(input("📊 수집할 총 이미지 개수: ").strip())
    except ValueError:
        print("⚠️ 이미지 개수는 정수로 입력해야 합니다.")
        return
    if target_total <= 0:
        print("⚠️ 이미지 개수는 1 이상이어야 합니다.")
        return

    save_dir = f'./{base_keyword}_images'
    os.makedirs(save_dir, exist_ok=True)

    print(f"\n🎯 목표: {target_total}개 | 저장: {save_dir}")
    used = set()
    total = 0

    # Stage 1: try the base keyword first, in its own session.
    per_first = min(80, target_total)  # at most 80 from the first keyword
    got = safe_session_run(base_keyword, per_first, save_dir)
    total += got
    used.add(base_keyword)
    print(f"✅ '{base_keyword}'에서 {got}개 다운로드 | 누적 {total}/{target_total}")

    # Stage 2: expand to related keywords.
    if total < target_total:
        # Drop keywords that were already used.
        related = [k for k in generate_related_keywords(base_keyword) if k not in used]

        idx = 0
        while total < target_total and idx < len(related):
            remain = target_total - total
            # Per-keyword quota: a quarter of what is left while a lot
            # remains, otherwise everything that is left; capped at 80.
            # It never exceeds `remain`, so the total cannot overshoot the
            # requested count.
            per_keyword = min(80, remain // 4 if remain > 120 else remain)
            kw = related[idx]
            idx += 1

            print(f"\n🔄 키워드 전환: '{kw}' | 목표 {per_keyword}개 | 남은 {remain}개")
            # Short human-like rest between keywords.
            time.sleep(random.uniform(1.5, 3.0))

            got = safe_session_run(kw, per_keyword, save_dir)
            total += got
            used.add(kw)
            print(f"✅ '{kw}'에서 {got}개 다운로드 | 누적 {total}/{target_total}")

            # Occasionally take a longer break.
            if idx % 3 == 0 and total < target_total:
                pause = random.uniform(3.0, 6.0)
                print(f"😴 잠시 휴식 {pause:.1f}s")
                time.sleep(pause)

    print(f"\n🏁 완료: {total}/{target_total}개 다운로드")
    if total < target_total:
        print("⚠️ 목표에 미달했습니다. 더 일반적/영문 키워드를 추가하거나 횟수를 늘려 재시도하세요.")

# Run the interactive scraper only when this file is executed as a script
# (or as a notebook cell), not when it is imported.
if __name__ == "__main__":
    main()



🎯 목표: 200개 | 저장: ./아이유_images
