In [2]:
from icrawler.builtin import GoogleImageCrawler
import os

# 검색할 키워드 목록 (현지어 + 한글)
# 현지어로 검색하면 더 정확하고 다양한 이미지를 얻을 수 있습니다.
KEYWORDS = {
    "phong_curry": "ปูผัดผงกะหรี่", # 퐁커리 (푸팟퐁커리)
    "pad_thai": "ผัดไทย",       # 팟타이
    "bun_cha": "bún chả"         # 분짜
}

# 각 키워드당 다운로드할 이미지 개수
NUM_IMAGES = 100
# 이미지를 저장할 최상위 폴더 이름
SAVE_DIR = "google_crawled_images"

# ===================================================================
# 3. 이미지 크롤링 실행
# ===================================================================
print("🚀 Google 이미지 크롤링을 시작합니다...")

# KEYWORDS 딕셔너리를 순회 (폴더 이름, 검색어)
for folder_name, search_keyword in KEYWORDS.items():
    print(f"\n[INFO] '{search_keyword}'({folder_name}) 이미지 다운로드를 시작합니다...")

    # 1. 이미지를 저장할 경로 설정 (예: google_crawled_images/phong_curry)
    storage_path = os.path.join(SAVE_DIR, folder_name)

    # 2. Google 이미지 크롤러 객체 생성 및 경로 지정
    google_crawler = GoogleImageCrawler(storage={'root_dir': storage_path})

    try:
        # 3. 크롤링 실행
        google_crawler.crawl(keyword=search_keyword, max_num=NUM_IMAGES)

        print(f"✅ '{search_keyword}' 이미지 {NUM_IMAGES}장 다운로드 완료! -> 저장 경로: {storage_path}")

    except Exception as e:
        print(f"❌ '{search_keyword}' 이미지 다운로드 중 오류 발생: {e}")

print("\n🎉 모든 Google 이미지 크롤링 작업이 완료되었습니다.")

2025-10-24 12:25:35,442 - INFO - icrawler.crawler - start crawling...
2025-10-24 12:25:35,444 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-10-24 12:25:35,446 - INFO - feeder - thread feeder-001 exit
2025-10-24 12:25:35,448 - INFO - icrawler.crawler - starting 1 parser threads...
2025-10-24 12:25:35,453 - INFO - icrawler.crawler - starting 1 downloader threads...


🚀 Google 이미지 크롤링을 시작합니다...

[INFO] 'ปูผัดผงกะหรี่'(phong_curry) 이미지 다운로드를 시작합니다...


2025-10-24 12:25:37,135 - INFO - parser - parsing result page https://www.google.com/search?q=%E0%B8%9B%E0%B8%B9%E0%B8%9C%E0%B8%B1%E0%B8%94%E0%B8%9C%E0%B8%87%E0%B8%81%E0%B8%B0%E0%B8%AB%E0%B8%A3%E0%B8%B5%E0%B9%88&ijn=0&start=0&tbs=&tbm=isch
2025-10-24 12:25:38,109 - INFO - downloader - image #1	https://i.ytimg.com/vi/GLmfjxr26zc/sddefault.jpg
2025-10-24 12:25:38,936 - ERROR - downloader - Response status code 403, file https://food.mthai.com/app/uploads/2017/01/iStock-611778670.jpg
2025-10-24 12:25:44,772 - ERROR - downloader - Exception caught when downloading file https://s359.kapook.com/pagebuilder/2ee839a5-7cbb-48b7-9b05-31c2390f6478.jpg, error: HTTPSConnectionPool(host='s359.kapook.com', port=443): Read timed out. (read timeout=5), remaining retry times: 2
2025-10-24 12:25:45,900 - INFO - downloader - image #2	https://s359.kapook.com/pagebuilder/2ee839a5-7cbb-48b7-9b05-31c2390f6478.jpg
2025-10-24 12:25:46,517 - INFO - downloader - image #3	https://img.wongnai.com/p/1920x0/2020/10/2

✅ 'ปูผัดผงกะหรี่' 이미지 100장 다운로드 완료! -> 저장 경로: google_crawled_images\phong_curry

[INFO] 'ผัดไทย'(pad_thai) 이미지 다운로드를 시작합니다...


2025-10-24 12:27:00,000 - INFO - parser - parsing result page https://www.google.com/search?q=%E0%B8%9C%E0%B8%B1%E0%B8%94%E0%B9%84%E0%B8%97%E0%B8%A2&ijn=0&start=0&tbs=&tbm=isch
2025-10-24 12:27:01,177 - ERROR - downloader - Response status code 404, file https://upload.wikimedia.org/wikipedia/commons/thumb/3/39/Phat_Thai_kung_Chang_Khien_street_stall.jpg
2025-10-24 12:27:01,694 - INFO - downloader - image #1	https://s.isanook.com/wo/0/ud/37/185369/food.jpg
2025-10-24 12:27:02,167 - INFO - downloader - image #2	https://img.wongnai.com/p/1920x0/2021/08/09/f5ff71c37a2c4101b895432aae1ac01a.jpg
2025-10-24 12:27:02,841 - INFO - downloader - image #3	https://www.ajinomoto.co.th/storage/photos/shares/Recipe/Menu/lot2-2/6172a01f7aadd.jpg
2025-10-24 12:27:03,433 - INFO - downloader - image #4	https://img-global.cpcdn.com/recipes/e71665bdf860791e/680x781cq80/%E0%B8%A3%E0%B8%9B-%E0%B8%AB%E0%B8%A5%E0%B8%81-%E0%B8%82%E0%B8%AD%E0%B8%87-%E0%B8%AA%E0%B8%95%E0%B8%A3-%E0%B8%9C%E0%B8%94%E0%B9%84%E0%B8%97%

✅ 'ผัดไทย' 이미지 100장 다운로드 완료! -> 저장 경로: google_crawled_images\pad_thai

[INFO] 'bún chả'(bun_cha) 이미지 다운로드를 시작합니다...


2025-10-24 12:28:14,273 - INFO - parser - parsing result page https://www.google.com/search?q=b%C3%BAn+ch%E1%BA%A3&ijn=0&start=0&tbs=&tbm=isch
2025-10-24 12:28:14,949 - ERROR - downloader - Response status code 403, file https://www.seriouseats.com/thmb/atsVhLwxdCWyX-QDuhOLhR0Kx4s=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/20231204-SEA-VyTran-BunChaHanoi-18-e37d96a89a0f43d097e02311686290f2.jpg
2025-10-24 12:28:15,625 - INFO - downloader - image #1	https://cdn2.fptshop.com.vn/unsafe/1920x0/filters:format(webp):quality(75)/2023_3_4_638135494683018162_cach-lam-bun-cha-bang-noi-chien-khong-dau-1.jpg
2025-10-24 12:28:17,055 - INFO - downloader - image #2	https://cdn.tgdd.vn/2022/01/CookRecipe/GalleryStep/thanh-pham-69.jpg
2025-10-24 12:28:18,581 - INFO - downloader - image #3	https://i-giadinh.vnecdn.net/2023/04/16/Buoc-11-Thanh-pham-11-7068-1681636164.jpg
2025-10-24 12:28:19,102 - ERROR - downloader - Response status code 404, file http://www.savourydays.com/wp-content/uplo

✅ 'bún chả' 이미지 100장 다운로드 완료! -> 저장 경로: google_crawled_images\bun_cha

🎉 모든 Google 이미지 크롤링 작업이 완료되었습니다.
