In [2]:
from icrawler.builtin import GoogleImageCrawler
import os

# 검색할 키워드 목록 (현지어 + 한글)
# 현지어로 검색하면 더 정확하고 다양한 이미지를 얻을 수 있습니다.
KEYWORDS = {
    "phong_curry": ["ปูผัดผงกะหรี่", "poo pad pong curry", "thai crab curry stir fry", "뿌팟퐁커리"],
    "pad_thai": ["ผัดไทย", "pad thai noodle", "thai stir fried noodles", "팟타이"],
    "bun_cha": ["bún chả", "bun cha hanoi", "vietnamese grilled pork noodle", "분짜"]
}

# 각 키워드당 다운로드할 이미지 개수
NUM_IMAGES_PER_KEYWORD = 150 # 각 세부 키워드당 150장씩 시도 (총 400장 이상 목표)
# 이미지를 저장할 최상위 폴더 이름
SAVE_DIR = "4_google_crawled_images"

# ===================================================================
# 3. 이미지 크롤링 실행 (크기 필터링 추가)
# ===================================================================
print("🚀 Google 이미지 크롤링 (256x256 이상)을 시작합니다...")

# KEYWORDS 딕셔너리를 순회 (폴더 이름, 검색어)
for folder_name, keyword_list in KEYWORDS.items():
    print(f"\n[INFO] === {folder_name} 클래스 다운로드 시작 ===")
    storage_path = os.path.join(SAVE_DIR, folder_name)

    # 이 클래스에 대한 총 다운로드 수
    total_downloaded = 0

    # 이 클래스에 해당하는 모든 키워드로 순차적 크롤링
    for keyword in keyword_list:
        print(f"   -> 키워드 '{keyword}'로 크롤링 시도...")
        google_crawler = GoogleImageCrawler(storage={'root_dir': storage_path})
        try:
            google_crawler.crawl(
                keyword=keyword,
                max_num=NUM_IMAGES_PER_KEYWORD,
                filters={'size': 'medium'}
            )
        except Exception as e:
            print(f"   -> 오류 발생: {e}")

    # 최종적으로 몇 장이 다운로드되었는지 확인
    try:
        final_count = len(os.listdir(storage_path))
        print(f"✅ {folder_name} 클래스 총 {final_count}장 다운로드 완료!")
    except FileNotFoundError:
        print(f"🤷 {folder_name} 클래스는 다운로드된 이미지가 없습니다.")

print("\n🎉 모든 Google 이미지 크롤링 작업이 완료되었습니다.")

2025-10-24 17:00:21,054 - INFO - icrawler.crawler - start crawling...
2025-10-24 17:00:21,056 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-10-24 17:00:21,057 - INFO - feeder - thread feeder-001 exit
2025-10-24 17:00:21,060 - INFO - icrawler.crawler - starting 1 parser threads...
2025-10-24 17:00:21,063 - INFO - icrawler.crawler - starting 1 downloader threads...


🚀 Google 이미지 크롤링 (256x256 이상)을 시작합니다...

[INFO] === phong_curry 클래스 다운로드 시작 ===
   -> 키워드 'ปูผัดผงกะหรี่'로 크롤링 시도...


2025-10-24 17:00:22,424 - INFO - parser - parsing result page https://www.google.com/search?q=%E0%B8%9B%E0%B8%B9%E0%B8%9C%E0%B8%B1%E0%B8%94%E0%B8%9C%E0%B8%87%E0%B8%81%E0%B8%B0%E0%B8%AB%E0%B8%A3%E0%B8%B5%E0%B9%88&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:00:23,455 - INFO - downloader - image #1	https://i.ytimg.com/vi/GLmfjxr26zc/sddefault.jpg
2025-10-24 17:00:24,322 - ERROR - downloader - Response status code 403, file https://food.mthai.com/app/uploads/2017/01/iStock-611778670.jpg
2025-10-24 17:00:26,049 - INFO - downloader - image #2	https://s359.kapook.com/pagebuilder/2ee839a5-7cbb-48b7-9b05-31c2390f6478.jpg
2025-10-24 17:00:26,737 - INFO - downloader - image #3	https://img-global.cpcdn.com/recipes/1c6340f33ad66d63/1200x630cq80/photo.jpg
2025-10-24 17:00:27,019 - INFO - downloader - image #4	https://i.ytimg.com/vi/11QJBR5ZPyw/hq720.jpg
2025-10-24 17:00:27,343 - INFO - downloader - image #5	https://i.ytimg.com/vi/rJx-1LVJgR4/sddefault.jpg
2025-10-24 17:00:27,945 - INFO - downlo

   -> 키워드 'poo pad pong curry'로 크롤링 시도...


2025-10-24 17:01:39,832 - INFO - parser - parsing result page https://www.google.com/search?q=poo+pad+pong+curry&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:01:40,000 - INFO - downloader - skip downloading file 000001.jpg
2025-10-24 17:01:40,004 - INFO - downloader - skip downloading file 000002.jpg
2025-10-24 17:01:40,005 - INFO - downloader - skip downloading file 000003.jpg
2025-10-24 17:01:40,006 - INFO - downloader - skip downloading file 000004.jpg
2025-10-24 17:01:40,007 - INFO - downloader - skip downloading file 000005.jpg
2025-10-24 17:01:40,008 - INFO - downloader - skip downloading file 000006.jpg
2025-10-24 17:01:40,010 - INFO - downloader - skip downloading file 000007.jpg
2025-10-24 17:01:40,011 - INFO - downloader - skip downloading file 000008.jpg
2025-10-24 17:01:40,013 - INFO - downloader - skip downloading file 000009.jpg
2025-10-24 17:01:40,014 - INFO - downloader - skip downloading file 000010.jpg
2025-10-24 17:01:40,015 - INFO - downloader - skip downloading

   -> 키워드 'thai crab curry stir fry'로 크롤링 시도...


2025-10-24 17:02:15,918 - INFO - parser - parsing result page https://www.google.com/search?q=thai+crab+curry+stir+fry&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:02:16,099 - INFO - downloader - skip downloading file 000001.jpg
2025-10-24 17:02:16,102 - INFO - downloader - skip downloading file 000002.jpg
2025-10-24 17:02:16,103 - INFO - downloader - skip downloading file 000003.jpg
2025-10-24 17:02:16,105 - INFO - downloader - skip downloading file 000004.jpg
2025-10-24 17:02:16,108 - INFO - downloader - skip downloading file 000005.jpg
2025-10-24 17:02:16,110 - INFO - downloader - skip downloading file 000006.jpg
2025-10-24 17:02:16,111 - INFO - downloader - skip downloading file 000007.jpg
2025-10-24 17:02:17,940 - INFO - downloader - image #8	https://theshortli.st/wp-content/uploads/karee-1024x490.png
2025-10-24 17:02:17,942 - INFO - downloader - skip downloading file 000009.jpg
2025-10-24 17:02:17,944 - INFO - downloader - skip downloading file 000010.jpg
2025-10-24 17:02:17,

   -> 키워드 '뿌팟퐁커리'로 크롤링 시도...


2025-10-24 17:02:34,866 - INFO - parser - parsing result page https://www.google.com/search?q=%EB%BF%8C%ED%8C%9F%ED%90%81%EC%BB%A4%EB%A6%AC&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:02:35,061 - INFO - downloader - skip downloading file 000001.jpg
2025-10-24 17:02:35,065 - INFO - downloader - skip downloading file 000002.jpg
2025-10-24 17:02:35,066 - INFO - downloader - skip downloading file 000003.jpg
2025-10-24 17:02:35,067 - INFO - downloader - skip downloading file 000004.jpg
2025-10-24 17:02:35,068 - INFO - downloader - skip downloading file 000005.jpg
2025-10-24 17:02:35,070 - INFO - downloader - skip downloading file 000006.jpg
2025-10-24 17:02:35,071 - INFO - downloader - skip downloading file 000007.jpg
2025-10-24 17:02:35,072 - INFO - downloader - skip downloading file 000008.jpg
2025-10-24 17:02:35,073 - INFO - downloader - skip downloading file 000009.jpg
2025-10-24 17:02:35,074 - INFO - downloader - skip downloading file 000010.jpg
2025-10-24 17:02:35,075 - INFO - do

✅ phong_curry 클래스 총 89장 다운로드 완료!

[INFO] === pad_thai 클래스 다운로드 시작 ===
   -> 키워드 'ผัดไทย'로 크롤링 시도...


2025-10-24 17:02:47,973 - INFO - parser - parsing result page https://www.google.com/search?q=%E0%B8%9C%E0%B8%B1%E0%B8%94%E0%B9%84%E0%B8%97%E0%B8%A2&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:02:48,670 - INFO - downloader - image #1	https://img-global.cpcdn.com/recipes/e71665bdf860791e/680x781cq80/%E0%B8%A3%E0%B8%9B-%E0%B8%AB%E0%B8%A5%E0%B8%81-%E0%B8%82%E0%B8%AD%E0%B8%87-%E0%B8%AA%E0%B8%95%E0%B8%A3-%E0%B8%9C%E0%B8%94%E0%B9%84%E0%B8%97%E0%B8%A2%E0%B8%81%E0%B8%87%E0%B8%AA%E0%B8%94.jpg
2025-10-24 17:02:49,593 - INFO - downloader - image #2	https://recipe.sgethai.com/wp-content/uploads/2025/05/282525-pad-thai-with-egg-wrap-cover.png
2025-10-24 17:02:51,725 - INFO - downloader - image #3	https://www.thipkitchen.com/images/course/padthai/img1.jpg
2025-10-24 17:02:52,770 - INFO - downloader - image #4	https://img.kapook.com/u/2015/surauch/cook2/PT1.jpg
2025-10-24 17:02:53,315 - INFO - downloader - image #5	https://s.isanook.com/wo/0/ud/37/185369/food.jpg
2025-10-24 17:02:54,471 - INFO -

   -> 키워드 'pad thai noodle'로 크롤링 시도...


2025-10-24 17:04:05,338 - INFO - parser - parsing result page https://www.google.com/search?q=pad+thai+noodle&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:04:05,552 - INFO - downloader - skip downloading file 000001.jpg
2025-10-24 17:04:07,091 - INFO - downloader - image #2	https://realhousemoms.com/wp-content/uploads/One-Pot-Pad-Thai-IG.jpg
2025-10-24 17:04:07,094 - INFO - downloader - skip downloading file 000003.jpg
2025-10-24 17:04:07,095 - INFO - downloader - skip downloading file 000004.jpg
2025-10-24 17:04:07,096 - INFO - downloader - skip downloading file 000005.jpg
2025-10-24 17:04:07,097 - INFO - downloader - skip downloading file 000006.jpg
2025-10-24 17:04:07,099 - INFO - downloader - skip downloading file 000007.jpg
2025-10-24 17:04:07,556 - INFO - downloader - skip downloading file 000008.jpg
2025-10-24 17:04:07,558 - INFO - downloader - skip downloading file 000009.jpg
2025-10-24 17:04:07,559 - INFO - downloader - skip downloading file 000010.jpg
2025-10-24 17:04:08,

   -> 키워드 'thai stir fried noodles'로 크롤링 시도...


2025-10-24 17:05:11,841 - INFO - parser - parsing result page https://www.google.com/search?q=thai+stir+fried+noodles&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:05:12,040 - INFO - downloader - skip downloading file 000001.jpg
2025-10-24 17:05:12,054 - INFO - downloader - skip downloading file 000002.jpg
2025-10-24 17:05:12,055 - INFO - downloader - skip downloading file 000003.jpg
2025-10-24 17:05:12,056 - INFO - downloader - skip downloading file 000004.jpg
2025-10-24 17:05:12,058 - INFO - downloader - skip downloading file 000005.jpg
2025-10-24 17:05:12,059 - INFO - downloader - skip downloading file 000006.jpg
2025-10-24 17:05:15,057 - INFO - downloader - image #7	https://rosasthai.com/wp-content/uploads/fly-images/5303/egg-fried-noodles-739x924-c.png
2025-10-24 17:05:15,060 - INFO - downloader - skip downloading file 000008.jpg
2025-10-24 17:05:15,062 - INFO - downloader - skip downloading file 000009.jpg
2025-10-24 17:05:15,064 - INFO - downloader - skip downloading file 000

   -> 키워드 '팟타이'로 크롤링 시도...


2025-10-24 17:05:35,509 - INFO - parser - parsing result page https://www.google.com/search?q=%ED%8C%9F%ED%83%80%EC%9D%B4&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:05:35,696 - INFO - downloader - skip downloading file 000001.jpg
2025-10-24 17:05:35,699 - INFO - downloader - skip downloading file 000002.jpg
2025-10-24 17:05:35,700 - INFO - downloader - skip downloading file 000003.jpg
2025-10-24 17:05:35,702 - INFO - downloader - skip downloading file 000004.jpg
2025-10-24 17:05:35,703 - INFO - downloader - skip downloading file 000005.jpg
2025-10-24 17:05:35,704 - INFO - downloader - skip downloading file 000006.jpg
2025-10-24 17:05:35,705 - INFO - downloader - skip downloading file 000007.jpg
2025-10-24 17:05:35,706 - INFO - downloader - skip downloading file 000008.jpg
2025-10-24 17:05:35,707 - INFO - downloader - skip downloading file 000009.jpg
2025-10-24 17:05:35,708 - INFO - downloader - skip downloading file 000010.jpg
2025-10-24 17:05:35,710 - INFO - downloader - skip do

✅ pad_thai 클래스 총 104장 다운로드 완료!

[INFO] === bun_cha 클래스 다운로드 시작 ===
   -> 키워드 'bún chả'로 크롤링 시도...


2025-10-24 17:05:47,663 - INFO - parser - parsing result page https://www.google.com/search?q=b%C3%BAn+ch%E1%BA%A3&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:05:48,462 - ERROR - downloader - Response status code 404, file http://www.savourydays.com/wp-content/uploads/2016/08/c%C3%A1ch-l%C3%A0m-b%C3%BAn-ch%E1%BA%A3-banner.jpg
2025-10-24 17:05:49,436 - ERROR - downloader - Response status code 401, file https://vcdn1-giadinh.vnecdn.net/2021/01/08/nh2-1610098826-2348-1610099449.jpg
2025-10-24 17:05:50,742 - INFO - downloader - image #1	https://cdn.tgdd.vn/2022/01/CookRecipe/GalleryStep/thanh-pham-69.jpg
2025-10-24 17:05:51,330 - INFO - downloader - image #2	https://img-global.cpcdn.com/recipes/83c17e7c30d6c02d/680x781f0.497515_0.5_1.0q80/bun-ch%E1%BA%A3-n%C6%B0%E1%BB%9Bng-ha-n%E1%BB%99i-recipe-main-photo.jpg
2025-10-24 17:05:52,555 - INFO - downloader - image #3	https://comichefhome.vn/wp-content/uploads/2023/08/cach-lam-bun-cha-nuoc.jpg
2025-10-24 17:05:53,444 - ERROR - downloader 

   -> 키워드 'bun cha hanoi'로 크롤링 시도...


2025-10-24 17:07:18,913 - INFO - parser - parsing result page https://www.google.com/search?q=bun+cha+hanoi&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:07:19,091 - INFO - downloader - skip downloading file 000001.jpg
2025-10-24 17:07:19,095 - INFO - downloader - skip downloading file 000002.jpg
2025-10-24 17:07:19,097 - INFO - downloader - skip downloading file 000003.jpg
2025-10-24 17:07:20,377 - INFO - downloader - image #4	https://www.deluxegrouptours.vn/wp-content/uploads/2024/01/buncha.png
2025-10-24 17:07:20,380 - INFO - downloader - skip downloading file 000005.jpg
2025-10-24 17:07:20,381 - INFO - downloader - skip downloading file 000006.jpg
2025-10-24 17:07:21,607 - INFO - downloader - image #7	https://motogo.tours/wp-content/uploads/2024/09/Bun-Cha-10.jpg
2025-10-24 17:07:21,610 - INFO - downloader - skip downloading file 000008.jpg
2025-10-24 17:07:21,613 - INFO - downloader - skip downloading file 000009.jpg
2025-10-24 17:07:24,791 - INFO - downloader - image #10	https

   -> 키워드 'vietnamese grilled pork noodle'로 크롤링 시도...


2025-10-24 17:08:18,286 - INFO - parser - parsing result page https://www.google.com/search?q=vietnamese+grilled+pork+noodle&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:08:18,448 - INFO - downloader - skip downloading file 000001.jpg
2025-10-24 17:08:19,144 - INFO - parser - parsing result page https://www.google.com/search?q=vietnamese+grilled+pork+noodle&ijn=1&start=100&tbs=isz%3Am&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File [35m"C:\Users\it\anaconda3\Lib\threading.py"[0m, line [35m1043[0m, in [35m_bootstrap_inner[0m
    [31mself.run[0m[1;31m()[0m
    [31m~~~~~~~~[0m[1;31m^^[0m
  File [35m"C:\Users\it\AppData\Roaming\Python\Python313\site-packages\ipykernel\ipkernel.py"[0m, line [35m772[0m, in [35mrun_closure[0m
    [31m_threading_Thread_run[0m[1;31m(self)[0m
    [31m~~~~~~~~~~~~~~~~~~~~~[0m[1;31m^^^^^^[0m
  File [35m"C:\Users\it\anaconda3\Lib\threading.py"[0m, line [35m994[0m, in [35mrun[0m
    [31mself._

   -> 키워드 '분짜'로 크롤링 시도...


2025-10-24 17:08:25,939 - INFO - parser - parsing result page https://www.google.com/search?q=%EB%B6%84%EC%A7%9C&ijn=0&start=0&tbs=isz%3Am&tbm=isch
2025-10-24 17:08:26,131 - INFO - downloader - skip downloading file 000001.jpg
2025-10-24 17:08:26,134 - INFO - downloader - skip downloading file 000002.jpg
2025-10-24 17:08:26,136 - INFO - downloader - skip downloading file 000003.jpg
2025-10-24 17:08:26,137 - INFO - downloader - skip downloading file 000004.jpg
2025-10-24 17:08:26,138 - INFO - downloader - skip downloading file 000005.jpg
2025-10-24 17:08:26,139 - INFO - downloader - skip downloading file 000006.jpg
2025-10-24 17:08:26,140 - INFO - downloader - skip downloading file 000007.png
2025-10-24 17:08:26,141 - INFO - downloader - skip downloading file 000008.jpg
2025-10-24 17:08:26,142 - INFO - downloader - skip downloading file 000009.jpg
2025-10-24 17:08:26,143 - INFO - downloader - skip downloading file 000010.jpg
2025-10-24 17:08:26,144 - INFO - downloader - skip downloading

✅ bun_cha 클래스 총 97장 다운로드 완료!

🎉 모든 Google 이미지 크롤링 작업이 완료되었습니다.
