In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import os
import time
import re
import requests
from bs4 import BeautifulSoup

# Set up the Selenium driver (starts a Chrome session used by the rest of the script)
driver = webdriver.Chrome()

# Create the output folder. exist_ok=True replaces the separate existence
# check, so there is no window between "check" and "create".
os.makedirs('./movie_posters', exist_ok=True)

# 1. Visit the URL
page_url = 'https://www.kmdb.or.kr/db/have/detailSearch/imageSearch'
driver.get(page_url)
time.sleep(3)  # fixed pause; presumably gives the page time to finish loading

# 2. Click the first button
try:
    first_button = driver.find_element(By.XPATH, "/html/body/div[2]/div[5]/div[2]/section/div/div[3]/div[3]/span/a")
    first_button.click()
    time.sleep(5)  # wait for the page to load after the click
except Exception as e:
    # Best-effort: report the failure and let the script carry on.
    print(f"첫 번째 버튼 클릭 중 오류 발생: {e}")

# 3. Image crawling function
def crawl_images(batch_size=100, timeout=30):
    """Download the poster images found on the page currently loaded in ``driver``.

    The page source is parsed with BeautifulSoup and every ``<span>`` whose
    ``style`` attribute contains ``background-image`` is inspected. The URL
    inside ``url(...)`` is extracted, and only URLs starting with
    ``http://file.koreafilm.or.kr/poster/`` are downloaded. Each image is
    written to ``movie_posters/poster_<total_images>.jpg``; the module-level
    counter ``total_images`` is incremented after every successful save.

    Args:
        batch_size: Maximum number of images to save in one call. Once this
            many have been saved the function returns immediately, leaving
            any remaining posters on the page unprocessed.
        timeout: Seconds to wait for the image server before giving up on a
            single download. Without a timeout a stalled connection would
            block the crawl indefinitely.

    Returns:
        The number of images saved by this call.
    """
    global total_images
    images_downloaded = 0
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    posters = soup.find_all('span', style=re.compile(r'background-image'))

    if not posters:
        print("No posters found on this page.")
        return images_downloaded

    for span in posters:
        img_url_match = re.search(r'url\((.*?)\)', span['style'])
        if not img_url_match:
            continue

        # The CSS value may be wrapped in single or double quotes.
        img_url = img_url_match.group(1).strip("'\"")
        if not img_url.startswith('http://file.koreafilm.or.kr/poster/'):
            continue

        try:
            # stream=True lets the status and headers be checked before the
            # body is fetched; the ``with`` block releases the connection on
            # every path, including the rejected ones.
            with requests.get(img_url, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    print(f"Error downloading image from {img_url}: Status code {response.status_code}")
                    continue

                # A missing header is treated as "not an image" rather than
                # raising KeyError.
                content_type = response.headers.get('Content-Type', '')
                if 'image' not in content_type:  # make sure it is an image file
                    print(f"Invalid content type: {content_type}")
                    continue

                filename = f'movie_posters/poster_{total_images}.jpg'
                with open(filename, 'wb') as handler:
                    handler.write(response.content)
        except Exception as e:
            # Deliberately broad: one bad image must not abort a long crawl.
            print(f"Error saving image from {img_url}: {e}")
            continue

        print(f"{img_url}, Saved {filename}")
        total_images += 1
        images_downloaded += 1

        # Stop once the batch limit is reached
        if images_downloaded >= batch_size:
            print(f"Batch limit reached: {batch_size} images processed.")
            return images_downloaded

    return images_downloaded

# 4. Click the buttons in order, crawling images after each click
buttons_xpath = [
    "/html/body/div[2]/div[5]/div[2]/section/div/div[3]/div[3]/div/a[1]",
    "/html/body/div[2]/div[5]/div[2]/section/div/div[3]/div[3]/div/a[2]",
    "/html/body/div[2]/div[5]/div[2]/section/div/div[3]/div[3]/div/a[3]",
    "/html/body/div[2]/div[5]/div[2]/section/div/div[3]/div[3]/div/a[4]",
    "/html/body/div[2]/div[5]/div[2]/section/div/div[3]/div[3]/div/span[2]/a[1]"
]

total_images = 0  # running count of saved posters; crawl_images uses it to number files
batch_size = 100  # batch size setting

try:
    # NOTE(review): 925 is presumably the number of button groups to walk
    # through on the site - confirm against the current page count.
    for _ in range(925):
        for xpath in buttons_xpath:
            try:
                button = driver.find_element(By.XPATH, xpath)
                button.click()
                time.sleep(5)  # wait for the page to load after the click
                images_in_batch = crawl_images(batch_size)  # process one batch

                # Pause briefly after a full batch
                if images_in_batch >= batch_size:
                    print("Pausing for 10 seconds to manage memory and system resources...")
                    time.sleep(10)
            except Exception as e:
                print(f"버튼 클릭 중 오류 발생: {e}")
                # Abandon the remaining buttons of this round; the outer loop
                # then starts again from the first button.
                break
finally:
    # Quit the driver even when the crawl is interrupted (for example by
    # Ctrl+C), so the browser process is not left running.
    driver.quit()

http://file.koreafilm.or.kr/poster/00/05/23/DPF018609_01.jpg, Saved movie_posters/poster_0.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPF018459_01.jpg, Saved movie_posters/poster_1.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPK014659_01.jpg, Saved movie_posters/poster_2.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPF018494_01.jpg, Saved movie_posters/poster_3.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPF018493_01.jpg, Saved movie_posters/poster_4.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPF018492_01.jpg, Saved movie_posters/poster_5.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPF018491_01.jpg, Saved movie_posters/poster_6.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPF018490_01.jpg, Saved movie_posters/poster_7.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPF018470_01.jpg, Saved movie_posters/poster_8.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPF018468_01.jpg, Saved movie_posters/poster_9.jpg
http://file.koreafilm.or.kr/poster/00/05/21/DPF018