## 1. urljoin 함수 사용

In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def scrape_nate_news_images(*, max_articles=5, timeout=10):
    """Print title, link and image URL for recent Nate news articles.

    For every section in the built-in section table, the listing page is
    downloaded and the anchors matching ``div.postSubject a`` are inspected.
    For each anchor the stripped text, the absolute article link and the
    absolute URL of an ``<img>`` nested inside the anchor are printed. When
    the anchor holds no ``<img>`` with a ``src`` attribute, a placeholder
    string is printed in place of the image URL.

    Args:
        max_articles: Upper bound on the anchors examined per section.
            ``None`` examines every matched anchor.
        timeout: Value forwarded to ``requests.get(..., timeout=...)``.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    base_url = 'https://news.nate.com'
    section_urls = {
        '최신뉴스': 'https://news.nate.com/recent?mid=n0100',
        '정치': 'https://news.nate.com/recent?mid=n0101',
        '경제': 'https://news.nate.com/recent?mid=n0102',
        '사회': 'https://news.nate.com/recent?mid=n0103',
        '세계': 'https://news.nate.com/recent?mid=n0104',
        'IT/과학': 'https://news.nate.com/recent?mid=n0105',
    }

    for section, url in section_urls.items():
        print(f'\n[섹션: {section}]')
        # requests never times out by default; bound the wait explicitly so
        # an unresponsive server cannot hang the caller.
        response = requests.get(url, timeout=timeout)
        # Surface HTTP errors instead of parsing an error page as a listing.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        for a_tag in soup.select('div.postSubject a')[:max_articles]:
            href = a_tag.get('href')
            if not href:
                # An anchor without a target cannot yield an article link.
                continue

            title = a_tag.get_text(strip=True)
            # urljoin resolves relative and protocol-relative paths against
            # the site root and leaves absolute URLs untouched.
            link = urljoin(base_url, href)

            # Only an image nested inside the anchor itself is considered.
            img_tag = a_tag.find('img')
            if img_tag is not None and 'src' in img_tag.attrs:
                image_url = urljoin(base_url, img_tag['src'])
            else:
                image_url = '이미지 없음'

            print(f'- 제목: {title}')
            print(f'  링크: {link}')
            print(f'  이미지: {image_url}')


In [9]:
from urllib.parse import urljoin

# Protocol-relative image path (starts with '//', so it carries a host but
# no scheme) used here to demonstrate how urljoin resolves it.
src = '//thumbnews.nateimg.co.kr/news90///news.nateimg.co.kr/orgImg/na/2025/07/23/7408335_high.jpg'
base_url = 'https://news.nate.com'

# Resolve the path against the site root to obtain an absolute image URL,
# then show the result.
image_url = urljoin(base_url, src)
print(image_url)

https://thumbnews.nateimg.co.kr/news90///news.nateimg.co.kr/orgImg/na/2025/07/23/7408335_high.jpg


## 2. Image 클래스와 display 함수 사용

In [12]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from IPython.display import Image, display

def scrape_nate_news_with_images(*, max_articles=5, timeout=10):
    """Print recent Nate news articles and render their images inline.

    For every section in the built-in section table, the listing page is
    downloaded and the ``<li>`` items matching
    ``div.mduSubjectList > ul > li`` are inspected. For each item the text
    and absolute link of its first ``<a>`` are printed. When the item also
    contains an ``<img>`` with a ``src`` attribute, the absolute image URL is
    printed and the image is shown through ``IPython.display``; otherwise a
    "no image" line is printed.

    Args:
        max_articles: Upper bound on the list items examined per section.
            ``None`` examines every matched item.
        timeout: Value forwarded to ``requests.get(..., timeout=...)``.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    base_url = 'https://news.nate.com'
    section_urls = {
        '최신뉴스': 'https://news.nate.com/recent?mid=n0100',
        '정치': 'https://news.nate.com/recent?mid=n0101',
        '경제': 'https://news.nate.com/recent?mid=n0102',
        '사회': 'https://news.nate.com/recent?mid=n0103',
        '세계': 'https://news.nate.com/recent?mid=n0104',
        'IT/과학': 'https://news.nate.com/recent?mid=n0105',
    }

    for section, url in section_urls.items():
        print(f'\n🔹 [섹션: {section}]\n')
        # requests never times out by default; bound the wait explicitly so
        # an unresponsive server cannot hang the caller.
        response = requests.get(url, timeout=timeout)
        # Surface HTTP errors instead of parsing an error page as a listing.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Each list item is expected to hold the article anchor and,
        # optionally, an image.
        articles = soup.select('div.mduSubjectList > ul > li')[:max_articles]

        for article in articles:
            a_tag = article.select_one('a')
            href = a_tag.get('href') if a_tag is not None else None
            if not href:
                # No anchor, or an anchor without a target: nothing to link.
                continue

            title = a_tag.get_text(strip=True)
            # urljoin resolves relative and protocol-relative paths against
            # the site root and leaves absolute URLs untouched.
            link = urljoin(base_url, href)

            # Title and link are printed the same way for both outcomes.
            print(f'📰 제목: {title}')
            print(f'🔗 링크: {link}')

            img_tag = article.select_one('img')
            if img_tag is not None and 'src' in img_tag.attrs:
                image_url = urljoin(base_url, img_tag['src'])
                print(f'🖼️ 이미지 링크: {image_url}')
                display(Image(url=image_url))
            else:
                print('🖼️ 이미지 없음\n')

## 3. 이미지 경로 추출

In [13]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from IPython.display import Image, display

def scrape_nate_news_with_images():
    base_url = 'https://news.nate.com'
    section_urls = {
        '최신뉴스': 'https://news.nate.com/recent?mid=n0100',
        '정치': 'https://news.nate.com/recent?mid=n0101',
        '경제': 'https://news.nate.com/recent?mid=n0102',
        '사회': 'https://news.nate.com/recent?mid=n0103',
        '세계': 'https://news.nate.com/recent?mid=n0104',
        'IT/과학': 'https://news.nate.com/recent?mid=n0105',
    }

    for section, url in section_urls.items():
        print(f'\n🔷 [섹션: {section}]\n')