### 키워드 검색 (모든 블로그)

In [46]:
import json
import os
import re
import sys
import time
import urllib.parse
import urllib.request

import pandas as pd
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

load_dotenv()

# Validate credentials and the keyword BEFORE launching Chrome, so that an
# early sys.exit() cannot leave a browser process running.
client_id = os.getenv("NAVER_CLIENT_ID")
client_secret = os.getenv("NAVER_CLIENT_SECRET")
if not client_id or not client_secret:
    print("Error: NAVER_CLIENT_ID or NAVER_CLIENT_SECRET not set. Please set them in .env or directly in the script.")
    sys.exit(1)

# Only the search keyword is read from the user.
keyword = input("검색할 키워드를 입력하세요: ").strip()
if not keyword:
    print("검색어를 입력해야 합니다.")
    sys.exit(1)
encText = urllib.parse.quote(keyword)

# Everything else uses fixed defaults.
end = 1        # number of result pages to request
display = 10   # number of results requested per page

# WebDriver setup (ChromeDriver, headless).
# NOTE(review): the driver is created and quit in this cell but never used
# for any page load — confirm whether it is still needed here.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")

try:
    driver = webdriver.Chrome(service=webdriver.chrome.service.Service(ChromeDriverManager().install()), options=options)
    driver.implicitly_wait(3)
except Exception as e:
    print(f"Error initializing ChromeDriver: {e}")
    sys.exit(1)

# Parallel lists: index i of each list describes the same blog post.
naver_urls = []
postdate = []
titles = []

print(f"\n 1 ~ {end} 페이지까지 크롤링 진행합니다 (한 번에 {display}개)")

# Strips HTML tags (e.g. the <b> highlight markers) from titles.
tag_pattern = re.compile(r'<[^>]*>')

# Call the Naver search API page by page and collect matching posts.
try:
    for page in range(end):
        # `start` is the 1-based index of the first RESULT, not a page number,
        # so consecutive pages must advance by `display` to avoid overlap.
        start = page * display + 1
        # Per the Naver Search API docs `start` may not exceed 1000.
        if start > 1000:
            print(f"Stopping: start={start} exceeds the API limit of 1000")
            break
        url = f"https://openapi.naver.com/v1/search/blog?query={encText}&start={start}&display={display}"
        request = urllib.request.Request(url)
        request.add_header("X-Naver-Client-Id", client_id)
        request.add_header("X-Naver-Client-Secret", client_secret)
        try:
            # The context manager closes the connection; the timeout keeps a
            # stalled request from blocking the cell forever.
            with urllib.request.urlopen(request, timeout=10) as response:
                rescode = response.getcode()
                if rescode == 200:
                    response_body = response.read()
                else:
                    response_body = None
            if response_body is not None:
                data = json.loads(response_body.decode('utf-8'))['items']
                for row in data:
                    # Keep only posts hosted on blog.naver.com.
                    if 'blog.naver.com' in row['link']:
                        naver_urls.append(row['link'])
                        postdate.append(row['postdate'])
                        titles.append(tag_pattern.sub('', row['title']))
                time.sleep(2)  # pause between calls to stay under the API rate limit
            else:
                print(f"Error Code: {rescode}")
        except Exception as e:
            # Best effort: report the failed page and continue with the next one.
            print(f"Error fetching data for start={start}: {e}")
finally:
    # Always release the browser, even if the loop is interrupted.
    driver.quit()

# Print the results and save them to CSV.
if naver_urls:
    print(f"\nFound {len(naver_urls)} blog posts for keyword '{keyword}':")
    df = pd.DataFrame({
        'Title': titles,
        'Post Date': postdate,
        'URL': naver_urls
    })
    # Newest first; 'Post Date' values are sorted as they came from the API.
    df = df.sort_values(by='Post Date', ascending=False).reset_index(drop=True)

    for i, row in df.iterrows():
        print(f"{i+1}. [{row['Post Date']}] {row['Title']}: {row['URL']}")

    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv('naver_blog_posts.csv', index=False, encoding='utf-8-sig')
    print("\nResults saved to naver_blog_posts.csv")
else:
    print(f"\nNo posts found for keyword '{keyword}'.")



 1 ~ 1 페이지까지 크롤링 진행합니다 (한 번에 10개)

Found 10 blog posts for keyword '삼성':
1. [20250730] 안양아파트경매 안양역삼성래미안 입찰일정: https://blog.naver.com/kangheart0124/223952134002
2. [20250729] 삼성 로봇청소기 비교 아이닉 물걸레 후기: https://blog.naver.com/dltmdgns2525/223950466240
3. [20250729] 삼성건조기 AS 소음 수리 비용 N 열교환기 필터 청소 방법: https://blog.naver.com/fmfaith/223951713004
4. [20250729] 삼성전자 테슬라 향 반도체 관련주 실제 수혜주 선별: https://blog.naver.com/q1772/223950796934
5. [20250729] 삼성카드 추천 id on 혜택 신규 발급 캐시백: https://blog.naver.com/kcah/223951464199
6. [20250729] 삼성 진공청소기 추천 유선청소기 VC33M2105LD: https://blog.naver.com/ssuujin/223950807704
7. [20250729] 삼성전자 주가 7만전자와 테슬라 AI6, 단순한 환호는 아니다: https://blog.naver.com/rec1820/223950758245
8. [20250729] 테슬라와 22조 계약 삼성전자 주가 전망 떡상 한 이유: https://blog.naver.com/alcmskfl17/223951558745
9. [20250728] 삼성전자 주가 7만원 회복 탈출기회인가?: https://blog.naver.com/alswl09100/223950063580
10. [20250728] 삼성전자 주가, 테슬라 덕에 7만전자 뚫었다.. 쭉 가나?: https://blog.naver.com/suji2573/223950355616

Results saved to naver_blog_posts.csv

In [45]:
# Always raises AssertionError when this cell runs.
# NOTE(review): presumably a deliberate stop so that "Run All" halts at this
# cell — confirm; remove if no longer needed.
assert False

AssertionError: 

### 메르 블로그 최신글 가져오기 
* 다른 블로그에는 적용되지 않음

In [47]:
import requests
import json
import os
from dotenv import load_dotenv
import urllib3

# Silence urllib3's InsecureRequestWarning for the rest of the session.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Load environment variables (the credentials are expected in a .env file).
load_dotenv()

# Naver Open API credentials, read from the environment.
CLIENT_ID = os.getenv("NAVER_CLIENT_ID")
CLIENT_SECRET = os.getenv("NAVER_CLIENT_SECRET")

# Fail fast when either credential is missing or empty.
if not (CLIENT_ID and CLIENT_SECRET):
    raise ValueError("NAVER_CLIENT_ID or NAVER_CLIENT_SECRET not set in .env file")

# Naver blog search endpoint (JSON variant).
URL = "https://openapi.naver.com/v1/search/blog.json"

# The single blog this cell targets, and the display name used to filter results.
BLOG_ID = "ranto28"
TARGET_BLOGGER_NAME = "메르의 블로그"

# Search terms tried in order: blog-scoped query, bare blog ID, blogger name.
QUERIES = [
    f"from:blog.naver.com/{BLOG_ID}",
    BLOG_ID,
    TARGET_BLOGGER_NAME,
]


# Authentication headers sent with every API request.
headers = {}
headers["X-Naver-Client-Id"] = CLIENT_ID
headers["X-Naver-Client-Secret"] = CLIENT_SECRET

# Function to fetch blog posts
def fetch_blog_posts(query, start=1, display=100, timeout=10):
    """Fetch one page of blog search results, newest first.

    Args:
        query: Search string sent as the ``query`` parameter.
        start: 1-based index of the first result to return.
        display: Number of results requested for this page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON response as a dict when it contains an ``"items"``
        key; ``None`` when the request fails, the server answers with an
        HTTP error status, or the payload has no ``"items"`` key. Failures
        are reported with ``print`` rather than raised.
    """
    params = {
        "query": query,
        "display": display,
        "start": start,
        "sort": "date"  # Sort by date (latest first)
    }
    try:
        # NOTE(review): verify=False disables TLS certificate verification
        # while the API credentials are sent in the request headers. It is
        # kept as-is because it appears deliberate, but it should be removed
        # once the local certificate store works.
        response = requests.get(
            URL,
            headers=headers,
            params=params,
            verify=False,
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.HTTPError as e:
        # Read the status from the exception's own response rather than
        # relying on the local variable still being bound.
        status_code = e.response.status_code if e.response is not None else None
        print(f"HTTP error for query='{query}': {e}. Status code: {status_code}")
        if status_code == 401:
            print("Authentication failed. Check NAVER_CLIENT_ID and NAVER_CLIENT_SECRET.")
        return None
    except requests.exceptions.RequestException as e:
        # Covers the remaining requests failures, including timeouts.
        print(f"Failed to fetch data for query='{query}': {e}")
        return None

    if "items" not in data:
        print(f"No items in response for query='{query}', start={start}. Response: {data}")
        return None
    return data

# Collect posts across multiple queries and pages
unique_links = set()  # Links already collected, to de-duplicate across queries
posts = []  # Full post dicts as returned by the API
max_results = 1000  # Upper bound on the result offsets that are requested
display = 100  # Results requested per API call
blog_link_fragment = f"blog.naver.com/{BLOG_ID}"
for query in QUERIES:
    print(f"Fetching posts for query: {query}")
    for start in range(1, max_results + 1, display):
        data = fetch_blog_posts(query, start, display)
        if not data or "items" not in data:
            # Request failed or returned nothing usable: give up on this query.
            break
        items = data["items"]
        for post in items:
            # Keep only posts written by the target blogger on the target blog.
            if post.get("bloggername") != TARGET_BLOGGER_NAME:
                continue
            if blog_link_fragment not in post.get("link", ""):
                continue
            link = post.get("link")
            if link not in unique_links:
                unique_links.add(link)
                posts.append(post)
        if len(items) < display:
            # A short (or empty) page means the results are exhausted, so
            # requesting further offsets would only waste API quota.
            break

# Sort posts by postdate (descending); posts without a date sort last
posts = sorted(posts, key=lambda x: x.get("postdate", ""), reverse=True)

# Output the results
if posts:
    print(f"Found {len(posts)} blog posts from {BLOG_ID} by '{TARGET_BLOGGER_NAME}' (sorted by date, descending):")
    for i, post in enumerate(posts, 1):
        # Drop the <b>...</b> markers found in the returned titles.
        title = post.get("title", "No title").replace("<b>", "").replace("</b>", "")
        link = post.get("link", "No link")
        postdate = post.get("postdate", "No date")
        print(f"{i}. [{postdate}] {title}: {link}")
else:
    print(f"No posts found for blog {BLOG_ID} by '{TARGET_BLOGGER_NAME}'. Possible reasons:")
    print("- Invalid API credentials.")
    print(f"- No posts by '{TARGET_BLOGGER_NAME}' or blog ID '{BLOG_ID}' not associated with this blogger.")
    print("- Blog has no indexed posts or queries failed.")
    print("- Network issues or API rate limit exceeded.")

# Save results to a file (written even when `posts` is empty)
with open("blog_posts.json", "w", encoding="utf-8") as f:
    json.dump(posts, f, ensure_ascii=False, indent=2)
# Report success only after the file has been closed.
print("Results saved to blog_posts.json")

Fetching posts for query: from:blog.naver.com/ranto28
Fetching posts for query: ranto28
Fetching posts for query: 메르의 블로그
Found 15 blog posts from ranto28 by '메르의 블로그' (sorted by date, descending):
1. [20250730] 삼성전자 근황 3 (feat 평택캠퍼스, 텍사스, 테슬라 수주): https://blog.naver.com/ranto28/223951301937
2. [20250729] 삼성전자 근황 2 (feat TSMC, 3나노 수율, 평택 ): https://blog.naver.com/ranto28/223951258353
3. [20250728] 조선업 근황 업데이트 (feat MRO, 군산항, HD현대중공업): https://blog.naver.com/ranto28/223947898476
4. [20250719] 이스라엘이 시리아 수도를 폭격하는 비밀 2(feat 하레디): https://blog.naver.com/ranto28/223938662740
5. [20250719] 이스라엘이 시리아 수도를 폭격하는 비밀 1 (feat 드루즈족): https://blog.naver.com/ranto28/223938411525
6. [20250718] 중국과 충돌하고 있는 필리핀 근황 2 (feat 마르코스 vs... : https://blog.naver.com/ranto28/223937327745
7. [20250718] 중국과 충돌하고 있는 필리핀 근황 1 (feat 미군 철수, 워게임): https://blog.naver.com/ranto28/223937280433
8. [20250617] 늦생시(늦었다고 생각할 때 시작일 경우가 많다) 6(feat 풍산): https://blog.naver.com/ranto28/223901226240
9. [20250420] 이웃 24만명 블로그의 블로그 순위는?: