# 1. requests 모듈

* HTTP프로토콜을 사용할 수 있게 해주는 모듈
* 파이썬 기본 모듈인 urllib보다 사용이 편리
* 따로 설치 필요 pip install requests/conda install requests

# 2. requests 모듈 사용법

* 1. 모듈 불러오기: import requests
* 2. url을 변수에 저장
* 3. url파라미터를 payload 라는 변수에 딕셔너리 형태로 저장
    * payload = {파라미터1 : value, 파라미터2 : value2}
* 4. headers 도 headers 라는 변수에 딕셔너리 형태로 저장
    * headers = {파라미터1 : value, 파라미터2 : value2}
* 5. HTTP요청 보내기 get, post
    * get: r= requests.get(url, params=payload, headers=headers)
    * post: r = requests.post(url, data={key:value})
* 6. 만들어진 url 확인: print(r.url)
* 7. 응답코드확인: print(r.status_code)
* 8. 응답 요소 출력: r.text, r.content, r.json()

In [4]:
import requests
url = " "
payload = dict()
r = requests.get(url, params=payload)
print(r.url)
print(r.status_code)
response = r.text, r.content, r.json()

MissingSchema: Invalid URL '': No scheme supplied. Perhaps you meant https://?

In [3]:
# !conda install requests

In [5]:
import requests
url = "https://openapi.naver.com/v1/search/blog"
keyword = "핀테크"
payload = dict(query=keyword, display=100, start=1, sort="date")
headers = {"X-Naver-Client-Id" : "", "X-Naver-Client-Secret" : ""}
r = requests.get(url, params=payload, headers=headers)
print(r.url)
print(r.status_code)
response = r.json()
response

https://openapi.naver.com/v1/search/blog?query=%ED%95%80%ED%85%8C%ED%81%AC&display=100&start=1&sort=date
401


{'errorMessage': 'Not Exist Client ID : Authentication failed. (인증에 실패했습니다.)',
 'errorCode': '024'}

In [6]:
print(type(response))

<class 'dict'>


In [15]:
# response 딕셔너리가 있다고 가정
response = {'resultCode': '00', 'message': 'SUCCESS'}

# 'items' 키를 추가하고 값으로 빈 리스트를 할당
response['items'] = []

# 결과 확인: 'items' 키가 추가됨
print(response)

{'resultCode': '00', 'message': 'SUCCESS', 'items': []}


In [16]:
response['items']

[]

In [18]:
import pandas as pd

In [19]:
pd.DataFrame(response['items'])

In [20]:
import re
def text_clean(text):
    # html 태그를 없애는 정규표현식
    result = re.sub(r"</?[^>]+>", "", text)
    # 한글, 영문, 숫자 외의 모든 문자 제거 후 공백으로 변환
    result = re.sub(r"[^가-힣a-zA-Z0-9]", " ", result)
    result = result.replace("  ", " ").replace("  ", " ").replace("  ", " ")
    return result

In [21]:
result = {}
for item in response['items']:
    for key, value in item.items():
        if key in ["title", "description"]:
            result.setdefault(key, []).append(text_clean(value))
        else:
            result.setdefault(key, []).append(value)
result

{}

In [22]:
df = pd.DataFrame(result)
df.to_csv("./data/requests로naver_api데이터수집.csv", index=False, encoding="utf-8-sig")

In [23]:
from dotenv import load_dotenv
import os
load_dotenv()
import requests

user_id = os.getenv("Id")
user_secret = os.getenv("Secret")
url = "https://openapi.naver.com/v1/search/blog"
keyword = "핀테크"
payload = dict(query=keyword, display=100, start=1, sort="date")
headers = {"X-Naver-Client-Id" : user_id, "X-Naver-Client-Secret" : user_secret}
r = requests.get(url, params=payload, headers=headers)
print(r.url)
print(r.status_code)
response = r.json()
response

https://openapi.naver.com/v1/search/blog?query=%ED%95%80%ED%85%8C%ED%81%AC&display=100&start=1&sort=date
401


{'errorMessage': 'Not Exist Client Secret : Authentication failed. (인증에 실패했습니다.)',
 'errorCode': '024'}

In [24]:
from dotenv import load_dotenv
import os
load_dotenv(dotenv_path="./data/.env_naver")
import requests

user_id = os.getenv("user_Id")
user_secret = os.getenv("user_Secret")
url = "https://openapi.naver.com/v1/search/blog"
keyword = "핀테크"
payload = dict(query=keyword, display=100, start=1, sort="date")
headers = {"X-Naver-Client-Id" : user_id, "X-Naver-Client-Secret" : user_secret}
r = requests.get(url, params=payload, headers=headers)
print(r.url)
print(r.status_code)
response = r.json()
response

https://openapi.naver.com/v1/search/blog?query=%ED%95%80%ED%85%8C%ED%81%AC&display=100&start=1&sort=date
401


{'errorMessage': 'Not Exist Client ID : Authentication failed. (인증에 실패했습니다.)',
 'errorCode': '024'}

In [25]:
start_num = 1
for i in range(1, 11):
    if start_num < 901:
        start_num += 100
        print(start_num)
    elif start_num >= 901:
        start_num += 99
        print(start_num)

101
201
301
401
501
601
701
801
901
1000


In [26]:
# while 문을 이용해야 함
page = 1
if response['total'] // 100 > 10:
    page = 10
else:
    page = response['total'] // 100 + 1

KeyError: 'total'

In [27]:
99 // 100 +1 

1

In [28]:
from dotenv import load_dotenv
import os
load_dotenv(dotenv_path="./data/.env_naver")
import requests

num = 1
all_data = []
for page in range(11):
    user_id = os.getenv("user_Id")
    user_secret = os.getenv("user_Secret")
    url = "https://openapi.naver.com/v1/search/blog"
    keyword = "핀테크"
    payload = dict(query=keyword, display=100, start=num, sort="date")
    headers = {"X-Naver-Client-Id" : user_id, "X-Naver-Client-Secret" : user_secret}
    r = requests.get(url, params=payload, headers=headers)
    print(r.url)
    print(r.status_code)
    response = r.json()
    all_data.append(pd.DataFrame(response['items']))    
    
    if num < 901:
        num += 100
    elif num >= 901:
        num += 99
    print(num)

https://openapi.naver.com/v1/search/blog?query=%ED%95%80%ED%85%8C%ED%81%AC&display=100&start=1&sort=date
401


KeyError: 'items'

In [29]:
from dotenv import load_dotenv
import os
load_dotenv(dotenv_path="./data/.env_naver")
import requests

total_page = 1
page = 1
num = 1

all_data = []
while page <= total_page:
    user_id = os.getenv("user_Id")
    user_secret = os.getenv("user_Secret")
    url = f"https://openapi.naver.com/v1/search/blog"
    keyword = "핀테크"
    payload = dict(query=keyword, display=100, start=num, sort="date")
    headers = {"X-Naver-Client-Id" : user_id, "X-Naver-Client-Secret" : user_secret}
    r = requests.get(url, params=payload, headers=headers)
    print(r.url)
#     print(r.status_code)
    response = r.json()
    all_data.append(response['items'])

    # 전체 페이지 계산
    if response['total'] // 100 > 11:
        total_page = 11
    else:
        total_page = response['total'] // 100 + 1
        
    # page 증가
    page += 1
    
    #num이 1000되도록 
    if num < 901:
        num += 100
    elif num >= 901:
        num += 99

https://openapi.naver.com/v1/search/blog?query=%ED%95%80%ED%85%8C%ED%81%AC&display=100&start=1&sort=date


KeyError: 'items'

In [30]:
len(all_data)

0

In [31]:
result = {}
for page in all_data:
    for item in page:
        for key, value in item.items():
            if key in ["title", "description"]:
                result.setdefault(key, []).append(text_clean(value))
            else:
                result.setdefault(key, []).append(value)
final_df = pd.DataFrame(result)
final_df

In [32]:
final_df.to_csv("./data/네이버api수집1100개.csv", index=False, encoding="utf-8-sig")