### 라이브러리 및 모델 모듈 임포트

In [1]:
import os
import django
# Point Django at the project's settings module unless the environment
# already names one (setdefault never overrides an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rest.settings')
# Let synchronous ORM calls run even when an event loop is active.
# NOTE(review): presumably required because this runs inside a notebook
# kernel — confirm; Django documents this flag as unsafe outside that use.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
# Load settings and populate the app registry; must run before any model import.
django.setup()

In [4]:
from bs4 import BeautifulSoup
import requests 
import lxml
import time

In [40]:
import csv
from pathlib import Path

In [2]:
from django.conf import settings

In [3]:
from plants.models import PlantTemporary
from taxonomy.models import Species, Genus

### 상수 선언

In [None]:
# Service key sent as "serviceKey" with every API request; read from the
# ENV object exposed on the Django settings.
API_KEY = settings.ENV.API_KEY

In [5]:
# Every genus name stored in the database; each one is used as a search word.
SEARCH_WORDS_LIST = list(Genus.objects.values_list('name', flat=True))

In [6]:
# Root URL shared by every PlantService endpoint used below.
BASE_URL = "http://openapi.nature.go.kr/openapi/service/rest/PlantService"

# Plant encyclopedia list search
ENDPOINT = "/plntIlstrSearch"
# Query parameters common to every list-search request; the search word
# ("sw") is filled in per request by the scraping loop.
# NOTE(review): "st" presumably selects the search field and "numOfRows" the
# page size — confirm against the API documentation. No paging parameter is
# sent, so results beyond the first 1000 rows would not be fetched.
PARAMS = {"serviceKey": API_KEY, "st" : "2", "numOfRows":"1000"}

In [7]:
# Full request URL for the endpoint currently selected in ENDPOINT.
URL = BASE_URL + ENDPOINT

### 식물도감 목록 스크레이핑 및 XML 파일저장 

In [19]:
# Download the list-search result for every search word and cache it on disk
# as assets/plants_list/<word>.xml. Existing files are skipped, so the cell
# can be re-run to resume an interrupted crawl.
Path('assets/plants_list').mkdir(parents=True, exist_ok=True)

for word in SEARCH_WORDS_LIST:
    file_path = f'assets/plants_list/{word}.xml'
    if Path(file_path).exists():
        continue
    time.sleep(3)  # pause between requests to avoid hammering the API
    PARAMS['sw'] = word
    # Without a timeout a stalled connection would block the loop forever.
    response = requests.get(URL, params=PARAMS, timeout=30)
    if not response.ok:
        # Do not cache an error body: the exists() check above would then
        # skip this word on every later run and the data would never arrive.
        print(f'skipped {word}: HTTP {response.status_code}')
        continue
    soup = BeautifulSoup(response.text, features="xml")
    with open(file_path, 'w', encoding="UTF-8") as f:
        f.write(soup.prettify())

### 식물도감 상세정보 검색을 위해 XML 파일을 순회하여 키값(plantPilbkNo) 추출, CSV 파일로 저장

In [None]:
# Collect the plantPilbkNo value of every <item> found in the cached
# list-search XML files. The result keeps first-seen order and holds each
# id once, so the detail crawl gets a clean key list.
book_id_list = []
seen_ids = set()

# Only *.xml is read: a bare "*" also matches hidden or unrelated entries
# (e.g. .DS_Store), which would fail to open or decode. Sorting makes the
# output order independent of the filesystem's listing order.
for xml_file in sorted(Path("assets/plants_list/").glob("*.xml")):
    with open(xml_file, "r", encoding="UTF-8") as f:
        soup = BeautifulSoup(f, features="xml")
    for item in soup.find_all("item"):
        book_id = item.find("plantPilbkNo")
        if book_id is None:
            continue
        value = book_id.get_text().strip()
        # Skip empty ids and ids already collected from another file.
        if value and value not in seen_ids:
            seen_ids.add(value)
            book_id_list.append(value)

In [54]:
# Persist the collected ids as a single CSV row in assets/pilbook_id.csv.
# newline="" hands line-ending control to the csv module, as its
# documentation requires; without it some platforms emit extra "\r".
with open("assets/pilbook_id.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(book_id_list)

### 추출한 키 값을 이용하여 식물도감 상세정보 스크레이핑

In [43]:
# Switch to the detail endpoint. This rebinds ENDPOINT, URL and PARAMS, which
# were previously set up for the list search.
ENDPOINT = "/plntIlstrInfo"
URL = BASE_URL + ENDPOINT
# Only the service key is shared; the per-plant key ("q1") is added per request.
PARAMS = {"serviceKey": API_KEY,}

In [59]:
# Fetch the detail record for every id stored in assets/pilbook_id.csv and
# cache it as assets/plants_description/<id>.xml. Existing files are skipped,
# so the cell can be re-run to resume an interrupted crawl.
Path("assets/plants_description").mkdir(parents=True, exist_ok=True)

# newline="" is what the csv module expects of files handed to csv.reader.
# The handle is named id_file so the output handle below does not shadow it.
with open("assets/pilbook_id.csv", "r", newline="") as id_file:
    reader = csv.reader(id_file)
    for row in reader:
        for q1 in row:
            PARAMS["q1"] = q1
            file_path = f'assets/plants_description/{q1}.xml'
            if Path(file_path).exists():
                continue
            time.sleep(3)  # pause between requests to avoid hammering the API
            # Without a timeout a stalled connection would block the loop forever.
            response = requests.get(URL, params=PARAMS, timeout=30)
            if not response.ok:
                # Do not cache an error body: the exists() check above would
                # then skip this id on every later run.
                print(f'skipped {q1}: HTTP {response.status_code}')
                continue
            soup = BeautifulSoup(response.text, features="xml")
            with open(file_path, 'w', encoding="UTF-8") as f:
                f.write(soup.prettify())