# NY Times 웹 크롤링(API)

In [1]:
import requests
from bs4 import BeautifulSoup

# 1. Target URL
url = "http://quotes.toscrape.com/page/1/"

# 2. HTTP request
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from silently parsing an HTTP error page (which
# would simply print nothing below).
response = requests.get(url, timeout=5)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# 3. Extract every quote together with its author
quotes = soup.find_all("div", class_="quote")

for quote in quotes:
    text = quote.find("span", class_="text").get_text()
    author = quote.find("small", class_="author").get_text()
    print(f"{text} — {author}")

“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.” — Albert Einstein
“It is our choices, Harry, that show what we truly are, far more than our abilities.” — J.K. Rowling
“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.” — Albert Einstein
“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.” — Jane Austen
“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.” — Marilyn Monroe
“Try not to become a man of success. Rather become a man of value.” — Albert Einstein
“It is better to be hated for what you are than to be loved for what you are not.” — André Gide
“I have not failed. I've just found 10,000 ways that won't work.” — Thomas A. Edison
“A woman is like a tea bag; you never know how strong it is until it's in hot water.” — Eleanor Roos

In [8]:
import requests
import pandas as pd

import os

# NYT API key. An NYT_API_KEY environment variable takes precedence so the
# credential does not have to live in the notebook.
# NOTE(review): the literal fallback only keeps the cell runnable as before; a key
# committed in a notebook should be treated as leaked - rotate it and drop the fallback.
API_KEY = os.environ.get("NYT_API_KEY", 'eDciAY1vTfhGEWBJhUOYtEns4SvgbUEt')

# Most Popular "viewed" endpoint; the trailing `1` is presumably the look-back
# period in days (confirm against the NYT API reference).
url = 'https://api.nytimes.com/svc/mostpopular/v2/viewed/1.json'
params = {
    "api-key" : API_KEY
}

# Fail fast on a stalled connection or an HTTP error (e.g. a rejected key)
# instead of hitting a confusing KeyError on data['results'] further down.
response = requests.get(url, params=params, timeout=5)
response.raise_for_status()

data = response.json()
# Flatten the list of article records into one row per article.
df = pd.json_normalize(data['results'])
df.head(1)

Unnamed: 0,uri,url,id,asset_id,source,published_date,updated,section,subsection,nytdsection,...,byline,type,title,abstract,des_facet,org_facet,per_facet,geo_facet,media,eta_id
0,nyt://article/145cfe92-7c5f-5da3-8ecf-6167d2e7...,https://www.nytimes.com/2025/06/17/nyregion/br...,100000010234457,100000010234457,New York Times,2025-06-17,2025-06-17 18:49:31,New York,,new york,...,By Luis Ferré-Sadurní,Article,Brad Lander Is Arrested by ICE Agents at Immig...,"Mr. Lander, the New York City comptroller and ...","[Immigration and Emigration, Elections, Mayors]","[Immigration and Customs Enforcement (US), Off...","[Lander, Brad]",[],"[{'type': 'image', 'subtype': 'photo', 'captio...",0


# 단일 학교 1일 급식 정보 가져오기

In [25]:
import requests
import pandas as pd

import os

EDU_CODE = 'B10'         # sent as ATPT_OFCDC_SC_CODE
SCHOOL_CODE = '7021105'  # sent as SD_SCHUL_CODE
# NEIS API key. A NEIS_API_KEY environment variable takes precedence so the
# credential does not have to live in the notebook.
# NOTE(review): the literal fallback only keeps the cell runnable as before; a key
# committed in a notebook should be treated as leaked - rotate it and drop the fallback.
API_KEY = os.environ.get('NEIS_API_KEY', 'dda13d8b986a418caac237bb175b4c02')
TARGET_DATE = '20250618' # Wednesday

# 학교의 이름 가져오는 함수 
def get_school_name(edu_code, school_code):
    """Look up a school's name through the NEIS ``schoolInfo`` endpoint.

    Args:
        edu_code: Value sent as ``ATPT_OFCDC_SC_CODE``.
        school_code: Value sent as ``SD_SCHUL_CODE``.

    Returns:
        The ``SCHUL_NM`` field of the first returned row, or the placeholder
        string "학교명조회불가" when the request fails, the status is not 200,
        the payload has no ``schoolInfo`` key, or any exception is raised.
    """
    print(f'학교 이름 조회 예정 : 교육청={edu_code}, 학교코드={school_code}')
    endpoint = 'https://open.neis.go.kr/hub/schoolInfo'
    query = {
        'KEY': API_KEY,
        'Type': 'json',
        'ATPT_OFCDC_SC_CODE': edu_code,
        'SD_SCHUL_CODE': school_code,
    }
    not_found = "학교명조회불가"
    try:
        reply = requests.get(endpoint, params=query, timeout=5)
        print(f"응답코드 : {reply.status_code}")
        if reply.status_code != 200:
            print("응답 실패:", reply.text)
            return not_found
        payload = reply.json()
        if "schoolInfo" not in payload:
            print("해당학교는 존재하지 않음", payload)
            return not_found
        # The rows live in the second element of the "schoolInfo" list.
        school = payload["schoolInfo"][1]["row"][0]["SCHUL_NM"]
        print(f"학교명: {school}")
        return school
    except Exception as err:
        # Best effort: report the problem and fall through to the placeholder.
        print(f"학교명 정보 조회 오류 : {err}")
    return not_found
        
# Smoke-test the lookup with the configured school; as the cell's last
# expression, the returned name is also shown as the cell output.
get_school_name(EDU_CODE, SCHOOL_CODE)

학교 이름 조회 예정 : 교육청=B10, 학교코드=7021105
응답코드 : 200
학교명: 서울이문초등학교


'서울이문초등학교'

In [26]:
def get_meal_info(edu_code, school_code):
    """Fetch the meal rows of one school for the module-level ``TARGET_DATE``.

    Queries the NEIS ``mealServiceDietInfo`` endpoint with ``MLSV_YMD`` set to
    ``TARGET_DATE``.

    Args:
        edu_code: Value sent as ``ATPT_OFCDC_SC_CODE``.
        school_code: Value sent as ``SD_SCHUL_CODE``.

    Returns:
        The list stored under ``data["mealServiceDietInfo"][1]["row"]``, or an
        empty list when the status is not 200, the key is missing, or any
        exception is raised.
    """
    print(f"급식 정보 조회 중: 날짜={TARGET_DATE}")
    endpoint = "https://open.neis.go.kr/hub/mealServiceDietInfo"
    query = {
        "KEY": API_KEY,
        "Type": "json",
        "ATPT_OFCDC_SC_CODE": edu_code,
        "SD_SCHUL_CODE": school_code,
        "MLSV_YMD": TARGET_DATE,
    }
    try:
        reply = requests.get(endpoint, params=query, timeout=5)
        print(f"mealService 응답코드: {reply.status_code}")
        if reply.status_code != 200:
            print("응답 실패:", reply.text)
            return []
        payload = reply.json()
        if "mealServiceDietInfo" not in payload:
            print("mealServiceDietInfo 키 없음:", payload)
            return []
        # The rows live in the second element of the "mealServiceDietInfo" list.
        rows = payload["mealServiceDietInfo"][1]["row"]
        print(f"급식 데이터 수: {len(rows)}")
        return rows
    except Exception as err:
        # Best effort: report the problem and fall through to an empty result.
        print(f"급식 정보 조회 오류: {err}")
    return []

# Collect the data
school_name = get_school_name(EDU_CODE, SCHOOL_CODE)
meals = get_meal_info(EDU_CODE, SCHOOL_CODE)

# One flat record per meal row returned by the API.
data = [
    {
        "교육청코드": EDU_CODE,
        "학교코드": SCHOOL_CODE,
        "학교명": school_name,
        "급식일자": meal.get("MLSV_YMD"),
        "식사명": meal.get("MMEAL_SC_NM"),
        "급식식단": meal.get("DDISH_NM"),
    }
    for meal in meals
]

df = pd.DataFrame(data)
print("수집된 행 수:", len(df))
print(df.head())

# Name the file after the date that was actually queried; the previous
# hard-coded "20241030" suffix did not match TARGET_DATE.
csv_path = f"서울_초등학교_급식_{TARGET_DATE}.csv"
# utf-8-sig writes a BOM, which lets Excel detect the encoding of the Korean text.
df.to_csv(csv_path, index=False, encoding="utf-8-sig")
print(f"CSV 저장 완료: {csv_path}")

학교 이름 조회 예정 : 교육청=B10, 학교코드=7021105
응답코드 : 200
학교명: 서울이문초등학교
급식 정보 조회 중: 날짜=20250618
mealService 응답코드: 200
급식 데이터 수: 1
수집된 행 수: 1
  교육청코드     학교코드       학교명      급식일자 식사명  \
0   B10  7021105  서울이문초등학교  20250618  중식   

                                                급식식단  
0  발아현미밥 <br/>카레소스(닭고기)* (2.5.6.12.13.15.16.18)<b...  
CSV 저장 완료: 서울_초등학교_급식_20241030.csv


# 1개 학교 한 달간 데이터 수집
- 날짜만 변경

In [29]:
# 기간 설정 코드
from datetime import datetime, timedelta
# Window covering the 30 days up to now, rendered as YYYYMMDD strings.
today = datetime.today()
one_month_ago = today - timedelta(days=30)
FROM_DATE, TO_DATE = (f"{moment:%Y%m%d}" for moment in (one_month_ago, today))

# Last expression of the cell: display both bounds.
(FROM_DATE, TO_DATE)

('20250519', '20250618')

In [30]:
import requests
import pandas as pd
from datetime import datetime, timedelta

import os

EDU_CODE = 'B10'         # sent as ATPT_OFCDC_SC_CODE
SCHOOL_CODE = '7021105'  # sent as SD_SCHUL_CODE
# NEIS API key. A NEIS_API_KEY environment variable takes precedence so the
# credential does not have to live in the notebook.
# NOTE(review): the literal fallback only keeps the cell runnable as before; a key
# committed in a notebook should be treated as leaked - rotate it and drop the fallback.
API_KEY = os.environ.get('NEIS_API_KEY', 'dda13d8b986a418caac237bb175b4c02')
TARGET_DATE = '20250618' # Wednesday

# Query window: the 30 days up to now, as YYYYMMDD strings.
today = datetime.today()
one_month_ago = today - timedelta(days=30)
FROM_DATE = one_month_ago.strftime('%Y%m%d')
TO_DATE = today.strftime('%Y%m%d')

# 학교의 이름 가져오는 함수 
def get_school_name(edu_code, school_code):
    """Look up a school's name through the NEIS ``schoolInfo`` endpoint.

    NOTE(review): this repeats the definition from the earlier single-day
    cell; consider defining it once and reusing it.

    Args:
        edu_code: Value sent as ``ATPT_OFCDC_SC_CODE``.
        school_code: Value sent as ``SD_SCHUL_CODE``.

    Returns:
        The ``SCHUL_NM`` field of the first returned row, or the placeholder
        string "학교명조회불가" when the request fails, the status is not 200,
        the payload has no ``schoolInfo`` key, or any exception is raised.
    """
    print(f'학교 이름 조회 예정 : 교육청={edu_code}, 학교코드={school_code}')
    endpoint = 'https://open.neis.go.kr/hub/schoolInfo'
    query = {
        'KEY': API_KEY,
        'Type': 'json',
        'ATPT_OFCDC_SC_CODE': edu_code,
        'SD_SCHUL_CODE': school_code,
    }
    not_found = "학교명조회불가"
    try:
        reply = requests.get(endpoint, params=query, timeout=5)
        print(f"응답코드 : {reply.status_code}")
        if reply.status_code != 200:
            print("응답 실패:", reply.text)
            return not_found
        payload = reply.json()
        if "schoolInfo" not in payload:
            print("해당학교는 존재하지 않음", payload)
            return not_found
        # The rows live in the second element of the "schoolInfo" list.
        school = payload["schoolInfo"][1]["row"][0]["SCHUL_NM"]
        print(f"학교명: {school}")
        return school
    except Exception as err:
        # Best effort: report the problem and fall through to the placeholder.
        print(f"학교명 정보 조회 오류 : {err}")
    return not_found
        
def get_meal_info(edu_code, school_code, from_date, to_date):
    """Fetch the meal rows of one school for a date range.

    Queries the NEIS ``mealServiceDietInfo`` endpoint with ``MLSV_FROM_YMD`` /
    ``MLSV_TO_YMD`` set to the given bounds.

    Args:
        edu_code: Value sent as ``ATPT_OFCDC_SC_CODE``.
        school_code: Value sent as ``SD_SCHUL_CODE``.
        from_date: Start of the range, sent as ``MLSV_FROM_YMD`` (the callers
            in this notebook pass YYYYMMDD strings).
        to_date: End of the range, sent as ``MLSV_TO_YMD``.

    Returns:
        The list stored under ``data["mealServiceDietInfo"][1]["row"]``, or an
        empty list when the status is not 200, the key is missing, or any
        exception is raised.
    """
    # Log the range that is actually requested. This used to print the
    # module-level TARGET_DATE, which is unrelated to the query and made the
    # function fail with NameError whenever that global was not defined.
    print(f"급식 정보 조회 중: 날짜={from_date}~{to_date}")
    url = "https://open.neis.go.kr/hub/mealServiceDietInfo"
    params = {
        "KEY": API_KEY,
        "Type": "json",
        "ATPT_OFCDC_SC_CODE": edu_code,
        "SD_SCHUL_CODE": school_code,
        "MLSV_FROM_YMD": from_date,
        "MLSV_TO_YMD": to_date,
        # Only one page is requested; presumably ranges yielding more than
        # 100 rows would need further pIndex values - confirm against the API spec.
        "pIndex": 1,
        "pSize": 100,
    }
    try:
        res = requests.get(url, params=params, timeout=5)
        print(f"mealService 응답코드: {res.status_code}")
        if res.status_code == 200:
            data = res.json()
            if "mealServiceDietInfo" in data:
                # The rows live in the second element of the list.
                rows = data["mealServiceDietInfo"][1]["row"]
                print(f"급식 데이터 수: {len(rows)}")
                return rows
            print("mealServiceDietInfo 키 없음:", data)
        else:
            print("응답 실패:", res.text)
    except Exception as e:
        # Best effort: report the problem and fall through to an empty result.
        print(f"급식 정보 조회 오류: {e}")
    return []

# Collect the data: school name once, then every meal row in the date window.
school_name = get_school_name(EDU_CODE, SCHOOL_CODE)
meals = get_meal_info(EDU_CODE, SCHOOL_CODE, FROM_DATE, TO_DATE)

# Columns that are identical for every row of this school.
shared_columns = {
    "교육청코드": EDU_CODE,
    "학교코드": SCHOOL_CODE,
    "학교명": school_name,
}

data = []
for meal in meals:
    record = {
        **shared_columns,
        "급식일자": meal.get("MLSV_YMD"),
        "식사명": meal.get("MMEAL_SC_NM"),
        "급식식단": meal.get("DDISH_NM"),
    }
    data.append(record)

df = pd.DataFrame(data)
print("수집된 행 수:", len(df))
print(df.head())

# CSV export is left disabled in this cell:
#df.to_csv("서울_초등학교_급식_20241030.csv", index=False, encoding="utf-8-sig")
#print("CSV 저장 완료: 서울_초등학교_급식_20241030.csv")

학교 이름 조회 예정 : 교육청=B10, 학교코드=7021105
응답코드 : 200
학교명: 서울이문초등학교
급식 정보 조회 중: 날짜=20250618
mealService 응답코드: 200
급식 데이터 수: 21
수집된 행 수: 21
  교육청코드     학교코드       학교명      급식일자 식사명  \
0   B10  7021105  서울이문초등학교  20250519  중식   
1   B10  7021105  서울이문초등학교  20250520  중식   
2   B10  7021105  서울이문초등학교  20250521  중식   
3   B10  7021105  서울이문초등학교  20250522  중식   
4   B10  7021105  서울이문초등학교  20250523  중식   

                                                급식식단  
0  차수수밥* <br/>김치콩나물국^ (5.6.9)<br/>오이볶음 (5)<br/>순살...  
1  강황밥 <br/>감자옹심이국 (5.6.17)<br/>미역줄기볶음* (5)<br/>오...  
2  부지갱이나물밥&양념장 (5.6.13)<br/>북어계란국* (1.5.6)<br/>비름...  
3  귀리밥* <br/>호박잎된장국y (5.6.18)<br/>참나물무침* (5.6)<br...  
4  발아현미밥 <br/>어묵국y (1.2.5.6.7.8.13.16.18)<br/>고춧잎...  
