# Request, Response
- request : client가 server에게 자원을 요청하는 것
- response : client가 요청한 자원을 server가 응답하는 것


# requests 모듈
- 정적인 웹 문서를 읽어오는 용도로 사용
- 3rd party 모듈
- 표준라이브러리 : urlib

In [1]:
# 요청하는 도구인 requests 모듈을 import
import requests

# 요청할 URL을 변수에 저장
url = "https://www.naver.com/"
#url 주소의 해당 페이지를 서버에 요청하고 결과 응답을 받음
res = requests.get(url)
# response[200] 은 정상 출력
# 404는 NOT FOUND
res.text[:200]

'   <!doctype html> <html lang="ko" class="fzoom"> <head> <meta charset="utf-8"> <meta name="Referrer" content="origin"> <meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta name="viewport" cont'

In [2]:
res.encoding

'UTF-8'

In [3]:
html = res.text
open('naver.html', 'w', encoding = 'UTF-8')

<_io.TextIOWrapper name='naver.html' mode='w' encoding='UTF-8'>

In [4]:
url = "http://api.sunrise-sunset.org/json?lat=36.7201600&lng=-4.4203400" # 특정일자의 일출과 일몰 정보 추출 
data = requests.get(url).json()
data

{'results': {'sunrise': '5:37:36 AM',
  'sunset': '7:04:40 PM',
  'solar_noon': '12:21:08 PM',
  'day_length': '13:27:04',
  'civil_twilight_begin': '5:11:48 AM',
  'civil_twilight_end': '7:30:27 PM',
  'nautical_twilight_begin': '4:39:16 AM',
  'nautical_twilight_end': '8:02:59 PM',
  'astronomical_twilight_begin': '4:05:09 AM',
  'astronomical_twilight_end': '8:37:07 PM'},
 'status': 'OK'}

In [5]:
url = "http://api.sunrise-sunset.org/json?lat=36.7201600&lng=-4.4203400"
data = requests.get(url)
# data.text

# 문자열 formatting
- {f"    "} f 포맷팅 : 형식 양식처럼 기본 틀이 갖춰짐
- f formatting : 문자열 처음 시작할 때 소문자 f를 붙임
- 문자열 안에서 {}안에 변수 이름을 넣으면 변수의 값이 들어감


In [6]:
a, b, c = 10, 11.5 ,'abcd'
sample = f'하나{a}둘{b}셋{c}넷 다섯'
# sample

In [7]:
date = '2022-08-05'
url = f"http://api.sunrise-sunset.org/json?lat=36.7201600&lng=-4.4203400&date={date}"
res = requests.get(url).json()['results']
res

{'sunrise': '5:25:27 AM',
 'sunset': '7:21:56 PM',
 'solar_noon': '12:23:42 PM',
 'day_length': '13:56:29',
 'civil_twilight_begin': '4:58:37 AM',
 'civil_twilight_end': '7:48:46 PM',
 'nautical_twilight_begin': '4:24:21 AM',
 'nautical_twilight_end': '8:23:02 PM',
 'astronomical_twilight_begin': '3:47:41 AM',
 'astronomical_twilight_end': '8:59:42 PM'}

In [8]:
def by_date(date):
    f"http://api.sunrise-sunset.org/json?lat=36.7201600&lng=-4.4203400&date={date}"
    return requests.get(url).json()['results']

In [9]:
by_date('2022-08-15')

#날짜가 바뀌면 다른 정보가 옴 -> 요청이 다르면 응답이 달라짐

{'sunrise': '5:25:27 AM',
 'sunset': '7:21:56 PM',
 'solar_noon': '12:23:42 PM',
 'day_length': '13:56:29',
 'civil_twilight_begin': '4:58:37 AM',
 'civil_twilight_end': '7:48:46 PM',
 'nautical_twilight_begin': '4:24:21 AM',
 'nautical_twilight_end': '8:23:02 PM',
 'astronomical_twilight_begin': '3:47:41 AM',
 'astronomical_twilight_end': '8:59:42 PM'}

## Data Frame
- Data가 틀에 맞게 저장된 형태
- 파이썬의 리스트와 딕셔너리로 data frame을 표현할 수 있음
- List : 데이터가 순서(index)대로 저장되어 있는 것
- Dictionary : key 와 value의 한 쌍으로 저장되어있는 것

In [10]:
sample_list=[]
sample_list.append({"date" : "2022-01-01", "dat" : "sat"})
sample_list.append({"date" : "2022-01-02", "dat" : "sun"})
sample_list.append({"date" : "2022-01-03", "dat" : "mon"})

sample_list

[{'date': '2022-01-01', 'dat': 'sat'},
 {'date': '2022-01-02', 'dat': 'sun'},
 {'date': '2022-01-03', 'dat': 'mon'}]

In [11]:
import pandas as pd
df = pd.DataFrame(sample_list)
df

Unnamed: 0,date,dat
0,2022-01-01,sat
1,2022-01-02,sun
2,2022-01-03,mon


In [12]:
df.to_csv('sample.csv', index = False)  #원하는 파일에 원하는 이름으로 저장을 하게 해준다. #인덱스를 제거하고 csv에 저장해줌

#PermissionError 출력하려는 파일이 열려있으면 나는 오류이다.

In [13]:
# df.to_csv('C:/Users/chlwn/OneDrive/바탕 화면/새 폴더'+'/'+'sample.csv', index = False)
#원하는 곳에 저장하는 방법

In [14]:
df2 = pd.read_csv('sample.csv')
df2

Unnamed: 0,date,dat
0,2022-01-01,sat
1,2022-01-02,sun
2,2022-01-03,mon


In [15]:
def by_date(date):
    f"http://api.sunrise-sunset.org/json?lat=36.7201600&lng=-4.4203400&date={date}"
    return requests.get(url).json()['results']

sample_list = []

sample_list.append(by_date('2023-01-01'))
sample_list.append(by_date('2023-01-02'))
sample_list.append(by_date('2023-01-03'))

sample_list

[{'sunrise': '5:25:27 AM',
  'sunset': '7:21:56 PM',
  'solar_noon': '12:23:42 PM',
  'day_length': '13:56:29',
  'civil_twilight_begin': '4:58:37 AM',
  'civil_twilight_end': '7:48:46 PM',
  'nautical_twilight_begin': '4:24:21 AM',
  'nautical_twilight_end': '8:23:02 PM',
  'astronomical_twilight_begin': '3:47:41 AM',
  'astronomical_twilight_end': '8:59:42 PM'},
 {'sunrise': '5:25:27 AM',
  'sunset': '7:21:56 PM',
  'solar_noon': '12:23:42 PM',
  'day_length': '13:56:29',
  'civil_twilight_begin': '4:58:37 AM',
  'civil_twilight_end': '7:48:46 PM',
  'nautical_twilight_begin': '4:24:21 AM',
  'nautical_twilight_end': '8:23:02 PM',
  'astronomical_twilight_begin': '3:47:41 AM',
  'astronomical_twilight_end': '8:59:42 PM'},
 {'sunrise': '5:25:27 AM',
  'sunset': '7:21:56 PM',
  'solar_noon': '12:23:42 PM',
  'day_length': '13:56:29',
  'civil_twilight_begin': '4:58:37 AM',
  'civil_twilight_end': '7:48:46 PM',
  'nautical_twilight_begin': '4:24:21 AM',
  'nautical_twilight_end': '8:23:

In [16]:
df4 = pd.DataFrame(sample_list)
df4

Unnamed: 0,sunrise,sunset,solar_noon,day_length,civil_twilight_begin,civil_twilight_end,nautical_twilight_begin,nautical_twilight_end,astronomical_twilight_begin,astronomical_twilight_end
0,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
1,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
2,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM


## 시간을 다루는 도구 time ✨✨✨✨✨
- 수집시간을 늘려서 상대 서버의 자원을 적게 사용할 수 있게함 (눈치 보기)
- 의도적으로 코드 실행 시간을 늦춤
- 웹 크롤링에서는 일반적으로 프로그래밍상 빠른 코드가 좋은 코드라는 논리에 해당되지 않음.


In [17]:
import time as t

for i in range(5):
    print(i)
    t.sleep(2)

0
1
2
3
4


In [18]:
import time as t
import pandas as pd

sample_list = []
for i in range(1, 32):
    if 1 <= i < 10: 
        sample_list.append(by_date(f'2023-01-0{i}'))
       
    else:
        sample_list.append(by_date(f'2023-01-{i}'))
       
df = pd.DataFrame(sample_list)
df

Unnamed: 0,sunrise,sunset,solar_noon,day_length,civil_twilight_begin,civil_twilight_end,nautical_twilight_begin,nautical_twilight_end,astronomical_twilight_begin,astronomical_twilight_end
0,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
1,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
2,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
3,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
4,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
5,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
6,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
7,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
8,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM
9,5:25:27 AM,7:21:56 PM,12:23:42 PM,13:56:29,4:58:37 AM,7:48:46 PM,4:24:21 AM,8:23:02 PM,3:47:41 AM,8:59:42 PM


In [19]:
df.to_csv('sample_data.csv', index=False)

In [20]:
#     http://api.sunrise-sunset.org/json?lat=36.7201600&lng=-4.4203400&date={date}
# protocol |         domain        / path 물음표 뒤에는 파라미터, 여러개의 파라미터는 &로 구분해준다.        