# 셀리니움 사용하기

### [1] 패키지 참조

In [7]:
import chromedriver_autoinstaller # selenium과 chrome 연결 프로그램
from selenium import webdriver    # 웹브라우저를 제어할 수 있는 패키지
from selenium.webdriver.support.ui import WebDriverWait  # chrome이 웹페이지 로딩을 완료할 때까지 최대 n초간 대기하는 기능.
from bs4 import BeautifulSoup  
import time
from selenium.webdriver.common.by import By

### [2] 크롬 브라우저 가동

In [8]:
chromedriver_autoinstaller.install()

driver = webdriver.Chrome()

driver.implicitly_wait(5)

### [3] 수집할 페이지로 이동

In [11]:
driver.get("https://data.hossam.kr/py/image.html")

## #03. 셀리니움을 통한 크롬 브라우저 제어

### [1] 키보드 입력 전달하기

#### (1) 검색어를 입력할 수 있는 요소를 가져온다.

In [13]:
query = WebDriverWait(driver, 3).until(lambda x: x.find_element(By.CSS_SELECTOR, "#query"))

query

<selenium.webdriver.remote.webelement.WebElement (session="df5541b3cd74a296d4221a913d9edc81", element="0F0FE4D4AA13ABA50C4AE86310DC41DC_element_4")>

#### (2) 해당 요소에 문자열을 입력한다.

In [14]:
query.clear()
query.send_keys("파이썬")


### [2] 마우스 클릭 전달하기

#### (1) 검색 버튼을 가져온다.

In [15]:
button = WebDriverWait(driver, 3).until(lambda x: x.find_element(By.CSS_SELECTOR, "#searchForm button"))

button

<selenium.webdriver.remote.webelement.WebElement (session="df5541b3cd74a296d4221a913d9edc81", element="0F0FE4D4AA13ABA50C4AE86310DC41DC_element_7")>

#### (2) 검색 버튼을 클릭한다.

In [17]:
button.click()

### [3] 스크롤을 화면 맨 밑으로 이동한다.

크롬브라우저로 하여금 Javascript 코드를 실행하도록 처리

- Javascript: 웹 페이지 개발 언어

아래의 구문은 스크롤이 y축으로 화면 맨 아래까지 이동하는 Javascript 구문

```js
window.scrollTo(0, document.body.scrollHeight);
```

In [20]:
for i in range(0, 10):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    time.sleep(3)

### [4] 브라우저상에 나타나는 소스코드 가져오기

이 코드는 크롬브라우저상의 개발자도구로 확인되는 내용과 동일하다.

#### (1) 소스코드 취득

In [22]:
myhtml = driver.page_source
myhtml

'<html lang="ko"><head>\n        <meta charset="UTF-8">\n        <meta name="viewport" content="width=device-width, initial-scale=1.0">\n        <title>Document</title>\n        <style>\n            #loading {\n                width: 100px;\n                height: 100px;\n                background: url(img/loading.gif) no-repeat center/cover;\n                position: absolute;\n                left: 50%;\n                top: 50%;\n                transform: translate(-50%, -50%);\n                z-index: 9999999999;\n                display: none;\n            }\n\n            #loading.active {\n                display: block;\n            }\n\n            body {\n                font-family: Arial;\n                padding-top: 127px;\n            }\n\n            #header {\n                position: fixed;\n                top: 0;\n                left: 0;\n                width: 100%;\n                background-color: #fff;\n                z-index: 1000;\n                pad

#### (2) 브라우저 닫기

In [24]:
driver.close()

InvalidSessionIdException: Message: invalid session id
Stacktrace:
	GetHandleVerifier [0x010D6E73+174291]
	(No symbol) [0x01000AC1]
	(No symbol) [0x00D16E8A]
	(No symbol) [0x00D40862]
	(No symbol) [0x00D66EBA]
	(No symbol) [0x00D62036]
	(No symbol) [0x00D61CC2]
	(No symbol) [0x00CF70DB]
	(No symbol) [0x00CF75DE]
	(No symbol) [0x00CF79EB]
	GetHandleVerifier [0x011749CC+820268]
	sqlite3_dbdata_init [0x01234EBE+652494]
	sqlite3_dbdata_init [0x012348D9+650985]
	sqlite3_dbdata_init [0x0122962C+605244]
	sqlite3_dbdata_init [0x0123586B+654971]
	(No symbol) [0x0100FEBC]
	(No symbol) [0x00CF6F4C]
	(No symbol) [0x00CF6AEA]
	(No symbol) [0x00E6522C]
	BaseThreadInitThunk [0x7770FCC9+25]
	RtlGetAppContainerNamedObjectPath [0x77B87C6E+286]
	RtlGetAppContainerNamedObjectPath [0x77B87C3E+238]


## #04. 원하는 내용 추출

### [1] 취득한 HTML코드를 bs4객체로 전환

In [26]:
soup = BeautifulSoup(myhtml)
soup

<html lang="ko"><head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<title>Document</title>
<style>
            #loading {
                width: 100px;
                height: 100px;
                background: url(img/loading.gif) no-repeat center/cover;
                position: absolute;
                left: 50%;
                top: 50%;
                transform: translate(-50%, -50%);
                z-index: 9999999999;
                display: none;
            }

            #loading.active {
                display: block;
            }

            body {
                font-family: Arial;
                padding-top: 127px;
            }

            #header {
                position: fixed;
                top: 0;
                left: 0;
                width: 100%;
                background-color: #fff;
                z-index: 1000;
                padding: 0px 10px 5px 10px;
                box-sizing: border-box;

### [2] bs4객체를 통해 수집하기를 원하는 요소 추출

In [28]:
img = soup.select("li > a > img")
img

[<img src="https://search2.kakaocdn.net/argon/130x130_85_c/IBLE742mwrG"/>,
 <img src="https://search4.kakaocdn.net/argon/130x130_85_c/8kJhWFOfzKc"/>,
 <img src="https://search2.kakaocdn.net/argon/130x130_85_c/IsLf95gD17p"/>,
 <img src="https://search3.kakaocdn.net/argon/130x130_85_c/JzT0rXXy3Nq"/>,
 <img src="https://search2.kakaocdn.net/argon/130x130_85_c/1mNOZCWck25"/>,
 <img src="https://search3.kakaocdn.net/argon/130x130_85_c/CwuojzUvOij"/>,
 <img src="https://search3.kakaocdn.net/argon/130x130_85_c/Ez5R1NigIdS"/>,
 <img src="https://search4.kakaocdn.net/argon/130x130_85_c/DbhlqtuvnIm"/>,
 <img src="https://search3.kakaocdn.net/argon/130x130_85_c/F6lQl3hwsDG"/>,
 <img src="https://search3.kakaocdn.net/argon/130x130_85_c/JtGgWeMwspG"/>,
 <img src="https://search3.kakaocdn.net/argon/130x130_85_c/DW4hwWZ2ZMO"/>,
 <img src="https://search2.kakaocdn.net/argon/130x130_85_c/3wO535MWPtf"/>,
 <img src="https://search4.kakaocdn.net/argon/130x130_85_c/BUVT6Pz70db"/>,
 <img src="https://search

### [3] 추출된 내용 활용하기

In [29]:
for i in img:
    print(i.attrs['src'])

https://search2.kakaocdn.net/argon/130x130_85_c/IBLE742mwrG
https://search4.kakaocdn.net/argon/130x130_85_c/8kJhWFOfzKc
https://search2.kakaocdn.net/argon/130x130_85_c/IsLf95gD17p
https://search3.kakaocdn.net/argon/130x130_85_c/JzT0rXXy3Nq
https://search2.kakaocdn.net/argon/130x130_85_c/1mNOZCWck25
https://search3.kakaocdn.net/argon/130x130_85_c/CwuojzUvOij
https://search3.kakaocdn.net/argon/130x130_85_c/Ez5R1NigIdS
https://search4.kakaocdn.net/argon/130x130_85_c/DbhlqtuvnIm
https://search3.kakaocdn.net/argon/130x130_85_c/F6lQl3hwsDG
https://search3.kakaocdn.net/argon/130x130_85_c/JtGgWeMwspG
https://search3.kakaocdn.net/argon/130x130_85_c/DW4hwWZ2ZMO
https://search2.kakaocdn.net/argon/130x130_85_c/3wO535MWPtf
https://search4.kakaocdn.net/argon/130x130_85_c/BUVT6Pz70db
https://search2.kakaocdn.net/argon/130x130_85_c/11XM7lDWmB4
https://search3.kakaocdn.net/argon/130x130_85_c/4ZdS67o3RxM
https://search2.kakaocdn.net/argon/130x130_85_c/6Ce8v8VxsYE
https://search1.kakaocdn.net/argon/130x1