Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNS 프로그래밍 5주차 #2

Open
jihyunjeongme opened this issue Oct 19, 2019 · 2 comments
Open

SNS 프로그래밍 5주차 #2

jihyunjeongme opened this issue Oct 19, 2019 · 2 comments

Comments

@jihyunjeongme
Copy link
Owner

jihyunjeongme commented Oct 19, 2019

# In a dotted name such as urllib.request, the part after the dot is a module inside the package: request is a module in the urllib package.
# Import the urllib.request module.
import urllib.request
from bs4 import BeautifulSoup

url = "https://news.naver.com/main/main.nhn?mode=LSD&mid=shm&sid1=101"

# Fetch the page with urllib.request.urlopen(). The `with` block closes the
# HTTP response once it has been parsed instead of leaving it open.
with urllib.request.urlopen(url) as html:
    print(type(html))

    # BeautifulSoup takes the HTML (here the open response object) as its
    # first argument and the parser name as its second; "html.parser" selects
    # the HTML parser.
    bs_obj = BeautifulSoup(html, "html.parser")

# CSS selector path to one headline link, kept as a reference for the lookup
# below:
#main_content > div > div._persist > div:nth-child(1) > div:nth-child(1) > div.cluster_body > ul > li:nth-child(1) > div.cluster_text > a

div = bs_obj.find("div", {"class": "_persist"})

# find() returns None when no element matches, so fall back to an empty list
# rather than raising AttributeError on the next call.
if div is not None:
    lis = div.find_all("a", {"class": "cluster_text_headline"})
else:
    lis = []

# Each item is an <a> headline element; print its visible text.
for li in lis:
    print(li.text)
@jihyunjeongme
Copy link
Owner Author

# requests 모듈을 읽어옴
import requests
from bs4 import BeautifulSoup

url = "https://news.naver.com/main/main.nhn?mode=LSD&mid=shm&sid1=101"

# Fetch the page with requests.get(). An explicit timeout is passed because
# requests otherwise waits indefinitely for an unresponsive server.
result_requests = requests.get(url, timeout=10)

# Raise on 4xx/5xx responses instead of silently parsing an error page.
result_requests.raise_for_status()

print(type(result_requests))

# The decoded HTML source of the response.
html = result_requests.text

# BeautifulSoup takes the HTML as its first argument and the parser name as
# its second; "html.parser" selects the HTML parser.
bs_obj = BeautifulSoup(html, "html.parser")

# CSS selector path to one headline link, kept as a reference for the
# selector below:
#main_content > div > div._persist > div:nth-child(1) > div:nth-child(1) > div.cluster_body > ul > li:nth-child(1) > div.cluster_text > a

# NOTE(review): this selector mirrors only the tag nesting of the path above,
# without its id/class names, so it matches every <a> found at that nesting
# anywhere in the document - confirm that is intended.
news_headline = bs_obj.select(
    'div > div > div > div > div > ul > li > div > a'
)

# Each item is an <a> element; print its visible text.
for li in news_headline:
    print(li.text)

@jihyunjeongme
Copy link
Owner Author

from selenium import webdriver
import os
import time

# Start Chrome using the chromedriver binary located in the current working
# directory.
browser = webdriver.Chrome(
    os.path.join(os.getcwd(), "chromedriver"),
)

# try/finally guarantees the browser session is shut down even when the page
# load or an element lookup raises.
try:
    browser.get("https://instagram.com/")
    # Fixed pause, presumably to let the page finish rendering before the
    # element lookup below.
    time.sleep(4)

    # Log in: locate the first <a> inside the first <p class="izU2O"> element
    # and click it.
    login_link = browser.find_element_by_css_selector(
        "p.izU2O"
    ).find_element_by_css_selector("a")

    login_link.click()
    time.sleep(3)
finally:
    # quit() ends the whole WebDriver session (all windows and the driver
    # process); close() would only close the current window.
    browser.quit()


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant