In [1]:
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

### Requests
- http://docs.python-requests.org/en/master/

##### 네이버 주식 데이터 가져오기
- api 사용 : json 파싱을 한다.
- 네이버 주식 페이지에서 주식 데이터를 가져와 데이터 프레임으로 만들기
- http://m.stock.naver.com

In [2]:
def make_url(pageSize=10, page=1):
    """Build the request URL for the Naver mobile stock ``siseListJson`` endpoint.

    Args:
        pageSize: Value written into the ``pageSize`` query parameter.
        page: Value written into the ``page`` query parameter.

    Returns:
        The complete URL as a string.
    """
    endpoint = "http://m.stock.naver.com/api/json/sise/siseListJson.nhn?menu=market_sum&sosok=0"
    return "{}&pageSize={}&page={}".format(endpoint, pageSize, page)

def get_data(url):
    """Fetch a stock listing from the Naver JSON API and return it as a DataFrame.

    Args:
        url: API URL to request (for example one built by ``make_url``).

    Returns:
        pandas.DataFrame with the columns 종목, 시세, 전일비, 등락율, 시가총액
        and 거래량, holding one row per entry of ``result.itemList`` in the
        JSON payload. The frame is empty (columns only) when the list is empty.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the payload or one of its items lacks an expected key.
    """
    # Output column -> key of the same value inside each JSON item.
    field_map = {
        "종목": "nm",
        "시세": "nv",
        "전일비": "cv",
        "등락율": "cr",
        "시가총액": "mks",
        "거래량": "aq",
    }
    # A timeout keeps the cell from hanging forever on a stalled connection,
    # and raise_for_status surfaces HTTP errors instead of failing later
    # while parsing an error page as JSON.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    companys = response.json()["result"]["itemList"]
    # Build all rows first and construct the frame once; appending through
    # df.loc[len(df)] re-allocates the frame for every row.
    records = [
        {column: company[key] for column, key in field_map.items()}
        for company in companys
    ]
    return pd.DataFrame(records, columns=list(field_map))

In [3]:
# Request page 1 with a page size of 100, load it into a DataFrame,
# and display the tail of the frame as the cell output.
url = make_url(100,1)
df = get_data(url)
df.tail()

Unnamed: 0,종목,시세,전일비,등락율,시가총액,거래량
95,포스코대우,22500,-1500,-6.25,27759,551956
96,동서,27750,-650,-2.29,27667,71137
97,메리츠화재,23800,-900,-3.64,26260,76351
98,대림산업,75500,-3100,-3.94,26274,216100
99,만도,276000,-500,-0.18,25920,39966


##### Dark Sky API
- 날씨 정보를 알려주는 api
- https://darksky.net/dev
- pip3 install python-forecastio

In [5]:
import forecastio
# Dark Sky API key. It is read from the FORECAST_TOKEN environment variable
# (export it before starting the kernel) so the secret is never committed
# with the notebook. The key that used to be hard-coded on this line has been
# exposed and should be revoked. Falls back to an empty string when unset, in
# which case the API calls below will be rejected by the server.
FORECAST_TOKEN = os.environ.get("FORECAST_TOKEN", "")

In [6]:
def forecast(lat,lng):
    """Return the hourly forecast summary for a coordinate via the Dark Sky REST API.

    NOTE: a later cell defines another ``forecast`` (built on ``forecastio``);
    once that cell has run, it replaces this function in the kernel.

    Args:
        lat: Latitude inserted into the request URL.
        lng: Longitude inserted into the request URL.

    Returns:
        The value of ``hourly.summary`` from the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (for example when FORECAST_TOKEN is missing or invalid).
        KeyError: If the response has no ``hourly.summary`` entry.
    """
    url = "https://api.darksky.net/forecast/{}/{},{}".format(FORECAST_TOKEN, lat, lng)
    # Bound the wait and fail loudly on HTTP errors rather than trying to
    # index into an error payload.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.json()["hourly"]["summary"]

In [7]:
# Latitude / longitude to query; the cell output is the summary string
# returned by forecast().
lat = 37.5124413
lng = 126.9540519
forecast(lat,lng)

'Clear throughout the day.'

In [8]:
def forecast(lat,lng):
    """Return the hourly forecast summary for a coordinate using ``forecastio``.

    Args:
        lat: Latitude passed to ``forecastio.load_forecast``.
        lng: Longitude passed to ``forecastio.load_forecast``.

    Returns:
        The ``summary`` attribute of the object returned by ``hourly()``.
    """
    # Load the forecast with the module-level API key, then take its hourly block.
    hourly_block = forecastio.load_forecast(FORECAST_TOKEN, lat, lng).hourly()
    return hourly_block.summary

In [9]:
# Same coordinates as before, now served by the forecastio-based forecast();
# the cell output is the returned summary string.
lat = 37.5124413
lng = 126.9540519
forecast(lat,lng)

'Clear throughout the day.'

### BeautifulSoup
- https://www.crummy.com/software/BeautifulSoup/bs4/doc/
- pip3 install bs4

##### 네이버 검색어 순위
- bs4 사용 : CSS selector로 html element를 선택한다.
- 네이버 검색어 순위를 가져와 데이터 프레임으로 만들기
- http://naver.com

In [10]:
def naver_top20():
    """Scrape the keyword ranking shown on the Naver front page.

    Returns:
        pandas.DataFrame with the columns ``rank`` and ``keyword`` (both text
        taken from the page), one row per element matched by
        ``.ah_roll .ah_l .ah_item``. The frame is empty (columns only) when
        nothing matches.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        AttributeError: If a matched item lacks the ``.ah_r`` or ``.ah_k`` child.
    """
    # Bound the wait and surface HTTP errors instead of parsing an error page.
    response = requests.get("http://naver.com", timeout=10)
    response.raise_for_status()
    dom = BeautifulSoup(response.content, "html.parser")
    # Collect every row first and build the frame once; appending through
    # df.loc[len(df)] re-allocates the frame for each row.
    records = [
        {
            "rank": item.select_one(".ah_r").text,
            "keyword": item.select_one(".ah_k").text,
        }
        for item in dom.select(".ah_roll .ah_l .ah_item")
    ]
    return pd.DataFrame(records, columns=["rank", "keyword"])

In [11]:
# Scrape the Naver ranking and display the resulting frame.
naver_df = naver_top20()
naver_df

Unnamed: 0,rank,keyword
0,1,몰디브
1,2,이방카
2,3,임수현
3,4,국가장학금 소득분위 금액
4,5,지드래곤
5,6,디스패치
6,7,선미
7,8,한국장학재단 소득분위
8,9,청와대 국민청원
9,10,클로버필드 패러독스


##### 다음 검색어 순위

In [12]:
def daum_top10():
    """Scrape the keyword ranking shown on the Daum front page.

    Returns:
        pandas.DataFrame with the columns ``rank`` and ``keyword``, one row per
        ``li`` matched by the hot-issue list selector. ``rank`` is the text of
        the ``.ir_wa`` child with every "위" removed; ``keyword`` is the text
        of the ``.link_issue`` child. The frame is empty (columns only) when
        nothing matches.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        AttributeError: If a matched item lacks one of the expected children.
    """
    # Bound the wait and surface HTTP errors instead of parsing an error page.
    response = requests.get("http://daum.net", timeout=10)
    response.raise_for_status()
    dom = BeautifulSoup(response.content, "html.parser")
    # Collect every row first and build the frame once; appending through
    # df.loc[len(df)] re-allocates the frame for each row.
    records = [
        {
            "rank": item.select_one(".ir_wa").text.replace("위", ""),
            "keyword": item.select_one(".link_issue").text,
        }
        for item in dom.select("#mArticle ol.list_hotissue.issue_row.list_mini > li")
    ]
    return pd.DataFrame(records, columns=["rank", "keyword"])

In [13]:
# Scrape the Daum ranking and display the resulting frame.
daum_df = daum_top10()
daum_df

Unnamed: 0,rank,keyword
0,1,정형식 판사
1,2,몰디브
2,3,한국장학재단
3,4,김희애
4,5,선미
5,6,강수지
6,7,김국진
7,8,청와대 국민청원홈페이지
8,9,이방카
9,10,제주 날씨


##### 중복된 키워드 찾아서 출력하기

In [14]:
# Daum keywords that occur inside at least one Naver keyword.
# regex=False makes str.contains do a literal substring test; without it each
# keyword is compiled as a regular expression, so a keyword containing
# metacharacters such as "+", "(" or "?" would match the wrong text or raise.
result = [keyword for keyword in daum_df["keyword"] if naver_df["keyword"].str.contains(keyword, regex=False).any() ]
result

['정형식 판사', '몰디브', '한국장학재단', '김희애', '선미', '이방카']

### file download

In [14]:
def download(title, download_link):
    """Stream the file at ``download_link`` to ``./data/<title>``.

    Args:
        title: File name (relative to ``./data/``) to write to.
        download_link: URL to download.

    Returns:
        The number of bytes written to disk.

    Raises:
        requests.HTTPError: If the server answers with an error status; no
            file is created in that case.
    """
    download_path = "./data/" + title
    # Create the target directory on first use so a fresh checkout works.
    os.makedirs(os.path.dirname(download_path), exist_ok=True)
    size = 0
    # The context manager releases the streamed connection even if writing
    # fails part-way; the timeout bounds how long a stalled server can block.
    with requests.get(download_link, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(download_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # Count the bytes actually received: the final chunk is
                    # usually shorter than chunk_size.
                    size += len(chunk)
    return size

In [15]:
# Download the video behind the short link into ./data/ under the given
# file name, then report the size returned by download() divided by 1024 twice.
title = "iPhone X is Here — Apple.mp4"
download_link = "http://bit.ly/2FLpRF9"
size = download(title, download_link)
print("download done : {} Mbyte".format(round(size/1024/1024,2)))

download done : 7.12 Mbyte


In [None]:
#PM_ID_ct > div.header > div.section_navbar > div.area_hotkeyword.PM_CL_realtimeKeyword_base > div.ah_roll.PM_CL_realtimeKeyword_rolling_base > div