In [1]:
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

데이터를 긁어오는 방법
   1. 해당 서비스에서 제공하는 API를 requests 모듈로 요청한다.
   2. BeautifulSoup으로 HTML 코드를 통째로 가져와서 selector로 데이터를 고른다.
   3. Selenium으로 브라우저를 자동화하여 데이터를 가져온다.

### Requests
- http://docs.python-requests.org/en/master/

##### 네이버 주식 데이터 가져오기
- api 사용 : json 파싱을 한다.
- 네이버 주식 페이지에서 주식 데이터를 가져와 데이터 프레임으로 만들기
- http://m.stock.naver.com

In [5]:
def make_url(pageSize=10, page=1):
    """Build the URL of the Naver mobile stock listing JSON endpoint.

    Args:
        pageSize: Value placed in the ``pageSize`` query parameter.
        page: Value placed in the ``page`` query parameter.

    Returns:
        The complete request URL as a string.
    """
    base = "http://m.stock.naver.com/api/json/sise/siseListJson.nhn?menu=market_sum&sosok=0&pageSize="
    pieces = [base, str(pageSize), "&page=", str(page)]
    return "".join(pieces)

def get_data(url):
    """Fetch a stock listing from a JSON endpoint and return it as a DataFrame.

    The response body must be JSON holding the company records under
    ``result.itemList``; every record must provide the keys ``nm``, ``nv``,
    ``cv``, ``cr``, ``mks`` and ``aq``.

    Args:
        url: Endpoint to request, e.g. the value returned by ``make_url``.

    Returns:
        A ``pandas.DataFrame`` with one row per record and the columns
        종목, 시세, 전일비, 등락율, 시가총액, 거래량 (in that order). The frame is
        empty, but still has these columns, when ``itemList`` is empty.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        KeyError: If the payload or a record lacks one of the expected keys.
    """
    # Output column -> key of the corresponding value in each API record.
    field_by_column = {
        "종목": "nm",
        "시세": "nv",
        "전일비": "cv",
        "등락율": "cr",
        "시가총액": "mks",
        "거래량": "aq",
    }

    response = requests.get(url)
    # Fail with a clear HTTP error instead of a confusing JSON/Key error later.
    response.raise_for_status()
    companys = response.json()["result"]["itemList"]

    # Collect plain records and build the frame once; appending rows one at a
    # time with df.loc copies the frame on every insertion.
    records = [
        {column: company[field] for column, field in field_by_column.items()}
        for company in companys
    ]
    return pd.DataFrame(records, columns=list(field_by_column))

In [9]:
# Request the first page with 100 entries per page and preview the top rows.
url = make_url(pageSize=100, page=1)
df = get_data(url)
df.head()

Unnamed: 0,종목,시세,전일비,등락율,시가총액,거래량
0,삼성전자,2349000,-47000,-1.96,3032524,205525
1,SK하이닉스,68700,-1500,-2.14,500138,3030176
2,삼성전자우,1945000,-33000,-1.67,354974,16565
3,현대차,154000,-5000,-3.14,339226,373073
4,POSCO,370500,-11000,-2.88,323027,170006


##### Dark Sky API
- 날씨 정보를 알려주는 api
- https://darksky.net/dev
- pip3 install python-forecastio

In [10]:
import forecastio
# Dark Sky API key. Supply it through the FORECAST_TOKEN environment variable
# so the secret does not have to live in the notebook.
# NOTE(review): the literal below is kept only as a backward-compatible
# fallback. A key committed to source should be treated as leaked: rotate it
# and then remove the fallback.
FORECAST_TOKEN = os.environ.get("FORECAST_TOKEN", "c259d4aeb593bc5e83d0c3f7ed916f5d")

In [13]:
def forecast(lat,lng):
    """Return the current-conditions summary from the Dark Sky REST API.

    Args:
        lat: Latitude inserted into the request path.
        lng: Longitude inserted into the request path.

    Returns:
        The value stored at ``currently.summary`` in the JSON response.
    """
    endpoint = "https://api.darksky.net/forecast/{}/{},{}".format(FORECAST_TOKEN, lat, lng)
    payload = requests.get(endpoint).json()
    currently = payload["currently"]
    return currently["summary"]

In [14]:
# Coordinates to query, as (latitude, longitude).
lat, lng = 37.5124413, 126.9540519
forecast(lat, lng)

'Clear'

In [15]:
def forecast(lat,lng):
    """Return the hourly-forecast summary using the ``forecastio`` client.

    NOTE(review): this reuses the name ``forecast``, so running this cell
    replaces the requests-based ``forecast`` defined earlier in the notebook.

    Args:
        lat: Latitude passed to ``forecastio.load_forecast``.
        lng: Longitude passed to ``forecastio.load_forecast``.

    Returns:
        The ``summary`` attribute of the hourly data block.
    """
    # Use a distinct local name so the function's own name is not shadowed.
    loaded = forecastio.load_forecast(FORECAST_TOKEN, lat, lng)
    return loaded.hourly().summary

In [16]:
# Coordinates to query, as (latitude, longitude).
lat, lng = 37.5124413, 126.9540519
forecast(lat, lng)

'Clear throughout the day.'

### BeautifulSoup
- https://www.crummy.com/software/BeautifulSoup/bs4/doc/
- pip3 install bs4

##### 네이버 검색어 순위
- bs4 사용 : selector로 html element를 선택한다.
- 네이버 검색어 순위를 가져와 데이터 프레임으로 만들기
- http://naver.com

In [17]:
def naver_top20():
    """Scrape the keyword ranking shown on the Naver home page.

    Requests ``http://naver.com``, selects every element matching
    ``.ah_roll .ah_l .ah_item`` and reads the text of its ``.ah_r`` (rank) and
    ``.ah_k`` (keyword) children.

    Returns:
        A ``pandas.DataFrame`` with the string columns ``rank`` and
        ``keyword``, one row per matched item. The frame is empty, but still
        has both columns, when nothing matches the selector.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        AttributeError: If a matched item lacks a ``.ah_r`` or ``.ah_k`` child.
    """
    response = requests.get("http://naver.com")
    # Do not try to parse an error page as if it were the home page.
    response.raise_for_status()
    dom = BeautifulSoup(response.content, "html.parser")

    # Collect plain records and build the frame once; appending rows one at a
    # time with df.loc copies the frame on every insertion.
    records = [
        {
            "rank": item.select_one(".ah_r").text,
            "keyword": item.select_one(".ah_k").text,
        }
        for item in dom.select(".ah_roll .ah_l .ah_item")
    ]
    return pd.DataFrame(records, columns=["rank", "keyword"])

In [18]:
# Fetch the Naver ranking once and display the resulting frame.
# NOTE(review): the output saved for this cell is
# "NameError: name 'pd' is not defined", so it appears to have been run last in
# a kernel where the import cell had not been executed. Re-run the notebook top
# to bottom to refresh it.
naver_df = naver_top20()
naver_df

NameError: name 'pd' is not defined

##### 다음 검색어 순위

In [11]:
def daum_top10():
    """Scrape the keyword ranking shown on the Daum home page.

    Requests ``http://daum.net``, selects every element matching
    ``#mArticle ol.list_hotissue.issue_row.list_mini > li`` and reads the text
    of its ``.ir_wa`` (rank) and ``.link_issue`` (keyword) children. Every
    occurrence of "위" is removed from the rank text.

    Returns:
        A ``pandas.DataFrame`` with the string columns ``rank`` and
        ``keyword``, one row per matched item. The frame is empty, but still
        has both columns, when nothing matches the selector.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        AttributeError: If a matched item lacks a ``.ir_wa`` or
            ``.link_issue`` child.
    """
    response = requests.get("http://daum.net")
    # Do not try to parse an error page as if it were the home page.
    response.raise_for_status()
    dom = BeautifulSoup(response.content, "html.parser")

    # Collect plain records and build the frame once; appending rows one at a
    # time with df.loc copies the frame on every insertion.
    records = [
        {
            "rank": item.select_one(".ir_wa").text.replace("위", ""),
            "keyword": item.select_one(".link_issue").text,
        }
        for item in dom.select("#mArticle ol.list_hotissue.issue_row.list_mini > li")
    ]
    return pd.DataFrame(records, columns=["rank", "keyword"])

In [12]:
# Fetch the Daum ranking once and display the resulting frame.
daum_df = daum_top10()
daum_df

Unnamed: 0,rank,keyword
0,1,정형식 판사
1,2,으라차차 와이키키
2,3,이재용
3,4,박선영
4,5,국가장학금
5,6,올해의 여성영화인상
6,7,인교진
7,8,소이현
8,9,권인숙
9,10,선미


##### 중복된 키워드 찾아서 출력하기

In [13]:
# Keep each Daum keyword that occurs as a substring of at least one Naver
# keyword. regex=False makes str.contains match literally; by default the
# pattern is treated as a regular expression, so a keyword containing
# characters such as "(", "+" or "?" would mis-match or raise.
result = [
    keyword
    for keyword in daum_df["keyword"]
    if naver_df["keyword"].str.contains(keyword, regex=False).any()
]
result

['으라차차 와이키키', '이재용', '국가장학금', '인교진', '선미']

### file download

In [14]:
def download(title, download_link):
    """Stream a remote file to ``./data/<title>`` and report its size.

    Args:
        title: File name to create inside the ``./data/`` directory.
        download_link: URL to fetch.

    Returns:
        The number of bytes written to disk.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
    """
    download_dir = "./data/"
    download_path = download_dir + title
    size = 0
    response = requests.get(download_link, stream=True)
    try:
        # Check the status first so an error page is never saved as the file.
        response.raise_for_status()
        # open() does not create missing directories.
        os.makedirs(download_dir, exist_ok=True)
        with open(download_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # Count the real chunk length; the final chunk is usually
                    # shorter than chunk_size.
                    size += len(chunk)
    finally:
        # A streamed response holds its connection until it is closed.
        response.close()
    return size

In [15]:
# Download the file, then print the size returned by download() divided by 1024 * 1024.
title = "iPhone X is Here — Apple.mp4"
download_link = "http://bit.ly/2FLpRF9"
size = download(title, download_link)
print("download done : {} Mbyte".format(round(size / (1024 * 1024), 2)))

download done : 7.12 Mbyte
