## 웹 크롤링
웹 스크래핑
 - 각종 웹 사이트들에서 원하는 정보를 추출 <br>
 
웹 크롤러 
 - 웹 페이지를 방문해서 자료를 수집하는 프로그램

### HTML
- 웹 문서 구조 정의 ,컨텐츠를 표시하는 기본 마크업 언어
### CSS
- HTML 문서에 각종 시각적 요소를 정의하기 위한 스타일 시트 언어


## BeautifulSoup 모듈
- 홈페이지 내 데이터를 쉽게 추출할 수 잇도록 도와주는 파이썬 외부 라이브러리
- 웹 문서 내 수많은 HTML 태그들을 PARSER을 활용해 사용하기 편한 파이썬 객체로 만들어 제공
- 웸 문서구조를 알고 있다면, 아주 편하게 원하는 데이터를 뽑아 활용할 수 있음
- pip install beautifulsoup4

# 샌드위치 맛집 분석

In [1]:
from bs4 import BeautifulSoup

In [2]:
page = open("../../../tt.html","r",encoding='UTF-8').read()
soup = BeautifulSoup(page,'html.parser')
print(soup.prettify())
# prettify() 옵션은 html페이지의 내용 전체를 보고자 할 때

<!DOCTYPE html>
<html>
 <head>
  <title>
   Very Simple HTML Code
  </title>
 </head>
 <body>
  <div>
   <p class="inner-text first-item" id="first">
    Naver Home
    <a href="http://www.naver.com">
     Naver
    </a>
   </p>
   <p class="inner-text second_item">
    Happy Data Science.
    <a href="http://www.python.org" id="py-link">
     Python
    </a>
   </p>
  </div>
  <p class="outer-text">
   <b>
    <!--글씨 두껍게-->
    Have a nice day.
    <br/>
    Have a nice day2.
   </b>
  </p>
 </body>
</html>



In [3]:
list(soup.children)

['html',
 '\n',
 <html>
 <head>
 <title>
             Very Simple HTML Code
         </title>
 </head>
 <body>
 <div>
 <p class="inner-text first-item" id="first">
                 Naver Home
                 <a href="http://www.naver.com">Naver</a>
 </p>
 <p class="inner-text second_item">
                 Happy Data Science.
                 <a href="http://www.python.org" id="py-link">Python</a>
 </p>
 </div>
 <p class="outer-text">
 <b>
 <!--글씨 두껍게-->
                 Have a nice day.<br/>
                 Have a nice day2.
             </b>
 </p>
 </body>
 </html>,
 '\n']

In [4]:
html = list(soup.children)[2] # html 태그에 접속
html

<html>
<head>
<title>
            Very Simple HTML Code
        </title>
</head>
<body>
<div>
<p class="inner-text first-item" id="first">
                Naver Home
                <a href="http://www.naver.com">Naver</a>
</p>
<p class="inner-text second_item">
                Happy Data Science.
                <a href="http://www.python.org" id="py-link">Python</a>
</p>
</div>
<p class="outer-text">
<b>
<!--글씨 두껍게-->
                Have a nice day.<br/>
                Have a nice day2.
            </b>
</p>
</body>
</html>

In [5]:
body = list(html.children)[3]  # html 태그의 children 중 3번째를 조사
body

<body>
<div>
<p class="inner-text first-item" id="first">
                Naver Home
                <a href="http://www.naver.com">Naver</a>
</p>
<p class="inner-text second_item">
                Happy Data Science.
                <a href="http://www.python.org" id="py-link">Python</a>
</p>
</div>
<p class="outer-text">
<b>
<!--글씨 두껍게-->
                Have a nice day.<br/>
                Have a nice day2.
            </b>
</p>
</body>

In [6]:
#children을 이용해서 태그를 조사할 수도 있고 한번에 나타낼 수도 있음
soup.head

<head>
<title>
            Very Simple HTML Code
        </title>
</head>

In [7]:
# 접근해야 할 태그를 알고있다면 find, find_alll 명령을 사용
soup.find_all('p')

[<p class="inner-text first-item" id="first">
                 Naver Home
                 <a href="http://www.naver.com">Naver</a>
 </p>,
 <p class="inner-text second_item">
                 Happy Data Science.
                 <a href="http://www.python.org" id="py-link">Python</a>
 </p>,
 <p class="outer-text">
 <b>
 <!--글씨 두껍게-->
                 Have a nice day.<br/>
                 Have a nice day2.
             </b>
 </p>]

In [8]:
soup.find('p') # 첫 번째 p 태그를 찾음

<p class="inner-text first-item" id="first">
                Naver Home
                <a href="http://www.naver.com">Naver</a>
</p>

In [9]:
# p 태그의 class가 outer_text인 것을 찾아줌
soup.find_all('p',class_='outer-text')

[<p class="outer-text">
 <b>
 <!--글씨 두껍게-->
                 Have a nice day.<br/>
                 Have a nice day2.
             </b>
 </p>]

In [10]:
# p 태그의 id가 first인 것
soup.find_all(id="first")

[<p class="inner-text first-item" id="first">
                 Naver Home
                 <a href="http://www.naver.com">Naver</a>
 </p>]

In [11]:
soup.head

<head>
<title>
            Very Simple HTML Code
        </title>
</head>

In [12]:
soup.head.next_sibling # soup의 head 다음에 줄바꿈 문자가 있습니다

'\n'

In [13]:
soup.head.previous_sibling

'\n'

In [14]:
soup.head.next_sibling.next_sibling

<body>
<div>
<p class="inner-text first-item" id="first">
                Naver Home
                <a href="http://www.naver.com">Naver</a>
</p>
<p class="inner-text second_item">
                Happy Data Science.
                <a href="http://www.python.org" id="py-link">Python</a>
</p>
</div>
<p class="outer-text">
<b>
<!--글씨 두껍게-->
                Have a nice day.<br/>
                Have a nice day2.
            </b>
</p>
</body>

In [15]:
soup.body

<body>
<div>
<p class="inner-text first-item" id="first">
                Naver Home
                <a href="http://www.naver.com">Naver</a>
</p>
<p class="inner-text second_item">
                Happy Data Science.
                <a href="http://www.python.org" id="py-link">Python</a>
</p>
</div>
<p class="outer-text">
<b>
<!--글씨 두껍게-->
                Have a nice day.<br/>
                Have a nice day2.
            </b>
</p>
</body>

In [16]:
body.p # 제일 처음 나타나는 p태그

<p class="inner-text first-item" id="first">
                Naver Home
                <a href="http://www.naver.com">Naver</a>
</p>

In [17]:
body.p.next_sibling.next_sibling # 두번째 등장하는 p태그

<p class="inner-text second_item">
                Happy Data Science.
                <a href="http://www.python.org" id="py-link">Python</a>
</p>

In [18]:
for each_tag in soup.find_all('p'):
    print(each_tag.get_text())


                Naver Home
                Naver


                Happy Data Science.
                Python




                Have a nice day.
                Have a nice day2.
            



In [19]:
soup.find('p',class_='outer-text').get_text()

'\n\n\n                Have a nice day.\n                Have a nice day2.\n            \n'

In [20]:
for each_tag in soup.find_all('p')[2]:
    print(each_tag.get_text())





                Have a nice day.
                Have a nice day2.
            




In [21]:
soup.find_all('p')[2]

<p class="outer-text">
<b>
<!--글씨 두껍게-->
                Have a nice day.<br/>
                Have a nice day2.
            </b>
</p>

In [22]:
#태그가 있던 자리는 줄바꿈, 전체 텍스트
body.get_text()

'\n\n\n                Naver Home\n                Naver\n\n\n                Happy Data Science.\n                Python\n\n\n\n\n\n                Have a nice day.\n                Have a nice day2.\n            \n\n'

In [23]:
# 클릭 가능한 링크인 a 태그를 찾음
links = soup.find_all('a')
links

[<a href="http://www.naver.com">Naver</a>,
 <a href="http://www.python.org" id="py-link">Python</a>]

In [24]:
# href속성을 찾으면 링크 주소를 얻을 수 있음
for each in links:
    href = each['href']
    text = each.string
    print(text+'->'+href)

Naver->http://www.naver.com
Python->http://www.python.org


## request 모듈
http 요청.응답 구조
- 사용자가 원하는 정보 요청, 서버는 요청 확인 후 응답

## 크롬 개발자 도구를 이용해서 원하는 태그 찾기

In [25]:
from urllib.request import urlopen

In [26]:
url = "https://finance.naver.com/marketindex/"
page = urlopen(url)
soup = BeautifulSoup(page,'html.parser')
print(soup.prettify())

<script language="javascript" src="/template/head_js.naver?referer=info.finance.naver.com&amp;menu=marketindex&amp;submenu=market">
</script>
<script src="https://ssl.pstatic.net/imgstock/static.pc/20230629114642/js/info/jindo.min.ns.1.5.3.euckr.js" type="text/javascript">
</script>
<script src="https://ssl.pstatic.net/imgstock/static.pc/20230629114642/js/jindo.1.5.3.element-text-patch.js" type="text/javascript">
</script>
<div id="container" style="padding-bottom:0px;">
 <div class="market_include">
  <div class="market_data">
   <div class="market1">
    <div class="title">
     <h2 class="h_market1">
      <span>
       환전 고시 환율
      </span>
     </h2>
    </div>
    <!-- data -->
    <div class="data">
     <ul class="data_lst" id="exchangeList">
      <li class="on">
       <a class="head usd" href="/marketindex/exchangeDetail.naver?marketindexCd=FX_USDKRW" onclick="clickcr(this, 'fr1.usdt', '', '', event);">
        <h3 class="h_lst">
         <span class="blind">
          미국 U

In [27]:
soup.find_all()

[<script language="javascript" src="/template/head_js.naver?referer=info.finance.naver.com&amp;menu=marketindex&amp;submenu=market"></script>,
 <script src="https://ssl.pstatic.net/imgstock/static.pc/20230629114642/js/info/jindo.min.ns.1.5.3.euckr.js" type="text/javascript"></script>,
 <script src="https://ssl.pstatic.net/imgstock/static.pc/20230629114642/js/jindo.1.5.3.element-text-patch.js" type="text/javascript"></script>,
 <div id="container" style="padding-bottom:0px;">
 <div class="market_include">
 <div class="market_data">
 <div class="market1">
 <div class="title">
 <h2 class="h_market1"><span>환전 고시 환율</span></h2>
 </div>
 <!-- data -->
 <div class="data">
 <ul class="data_lst" id="exchangeList">
 <li class="on">
 <a class="head usd" href="/marketindex/exchangeDetail.naver?marketindexCd=FX_USDKRW" onclick="clickcr(this, 'fr1.usdt', '', '', event);">
 <h3 class="h_lst"><span class="blind">미국 USD</span></h3>
 <div class="head_info point_up">
 <span class="value">1,320.50</span>


In [28]:
soup.find_all('span','value')

[<span class="value">1,320.50</span>,
 <span class="value">912.74</span>,
 <span class="value">1,431.95</span>,
 <span class="value">181.39</span>,
 <span class="value">144.6200</span>,
 <span class="value">1.0888</span>,
 <span class="value">1.2617</span>,
 <span class="value">103.0000</span>,
 <span class="value">69.86</span>,
 <span class="value">1570.7</span>,
 <span class="value">1917.9</span>,
 <span class="value">80987.65</span>]

## 시카고 샌드위치 맛집 소개 사이트 접근

In [29]:
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen

url_base='https://www.chicagomag.com'
url_sub = '/Chicago-Magazine/November-2012/Best-Sandwiches-Chicago/'
url = Request(url_base+url_sub,headers = {'User-Agent':'Mozilla/5.0'})
html = urlopen(url)
soup = BeautifulSoup(html,'html.parser')
soup

<!DOCTYPE html>

<html lang="en-US">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible">
<link href="https://gmpg.org/xfn/11" rel="profile"/>
<script src="https://cmp.osano.com/16A1AnRt2Fn8i1unj/f15ebf08-7008-40fe-9af3-db96dc3e8266/osano.js"></script>
<title>The 50 Best Sandwiches in Chicago – Chicago Magazine</title>
<style type="text/css">
					.heateor_sss_button_instagram span.heateor_sss_svg,a.heateor_sss_instagram span.heateor_sss_svg{background:radial-gradient(circle at 30% 107%,#fdf497 0,#fdf497 5%,#fd5949 45%,#d6249f 60%,#285aeb 90%)}
						div.heateor_sss_horizontal_sharing a.heateor_sss_button_instagram span{background:#000!important;}div.heateor_sss_standard_follow_icons_container a.heateor_sss_button_instagram span{background:#000;}
										.heateor_sss_horizontal_sharing .heateor_sss_svg,.heateor_sss_standard_follow_icons_container .heateor_sss_svg{
							background-color: #000!important;
				background: #000!important;
							color: #f

In [30]:
soup.find_all('div','sammy')

[<div class="sammy" style="position: relative;">
 <div class="sammyRank">1</div>
 <div class="sammyListing"><a href="/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Old-Oak-Tap-BLT/"><b>BLT</b><br/>
 Old Oak Tap<br/>
 <em>Read more</em> </a></div>
 </div>,
 <div class="sammy" style="position: relative;">
 <div class="sammyRank">2</div>
 <div class="sammyListing"><a href="/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Au-Cheval-Fried-Bologna/"><b>Fried Bologna</b><br/>
 Au Cheval<br/>
 <em>Read more</em> </a></div>
 </div>,
 <div class="sammy" style="position: relative;">
 <div class="sammyRank">3</div>
 <div class="sammyListing"><a href="/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Xoco-Woodland-Mushroom/"><b>Woodland Mushroom</b><br/>
 Xoco<br/>
 <em>Read more</em> </a></div>
 </div>,
 <div class="sammy" style="position: relative;">
 <div class="sammyRank">4</div>
 <div class="sammyListing"><a href="/Chicago-Magazine/November-2012/Best-Sandwiches-i

In [31]:
len(soup.find_all('div','sammy'))

50

In [32]:
soup.find_all('div','sammy')[0]

<div class="sammy" style="position: relative;">
<div class="sammyRank">1</div>
<div class="sammyListing"><a href="/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Old-Oak-Tap-BLT/"><b>BLT</b><br/>
Old Oak Tap<br/>
<em>Read more</em> </a></div>
</div>

## 접근한 웹 페이지에서 원하는 데이터 추출하고 정리

In [33]:
tmp_one = soup.find_all('div','sammy')[0]
type(tmp_one)

bs4.element.Tag

In [34]:
tmp_one.find(class_='sammyRank')

<div class="sammyRank">1</div>

In [35]:
tmp_one.find(class_='sammyRank').get_text()

'1'

In [36]:
tmp_one.find(class_='sammyListing')

<div class="sammyListing"><a href="/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Old-Oak-Tap-BLT/"><b>BLT</b><br/>
Old Oak Tap<br/>
<em>Read more</em> </a></div>

In [37]:
# 메뉴 이름과 가게 이름이 같이 나옴
tmp_one.find(class_='sammyListing').get_text()

'BLT\nOld Oak Tap\nRead more '

In [38]:
tmp_one.find('a')['href']

'/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Old-Oak-Tap-BLT/'

In [39]:
# 정규표현식(regular express)
import re

tmp_string=tmp_one.find(class_='sammyListing').get_text()

#spㅣit -> 지정한 특정 패턴이 일치하면 분리
re.split(('\n'),tmp_string)
print(re.split(('\n'),tmp_string)[0])# 메뉴 이름
print(re.split(('\n'),tmp_string)[1])# 가게 이름

BLT
Old Oak Tap


In [40]:
# 절대 경로로 잡힌 URL은 그대로 두고
# 상대 경로로 잡힌 URL은 절대경로로 변경

from urllib.parse import urljoin

In [41]:
# 순위를 담을 빈 리스트 하나 
rank = []
# 메뉴를 담을 빈 리스트 하나
main_menu = []
# 카페 이름을 담을 빈 리스트 하나
cafe_name = []
# 접근 주소 url을 담을 빈 리스트 하나
url_add=[]

list_soup = soup.find_all('div','sammy')
for item in list_soup:
    rank.append(item.find(class_='sammyRank').get_text())
    tmp_string = item.find(class_='sammyListing').get_text()
    main_menu.append(re.split(('\n'),tmp_string)[0])
    cafe_name.append(re.split(('\n'),tmp_string)[1])
    url_add.append(urljoin(url_base,item.find('a')['href']))

In [42]:
print(rank[:5])
print(main_menu[:5])
print(cafe_name[:5])
print(url_add[:5])

['1', '2', '3', '4', '5']
['BLT', 'Fried Bologna', 'Woodland Mushroom', 'Roast Beef', 'PB&L']
['Old Oak Tap', 'Au Cheval', 'Xoco', 'Al’s Deli', 'Publican Quality Meats']
['https://www.chicagomag.com/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Old-Oak-Tap-BLT/', 'https://www.chicagomag.com/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Au-Cheval-Fried-Bologna/', 'https://www.chicagomag.com/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Xoco-Woodland-Mushroom/', 'https://www.chicagomag.com/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Als-Deli-Roast-Beef/', 'https://www.chicagomag.com/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Publican-Quality-Meats-PB-L/']


## 주피터 노트북에서 상태 진행바를 쉽게 만들어 주는 모듈

In [43]:
from tqdm import tqdm_notebook
import time
# 순위를 담을 빈 리스트 하나 
rank = []
# 메뉴를 담을 빈 리스트 하나
main_menu = []
# 카페 이름을 담을 빈 리스트 하나
cafe_name = []
# 접근 주소 url을 담을 빈 리스트 하나
url_add=[]

list_soup = soup.find_all('div','sammy')
bar_total = tqdm_notebook(list_soup)

for item in bar_total:
    rank.append(item.find(class_='sammyRank').get_text())
    tmp_string = item.find(class_='sammyListing').get_text()
    main_menu.append(re.split(('\n'),tmp_string)[0])
    cafe_name.append(re.split(('\n'),tmp_string)[1])
    url_add.append(urljoin(url_base,item.find('a')['href']))
    # 크롤링 하기 전 잠시 타임 딜레이
    time.sleep(0.05)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar_total = tqdm_notebook(list_soup)


  0%|          | 0/50 [00:00<?, ?it/s]

In [44]:
import pandas as pd
data = {'Rank':rank,'Menu':main_menu,'Cafe':cafe_name,'URL':url_add}
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Rank,Menu,Cafe,URL
0,1,BLT,Old Oak Tap,https://www.chicagomag.com/Chicago-Magazine/No...
1,2,Fried Bologna,Au Cheval,https://www.chicagomag.com/Chicago-Magazine/No...
2,3,Woodland Mushroom,Xoco,https://www.chicagomag.com/Chicago-Magazine/No...
3,4,Roast Beef,Al’s Deli,https://www.chicagomag.com/Chicago-Magazine/No...
4,5,PB&L,Publican Quality Meats,https://www.chicagomag.com/Chicago-Magazine/No...


In [45]:
df.to_csv('../../data/python_data/best_sandwiches_chicago.csv',sep=',',encoding='utf-8')

## 다수의 웹 페이지에 접근하여 원하는 정보 가져오기

In [46]:
from bs4 import BeautifulSoup
from urllib.request import urlopen

import pandas as pd

In [47]:
df = pd.read_csv('../../data/python_data/best_sandwiches_chicago.csv',index_col=0)
df

Unnamed: 0,Rank,Menu,Cafe,URL
0,1,BLT,Old Oak Tap,https://www.chicagomag.com/Chicago-Magazine/No...
1,2,Fried Bologna,Au Cheval,https://www.chicagomag.com/Chicago-Magazine/No...
2,3,Woodland Mushroom,Xoco,https://www.chicagomag.com/Chicago-Magazine/No...
3,4,Roast Beef,Al’s Deli,https://www.chicagomag.com/Chicago-Magazine/No...
4,5,PB&L,Publican Quality Meats,https://www.chicagomag.com/Chicago-Magazine/No...
5,6,Belgian Chicken Curry Salad,Hendrickx Belgian Bread Crafter,https://www.chicagomag.com/Chicago-Magazine/No...
6,7,Lobster Roll,Acadia,https://www.chicagomag.com/Chicago-Magazine/No...
7,8,Smoked Salmon Salad,Birchwood Kitchen,https://www.chicagomag.com/Chicago-Magazine/No...
8,9,Atomica Cemitas,Cemitas Puebla,https://www.chicagomag.com/Chicago-Magazine/No...
9,10,Grilled Laughing Bird Shrimp and Fried Po’ Boy,Nana,https://www.chicagomag.com/Chicago-Magazine/No...


In [48]:
df['URL'][0]

'https://www.chicagomag.com/Chicago-Magazine/November-2012/Best-Sandwiches-in-Chicago-Old-Oak-Tap-BLT/'

In [49]:
url = Request(df['URL'][0],headers = {'User-Agent':'Mozilla/5.0'})
html = urlopen(url)
soup_tmp = BeautifulSoup(html,"html.parser")
soup_tmp

<!DOCTYPE html>

<html lang="en-US">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible">
<link href="https://gmpg.org/xfn/11" rel="profile"/>
<script src="https://cmp.osano.com/16A1AnRt2Fn8i1unj/f15ebf08-7008-40fe-9af3-db96dc3e8266/osano.js"></script>
<title>1. Old Oak Tap BLT – Chicago Magazine</title>
<style type="text/css">
					.heateor_sss_button_instagram span.heateor_sss_svg,a.heateor_sss_instagram span.heateor_sss_svg{background:radial-gradient(circle at 30% 107%,#fdf497 0,#fdf497 5%,#fd5949 45%,#d6249f 60%,#285aeb 90%)}
						div.heateor_sss_horizontal_sharing a.heateor_sss_button_instagram span{background:#000!important;}div.heateor_sss_standard_follow_icons_container a.heateor_sss_button_instagram span{background:#000;}
										.heateor_sss_horizontal_sharing .heateor_sss_svg,.heateor_sss_standard_follow_icons_container .heateor_sss_svg{
							background-color: #000!important;
				background: #000!important;
							color: #fff;
						borde

In [50]:
print(soup_tmp.find('p','addy'))

<p class="addy">
<em>$10. 2109 W. Chicago Ave., 773-772-0406, <a href="http://www.theoldoaktap.com/">theoldoaktap.com</a></em></p>


In [51]:
price_tmp = soup_tmp.find('p','addy').get_text()
price_tmp

'\n$10. 2109 W. Chicago Ave., 773-772-0406, theoldoaktap.com'

In [52]:
price_tmp.split()[0]

'$10.'

In [53]:
price_tmp.split()[0][:-1]

'$10'

In [54]:
price_tmp.split()[1:-2]

['2109', 'W.', 'Chicago', 'Ave.,']

In [55]:
' '.join(price_tmp.split()[1:-2])

'2109 W. Chicago Ave.,'

## 샌드위치 페이지 50개에 접근하기

In [56]:
# tqdm => 현 상태를 바(bar)의 형태로 표현
from tqdm import tqdm_notebook
import time
# 가격을 담을 빈 리스트 하나
price=[]
# 카페의 주소를 담을 빈 리스트 하나
address=[]

for n in tqdm_notebook(df.index):
    url = Request(df['URL'][n],headers = {'User-Agent':'Mozilla/5.0'})
    html = urlopen(url)
    soup_tmp = BeautifulSoup(html,"lxml")
    
    gettings = soup_tmp.find('p','addy').get_text()
    
    price.append(gettings.split()[0][:-1])
    address.append(gettings.split()[1:-2])

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for n in tqdm_notebook(df.index):


  0%|          | 0/50 [00:00<?, ?it/s]

In [57]:
print(price)

['$10', '$9', '$9.50', '$9.40', '$10', '$7.25', '$16', '$10', '$9', '$17', '$11', '$5.49', '$14', '$10', '$13', '$4.50', '$11.95', '$11.50', '$6.25', '$15', '$5', '$6', '$8', '$5.99', '$7.52', '$11.95', '$7.50', '$12.95', '$7', '$21', '$9.79', '$9.75', '$13', '$7.95', '$9', '$9', '$8', '$8', '$7', '$6', '$7.25', '$11', '$6', '$9', '$5.49', '$8', '$6.50', '$7.50', '$8.75', '$6.85']


In [58]:
df['Price']= price
df['Address']=address
df = df.loc[:,['Rank','Cafe','Menu','Price','Address']]
df.set_index('Rank',inplace=True)
df.head()

Unnamed: 0_level_0,Cafe,Menu,Price,Address
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Old Oak Tap,BLT,$10,"[2109, W., Chicago, Ave.,]"
2,Au Cheval,Fried Bologna,$9,"[800, W., Randolph, St.,]"
3,Xoco,Woodland Mushroom,$9.50,"[445, N., Clark, St.,]"
4,Al’s Deli,Roast Beef,$9.40,"[914, Noyes, St.,, Evanston,]"
5,Publican Quality Meats,PB&L,$10,"[825, W., Fulton, Mkt.,]"


In [59]:
df.to_csv('../../data/python_data/best_sandwiches_chicago2.csv',sep=',',encoding='utf-8')

## 맛집 위치를 지도에 표기하기

In [60]:
import folium
import pandas as pd
import googlemaps
import numpy as np

In [61]:
df = pd.read_csv('../../data/python_data/best_sandwiches_chicago2.csv',index_col=0)
df.head(5)

Unnamed: 0_level_0,Cafe,Menu,Price,Address
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Old Oak Tap,BLT,$10,"['2109', 'W.', 'Chicago', 'Ave.,']"
2,Au Cheval,Fried Bologna,$9,"['800', 'W.', 'Randolph', 'St.,']"
3,Xoco,Woodland Mushroom,$9.50,"['445', 'N.', 'Clark', 'St.,']"
4,Al’s Deli,Roast Beef,$9.40,"['914', 'Noyes', 'St.,', 'Evanston,']"
5,Publican Quality Meats,PB&L,$10,"['825', 'W.', 'Fulton', 'Mkt.,']"


In [62]:
gmaps_key= "AIzaSyBAgdunCaHubS6cDbxNrQyzoU5KReTwus0"
gmaps = googlemaps.Client(key=gmaps_key)

In [63]:
df['Address']

Rank
1                    ['2109', 'W.', 'Chicago', 'Ave.,']
2                     ['800', 'W.', 'Randolph', 'St.,']
3                        ['445', 'N.', 'Clark', 'St.,']
4                 ['914', 'Noyes', 'St.,', 'Evanston,']
5                      ['825', 'W.', 'Fulton', 'Mkt.,']
6                               ['100', 'E.', 'Walton']
7                     ['1639', 'S.', 'Wabash', 'Ave.,']
8                      ['2211', 'W.', 'North', 'Ave.,']
9                      ['3619', 'W.', 'North', 'Ave.,']
10                    ['3267', 'S.', 'Halsted', 'St.,']
11                   ['2537', 'N.', 'Kedzie', 'Blvd.,']
12                                         ['Multiple']
13                          ['3124', 'N.', 'Broadway,']
14                 ['3455', 'N.', 'Southport', 'Ave.,']
15                    ['2657', 'N.', 'Kedzie', 'Ave.,']
16                     ['1120', 'W.', 'Grand', 'Ave.,']
17                  ['1141', 'S.', 'Jefferson', 'St.,']
18                      ['333', 'E.', 'Bent

In [65]:
lat = []
lng = []

for n in tqdm_notebook(df.index):
    if df['Address'][n] != 'Multiple':
        target_name = df['Address'][n] + ', '+'Cicago'
        gmaps_output = gmaps.geocode(target_name)
        location_output = gmaps_output[0].get('geometry')
        lat.append(location_output['location']['lat'])
        lng.append(location_output['location']['lng'])
    else:
        lat.append(np.nan)
        lng.append(np.nan)
df['lat'] = lat
df['lng'] = lng
df.head()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for n in tqdm_notebook(df.index):


  0%|          | 0/50 [00:00<?, ?it/s]

Unnamed: 0_level_0,Cafe,Menu,Price,Address,lat,lng
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Old Oak Tap,BLT,$10,"['2109', 'W.', 'Chicago', 'Ave.,']",41.895558,-87.679967
2,Au Cheval,Fried Bologna,$9,"['800', 'W.', 'Randolph', 'St.,']",41.884639,-87.64759
3,Xoco,Woodland Mushroom,$9.50,"['445', 'N.', 'Clark', 'St.,']",41.890523,-87.630783
4,Al’s Deli,Roast Beef,$9.40,"['914', 'Noyes', 'St.,', 'Evanston,']",42.058322,-87.683748
5,Publican Quality Meats,PB&L,$10,"['825', 'W.', 'Fulton', 'Mkt.,']",41.886604,-87.648536


In [67]:
mapping = folium.Map(location=[df['lat'].mean(),df['lng'].mean()],
                    zoom_start=11)

for n in df.index:
    if df['Address'][n]!= 'Multiple':
        folium.Marker([df['lat'][n],df['lng'][n]],
                     popup=df['Cafe'][n]).add_to(mapping)
        
mapping

In [68]:
from folium.plugins import MarkerCluster

mc = MarkerCluster()

mapping = folium.Map(location=[df['lat'].mean(),df['lng'].mean()],
                    zoom_start=11)

for n in df.index:
    if df['Address'][n]!= 'Multiple':
          mc.add_child(
                folium.Marker([df['lat'][n],df['lng'][n]],
                         popup=df['Cafe'][n])
    )

mapping.add_child(mc)
        
        
mapping