## Preparation

In [1]:
# Standard library
import random # For random numbers
import time # To put the system to sleep
import warnings
from pathlib import Path # For building the output path

# Data processing
import pandas as pd

# Scraping web content
import requests # For downloading the website
from bs4 import BeautifulSoup # For parsing the website

# Ignore warnings
warnings.filterwarnings('ignore')

## Extract Relevant Links

In [2]:
# Listing url of the Michelin Guide restaurants for Washington DC (first page of results)
DC_url = 'https://guide.michelin.com/us/en/washington/washington-dc/restaurants'

# Download the first listing page
DC_page = requests.get(DC_url)
DC_page.status_code # Shown as the cell output; 200 means the request succeeded

200

In [3]:
# Parse the downloaded HTML of the first DC listing page with the 'html.parser' backend
DC_soup = BeautifulSoup(DC_page.content,'html.parser')

In [4]:
# Build the url of every DC listing page: the base url plus '/page/2' ... '/page/<DC_page_no>'
DC_page_no = 6
DC_urls = {DC_url} | {DC_url+'/page/'+str(page) for page in range(2, DC_page_no+1)}

DC_urls

{'https://guide.michelin.com/us/en/washington/washington-dc/restaurants',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/2',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/3',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/4',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/5',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/6'}

In [5]:
# Extract relevant links for one page
links = set()

for tag in DC_soup.find_all('a'):
    href = tag.attrs.get('href')
    # Anchors without an href attribute yield None, which cannot be searched with `in`,
    # so skip them before testing for a relative restaurant link
    if href and 'restaurant/' in href and 'https:' not in href:
        links.add('https://guide.michelin.com'+href)

links

{'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/american-son',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/bidwell',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/china-chilcano',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/daikaya',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/dbgb-kitchen-and-bar',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/fancy-radish',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/indique',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/marcel-s',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/metier',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/mola',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/nazca-mochica',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/oyame

In [6]:
# Build a scraper to extract all relevant links
def page_scraper(url=None,page_no=None,sleep=3):
    """
    Scrape a Michelin listing url to extract all relevant restaurant links.

    Args:
        url (str): string of a Michelin listing url (the first page of results).
        page_no (int): integer value specifying the number of pages; pages 2..page_no are
            requested as url + '/page/<n>'. None (the default) scrapes only the first page.
        sleep (int): upper bound, in seconds, of the random uniform pause taken after every request; defaults to 3.

    Returns:
        set: set containing all relevant restaurant links as absolute urls.

    Raises:
        ValueError: if url is missing or empty.
    """
    if not url:
        raise ValueError('url must be a non-empty string')

    # A missing page count means "just the first page"
    last_page = 1 if page_no is None else page_no

    # Keep the pages in a list so they are requested (and printed) in page order
    page_urls = [url] + [url+'/page/'+str(i) for i in range(2, last_page+1)]

    links = set()

    for page_url in page_urls:

        # Keep track of where we are at
        print(page_url)

        # Download the webpage
        page = requests.get(page_url)

        # If a connection was reached
        if page.status_code == 200:

            # Parse
            soup = BeautifulSoup(page.content,'html.parser')

            for tag in soup.find_all('a'):
                href = tag.attrs.get('href')
                # Anchors without an href come back as None; skip them.
                # Only relative links containing 'restaurant/' are kept and made absolute.
                if href and 'restaurant/' in href and 'https:' not in href:
                    links.add('https://guide.michelin.com'+href)

        else:
            # Make a skipped page visible instead of silently returning fewer links
            print('Skipped (status '+str(page.status_code)+'): '+page_url)

        # Put the system to sleep for a random draw of time
        time.sleep(random.uniform(0,sleep))

    # Return data
    return links

In [7]:
# Collect the restaurant links from all 6 DC listing pages (page_scraper prints each page url as it goes)
DC_links = page_scraper(url=DC_url,page_no=6)

https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/4
https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/2
https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/6
https://guide.michelin.com/us/en/washington/washington-dc/restaurants
https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/5
https://guide.michelin.com/us/en/washington/washington-dc/restaurants/page/3


In [8]:
# Display the set of DC restaurant links returned by page_scraper
DC_links

{'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/1789',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/all-purpose',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/ambar',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/american-son',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/ana',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/anxo',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/astoria-dc',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/bad-saint',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/bidwell',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/blacksalt',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/blue-duck-tavern',
 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/bombay-club',
 'https://g

In [9]:
# Number of distinct DC restaurant links (DC_links is a set, so duplicates are already collapsed)
len(DC_links)

116

In [10]:
# Listing url of the Michelin Guide restaurants for Chicago (first page of results)
Chicago_url = 'https://guide.michelin.com/us/en/illinois/chicago/restaurants'

# Collect the restaurant links from all 9 Chicago listing pages
Chicago_links = page_scraper(url=Chicago_url,page_no=9)

https://guide.michelin.com/us/en/illinois/chicago/restaurants/page/8
https://guide.michelin.com/us/en/illinois/chicago/restaurants/page/6
https://guide.michelin.com/us/en/illinois/chicago/restaurants/page/3
https://guide.michelin.com/us/en/illinois/chicago/restaurants/page/9
https://guide.michelin.com/us/en/illinois/chicago/restaurants/page/7
https://guide.michelin.com/us/en/illinois/chicago/restaurants/page/4
https://guide.michelin.com/us/en/illinois/chicago/restaurants/page/5
https://guide.michelin.com/us/en/illinois/chicago/restaurants
https://guide.michelin.com/us/en/illinois/chicago/restaurants/page/2


In [11]:
# Number of distinct Chicago restaurant links (Chicago_links is a set)
len(Chicago_links)

176

In [12]:
# Listing url of the Michelin Guide restaurants for California (first page of results)
CA_url = 'https://guide.michelin.com/us/en/california/restaurants'

# Collect the restaurant links from all 32 California listing pages
CA_links = page_scraper(url=CA_url,page_no=32)

https://guide.michelin.com/us/en/california/restaurants/page/24
https://guide.michelin.com/us/en/california/restaurants/page/22
https://guide.michelin.com/us/en/california/restaurants/page/14
https://guide.michelin.com/us/en/california/restaurants/page/3
https://guide.michelin.com/us/en/california/restaurants/page/29
https://guide.michelin.com/us/en/california/restaurants/page/32
https://guide.michelin.com/us/en/california/restaurants/page/4
https://guide.michelin.com/us/en/california/restaurants/page/2
https://guide.michelin.com/us/en/california/restaurants/page/15
https://guide.michelin.com/us/en/california/restaurants/page/11
https://guide.michelin.com/us/en/california/restaurants/page/17
https://guide.michelin.com/us/en/california/restaurants/page/9
https://guide.michelin.com/us/en/california/restaurants/page/30
https://guide.michelin.com/us/en/california/restaurants/page/20
https://guide.michelin.com/us/en/california/restaurants/page/6
https://guide.michelin.com/us/en/california/r

In [13]:
# Number of distinct California restaurant links (CA_links is a set)
len(CA_links)

624

In [14]:
# Listing url of the Michelin Guide restaurants for New York State (first page of results)
NY_url = 'https://guide.michelin.com/us/en/new-york-state/restaurants'

# Collect the restaurant links from all 26 New York listing pages
NY_links = page_scraper(url=NY_url,page_no=26)

https://guide.michelin.com/us/en/new-york-state/restaurants/page/26
https://guide.michelin.com/us/en/new-york-state/restaurants/page/16
https://guide.michelin.com/us/en/new-york-state/restaurants/page/19
https://guide.michelin.com/us/en/new-york-state/restaurants/page/25
https://guide.michelin.com/us/en/new-york-state/restaurants/page/15
https://guide.michelin.com/us/en/new-york-state/restaurants/page/14
https://guide.michelin.com/us/en/new-york-state/restaurants/page/20
https://guide.michelin.com/us/en/new-york-state/restaurants/page/21
https://guide.michelin.com/us/en/new-york-state/restaurants/page/17
https://guide.michelin.com/us/en/new-york-state/restaurants/page/23
https://guide.michelin.com/us/en/new-york-state/restaurants/page/6
https://guide.michelin.com/us/en/new-york-state/restaurants/page/9
https://guide.michelin.com/us/en/new-york-state/restaurants/page/3
https://guide.michelin.com/us/en/new-york-state/restaurants/page/11
https://guide.michelin.com/us/en/new-york-state/res

In [15]:
# Number of distinct New York restaurant links (NY_links is a set)
len(NY_links)

513

## Scrape Michelin Restaurant Page

In [16]:
# Url of a single restaurant page (Das, Washington DC), used to prototype the per-restaurant scraper
Das_url = 'https://guide.michelin.com/us/en/washington/washington-dc/restaurant/das'

# Download the restaurant page
Das_page = requests.get(Das_url)
Das_page.status_code # Shown as the cell output; 200 means the request succeeded

200

In [17]:
# Parse the downloaded HTML of the Das restaurant page with the 'html.parser' backend
Das_soup = BeautifulSoup(Das_page.content,'html.parser')

In [18]:
# Build the one-row DataFrame directly (DataFrame.append was removed in pandas 2.0).
# The restaurant name is read from the text of the first <h2> element on the page.
Das = pd.DataFrame([{'name':Das_soup.select('h2')[0].get_text(),
                     'url':Das_url}],
                   columns=['name','url'])

Das

Unnamed: 0,name,url
0,Das,https://guide.michelin.com/us/en/washington/wa...


In [19]:
# Build a scraper for Michelin restaurant page
def Michelin_scraper(url=None):
    """
    Scrape a Michelin restaurant page to extract business information.

    Args:
        url (str): string of a Michelin restaurant url.

    Returns:
        DataFrame: frame with columns 'name' and 'url'. It holds one row when the
            page was downloaded (status code 200) and no rows otherwise. 'name' is
            the text of the first <h2> element, or None when the page has no <h2>.
    """
    columns = ['name','url']

    # Download the webpage
    page = requests.get(url)

    # Without a successful download there is nothing to parse: report it and
    # return an empty frame with the usual columns so callers can still combine results
    if page.status_code != 200:
        print('Skipped (status '+str(page.status_code)+'): '+str(url))
        return pd.DataFrame(columns=columns)

    # Parse
    soup = BeautifulSoup(page.content,'html.parser')

    # Guard against pages without any <h2> instead of failing on [0]
    headings = soup.select('h2')
    name = headings[0].get_text() if headings else None

    # Build the row in one go (DataFrame.append was removed in pandas 2.0)
    info = pd.DataFrame([{'name':name,
                          'url':url}],
                        columns=columns)

    # Return data
    return info

In [20]:
# Build a function to scrape multiple Michelin restaurant links
def link_scrape(urls=None,sleep=3):
    """
    Scrape multiple Michelin restaurant links.

    Each url is passed to Michelin_scraper. A url whose scrape raises an error is
    reported and skipped so that one bad page does not abort a long run.

    Args:
        urls (list): iterable of Michelin restaurant urls; None is treated as empty.
        sleep (int): upper bound, in seconds, of the random uniform pause taken after every url; defaults to 3.

    Returns:
        DataFrame: frame containing business information of all urls that were
            scraped successfully, with a fresh 0..n-1 index. Empty when nothing was scraped.
    """
    frames = []

    for url in urls or []:

        print(url) # Keep track of where we are at

        try:

            # Scrape the content
            scraped = Michelin_scraper(url)

            # Keep only frames that actually carry rows
            if not scraped.empty:
                frames.append(scraped)

        except Exception as err:
            # The original caught ImportError, which scraping never raises;
            # report the failing url and carry on with the remaining ones
            print('Failed to scrape '+str(url)+': '+repr(err))

        # Put the system to sleep for a random draw of time (after every url, successful or not)
        time.sleep(random.uniform(0,sleep))

    if not frames:
        return pd.DataFrame([])

    # Combine once at the end instead of growing a frame row by row
    # (DataFrame.append was removed in pandas 2.0 and was quadratic in a loop)
    dat = pd.concat(frames,ignore_index=True)
    return dat

In [21]:
# Scrape every DC restaurant page; DC_links is a set, so the visiting order is arbitrary
DC_Michelin = link_scrape(urls=DC_links)

https://guide.michelin.com/us/en/washington/washington-dc/restaurant/himitsu
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/blacksalt
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/tico
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/ottoman-taverna
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/1789
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/ivy-city-smokehouse
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/all-purpose
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/fiola
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/cane
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/i-m-eddie-cano
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/oyamel
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/sababa
https://guide.michelin.com/us/en/washington/washington-d

https://guide.michelin.com/us/en/washington/washington-dc/restaurant/daikaya
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/keren
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/maketto
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/kaliwa
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/sushi-taro
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/lupo-verde
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/american-son
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/rasika
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/bidwell
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/doi-moi
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/masseria
https://guide.michelin.com/us/en/washington/washington-dc/restaurant/kinship
https://guide.michelin.com/us/en/washington/washington-dc/restaurant

In [22]:
# Scrape every Chicago restaurant page (link_scrape prints each url as it is visited)
Chicago_Michelin = link_scrape(urls=Chicago_links)

https://guide.michelin.com/us/en/illinois/chicago/restaurant/the-albert
https://guide.michelin.com/us/en/illinois/chicago/restaurant/chicago-curry-house
https://guide.michelin.com/us/en/illinois/chicago/restaurant/bixi-beer
https://guide.michelin.com/us/en/illinois/chicago/restaurant/pelago
https://guide.michelin.com/us/en/illinois/chicago/restaurant/kie-gol-lanee
https://guide.michelin.com/us/en/illinois/chicago/restaurant/virtue
https://guide.michelin.com/us/en/illinois/chicago/restaurant/flat-point
https://guide.michelin.com/us/en/illinois/chicago/restaurant/roister
https://guide.michelin.com/us/en/illinois/chicago/restaurant/girl-the-goat
https://guide.michelin.com/us/en/illinois/chicago/restaurant/wasabi
https://guide.michelin.com/us/en/illinois/chicago/restaurant/kikko
https://guide.michelin.com/us/en/illinois/chicago/restaurant/hugo-s-frog-bar-fish-house
https://guide.michelin.com/us/en/illinois/chicago/restaurant/steak-48
https://guide.michelin.com/us/en/illinois/chicago/restau

https://guide.michelin.com/us/en/illinois/chicago/restaurant/the-warbler
https://guide.michelin.com/us/en/illinois/chicago/restaurant/chicago-cut
https://guide.michelin.com/us/en/illinois/chicago/restaurant/el-ideas
https://guide.michelin.com/us/en/illinois/chicago/restaurant/mott-st
https://guide.michelin.com/us/en/illinois/chicago/restaurant/pleasant-house-pub
https://guide.michelin.com/us/en/illinois/chicago/restaurant/shokran-moroccan-grill
https://guide.michelin.com/us/en/illinois/chicago/restaurant/hopleaf
https://guide.michelin.com/us/en/illinois/chicago/restaurant/minghin
https://guide.michelin.com/us/en/illinois/chicago/restaurant/twin-anchors
https://guide.michelin.com/us/en/illinois/chicago/restaurant/au-cheval
https://guide.michelin.com/us/en/illinois/chicago/restaurant/giordano-s
https://guide.michelin.com/us/en/illinois/chicago/restaurant/parachute
https://guide.michelin.com/us/en/illinois/chicago/restaurant/jam
https://guide.michelin.com/us/en/illinois/chicago/restaurant

In [24]:
# Scrape every California restaurant page (link_scrape prints each url as it is visited)
CA_Michelin = link_scrape(urls=CA_links)

https://guide.michelin.com/us/en/california/san-francisco/restaurant/atelier-crenn
https://guide.michelin.com/us/en/california/oakland/restaurant/cafe-romanat
https://guide.michelin.com/us/en/california/oakland/restaurant/dyafa
https://guide.michelin.com/us/en/california/carmel-by-the-sea/restaurant/cultura
https://guide.michelin.com/us/en/california/san-francisco/restaurant/acquerello
https://guide.michelin.com/us/en/california/san-francisco/restaurant/bar-crudo
https://guide.michelin.com/us/en/california/us-san-diego/restaurant/cowboy-star
https://guide.michelin.com/us/en/california/san-francisco/restaurant/liholiho-yacht-club
https://guide.michelin.com/us/en/california/point-loma/restaurant/el-jardin
https://guide.michelin.com/us/en/california/pasadena/restaurant/parkway-grill
https://guide.michelin.com/us/en/california/san-francisco/restaurant/farmhouse-kitchen-thai
https://guide.michelin.com/us/en/california/belmont/restaurant/shalizaar
https://guide.michelin.com/us/en/california/

https://guide.michelin.com/us/en/california/yountville/restaurant/the-french-laundry
https://guide.michelin.com/us/en/california/us-san-diego/restaurant/soichi
https://guide.michelin.com/us/en/california/redwood-city/restaurant/la-viga
https://guide.michelin.com/us/en/california/west-hollywood/restaurant/norah
https://guide.michelin.com/us/en/california/redwood-city/restaurant/vesta
https://guide.michelin.com/us/en/california/alhambra/restaurant/sichuan-impression
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/hinoki-the-bird
https://guide.michelin.com/us/en/california/west-hollywood/restaurant/katana
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/alta-adams
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/guelaguetza
https://guide.michelin.com/us/en/california/santa-barbara/restaurant/blackbird564365
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/woodspoon
https://guide.michelin.com/us/en/califor

https://guide.michelin.com/us/en/california/west-hollywood/restaurant/rosaline
https://guide.michelin.com/us/en/california/napa/restaurant/la-toque
https://guide.michelin.com/us/en/california/pasadena/restaurant/the-raymond
https://guide.michelin.com/us/en/california/us-anaheim/restaurant/the-ranch
https://guide.michelin.com/us/en/california/manhattan-beach/restaurant/sushi-i-naba
https://guide.michelin.com/us/en/california/alhambra/restaurant/jiang-nan-spring
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/mi-lindo-nayrit-mariscos
https://guide.michelin.com/us/en/california/fairfax/restaurant/village-sake
https://guide.michelin.com/us/en/california/newport-beach/restaurant/bluefin
https://guide.michelin.com/us/en/california/san-jose/restaurant/zeni
https://guide.michelin.com/us/en/california/santa-monica/restaurant/the-lobster
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/jar
https://guide.michelin.com/us/en/california/us-los-angeles/resta

https://guide.michelin.com/us/en/california/hollywood/restaurant/providence
https://guide.michelin.com/us/en/california/west-hollywood/restaurant/tesse
https://guide.michelin.com/us/en/california/palo-alto/restaurant/baume
https://guide.michelin.com/us/en/california/boonville/restaurant/boonville-hotel-restaurant
https://guide.michelin.com/us/en/california/us-san-diego/restaurant/crack-shack
https://guide.michelin.com/us/en/california/pasadena/restaurant/maestro
https://guide.michelin.com/us/en/california/santa-barbara/restaurant/yoichi-s
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/okiboru-ramen
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/carlitos-gardel
https://guide.michelin.com/us/en/california/calabasas/restaurant/saddle-peak-lodge
https://guide.michelin.com/us/en/california/san-francisco/restaurant/the-progress
https://guide.michelin.com/us/en/california/berkeley/restaurant/tacubaya
https://guide.michelin.com/us/en/california/us-

https://guide.michelin.com/us/en/california/encino/restaurant/scratch-bar-kitchen
https://guide.michelin.com/us/en/california/san-anselmo/restaurant/insalata-s
https://guide.michelin.com/us/en/california/us-san-diego/restaurant/sushi-tadokoro
https://guide.michelin.com/us/en/california/san-francisco/restaurant/aziza
https://guide.michelin.com/us/en/california/chula-vista/restaurant/tacos-el-gordo
https://guide.michelin.com/us/en/california/la-jolla/restaurant/a-r-valentien
https://guide.michelin.com/us/en/california/us-san-diego/restaurant/ironside-fish-oyster
https://guide.michelin.com/us/en/california/us-sacramento/restaurant/localis
https://guide.michelin.com/us/en/california/tustin/restaurant/j-zhou
https://guide.michelin.com/us/en/california/us-sacramento/restaurant/bacon-butter
https://guide.michelin.com/us/en/california/manhattan-beach/restaurant/love-salt
https://guide.michelin.com/us/en/california/yountville/restaurant/ciccio
https://guide.michelin.com/us/en/california/san-fra

https://guide.michelin.com/us/en/california/la-jolla/restaurant/nine-ten
https://guide.michelin.com/us/en/california/palo-alto/restaurant/zola
https://guide.michelin.com/us/en/california/studio-city/restaurant/asanebo
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/awash
https://guide.michelin.com/us/en/california/saint-helena/restaurant/cook-st-helena
https://guide.michelin.com/us/en/california/berkeley/restaurant/comal
https://guide.michelin.com/us/en/california/geyserville/restaurant/rustic
https://guide.michelin.com/us/en/california/san-mateo/restaurant/pausa
https://guide.michelin.com/us/en/california/san-jose/restaurant/adega
https://guide.michelin.com/us/en/california/us-sacramento/restaurant/yue-huang
https://guide.michelin.com/us/en/california/us-los-angeles/restaurant/cosa-buona
https://guide.michelin.com/us/en/california/palo-alto/restaurant/vina-enoteca
https://guide.michelin.com/us/en/california/san-francisco/restaurant/sorrel
https://guide.michelin.c

In [25]:
# Scrape every New York restaurant page (link_scrape prints each url as it is visited)
NY_Michelin = link_scrape(urls=NY_links)

https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/sushi-yasaka
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/ruffian
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/maison-harlem
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/mala-project
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/coarse
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/saint-julivert-fisherie
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/lamalo
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/guan-fu-sichuan
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/spy-c-cuisine
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/the-flower-shop
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/georgian-dream-cafe
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/jajaja
https://guide.michelin.co

https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/dirt-candy
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/minetta-tavern
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/yopparai
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/l-atelier-de-joel-robuchon562505
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/tra-di-noi
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/emilio-s-ballato
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/maya
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/markjoseph
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/new-wonjo
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/the-fulton
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/hahm-ji-bach
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/contra
https://guide.michelin.com/us/e

https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/locanda-vini-e-olii
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/atomix
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/camillo
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/sushi-nakazawa
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/kung-fu-little-steamed-buns-ramen
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/popina
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/mar-s
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/suyo-gastrofusion
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/bell-book-candle
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/sip-sak
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/jeju-noodle-bar
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/aldo-sohm-wine-bar
https://g

https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/taboon
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/shalom-japan
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/the-loyal
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/il-divo
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/tsushima
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/dons-bogam
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/avant-garden
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/high-street-on-hudson
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/the-standard-grill
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/saraghina
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/hao-noodle-and-tea
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/king
https://guide.michelin.com/us/en/new-y

https://guide.michelin.com/us/en/new-york-state/dobbs-ferry/restaurant/the-cookery
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/casellula
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/tastings-social-presents-mountain-bird
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/chiko
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/le-gigot
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/casa
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/no-7
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/east-harbor-seafood-palace
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/vesta-trattoria
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/hunky-dory
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/havana-cafe
https://guide.michelin.com/us/en/new-york-state/new-york/restaurant/faun
https://guide.michelin.c

In [26]:
# Print the number of scraped rows per region in a single print call
count_parts = []
for label, frame in (("DC:",DC_Michelin),
                     ("\nChicago:",Chicago_Michelin),
                     ("\nCA:",CA_Michelin),
                     ("\nNY:",NY_Michelin)):
    count_parts += [label, frame.shape[0]]

print(*count_parts)

DC: 116 
Chicago: 176 
CA: 624 
NY: 513


## Merge All Data

In [27]:
# Tag every regional frame with a constant 'region' column
# NOTE(review): NY_url points at the 'new-york-state' listing, so 'New York City'
# may be narrower than the data it labels — confirm the intended label
for frame, region in ((DC_Michelin,'Washington DC'),
                      (Chicago_Michelin,'Chicago'),
                      (CA_Michelin,'California'),
                      (NY_Michelin,'New York City')):
    frame['region'] = region

In [36]:
# Stack the four regional frames into one (columns sorted, fresh index) and
# flag every row as coming from the Michelin Guide
regional_frames = [DC_Michelin,Chicago_Michelin,CA_Michelin,NY_Michelin]
Michelin = pd.concat(regional_frames,ignore_index=True,sort=True).assign(michelin_guide=1)

Michelin

Unnamed: 0,name,region,url,michelin_guide
0,Himitsu,Washington DC,https://guide.michelin.com/us/en/washington/wa...,1
1,BlackSalt,Washington DC,https://guide.michelin.com/us/en/washington/wa...,1
2,Tico,Washington DC,https://guide.michelin.com/us/en/washington/wa...,1
3,Ottoman Taverna,Washington DC,https://guide.michelin.com/us/en/washington/wa...,1
4,1789,Washington DC,https://guide.michelin.com/us/en/washington/wa...,1
...,...,...,...,...
1424,Baar Baar,New York City,https://guide.michelin.com/us/en/new-york-stat...,1
1425,Enoteca Maria,New York City,https://guide.michelin.com/us/en/new-york-stat...,1
1426,Little Park,New York City,https://guide.michelin.com/us/en/new-york-stat...,1
1427,I Sodi,New York City,https://guide.michelin.com/us/en/new-york-stat...,1


In [37]:
# Export to a CSV file; create the output folder first so the export
# does not fail when 'Data/Michelin' does not exist yet
Michelin_csv = Path('Data/Michelin/Michelin.csv')
Michelin_csv.parent.mkdir(parents=True,exist_ok=True)
Michelin.to_csv(Michelin_csv,index=False)