<h1>An analysis of Michelin stars awarded in 2020 - Data capturing</h1>

This file shows how the relevant data was scraped and put together for the following project XXX.

<h2>Import information</h2>

In [1]:
#Import relevant libraries

import pandas as pd
import requests
from bs4 import BeautifulSoup
import pprint

In [136]:
#Import data

one_star = pd.read_csv("one-star-michelin-restaurants.csv")
two_star = pd.read_csv("two-stars-michelin-restaurants.csv")
three_star = pd.read_csv("three-stars-michelin-restaurants.csv")

<h2>Webscraping to gather data</h2>

<h3>France</h3>

First we will webscrape the France data, following the same code for each star.

Three star data:

In [456]:
#We want to create a list of dictionaries, where each restaurant is contained in its own dictionary

#First, create empty list

restaurants = []

#loop through multiple webpages

pages = ['https://guide.michelin.com/fr/en/restaurants/3-stars-michelin', 
        'https://guide.michelin.com/fr/en/restaurants/3-stars-michelin/page/2']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

#Loop through each restaurant on the webpage

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
    #Matches elements with all of these classes "col-md-6 col-lg-6 col-xl-3" 

    #Select_one selects the first matched element of the css selector. 
    #'.text' returns the text
    #To get the child element of a parent, write the parent and a space bar and then the tag/selector
    #Get rid of white space using split() and new lines using replace('\n','')
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        restaurants.append(restaurant)
    
#Turn the dictionaries into a dataframe, adding two new columns for country and number of stars

france_three_star = pd.DataFrame.from_dict(restaurants)
france_three_star.insert(0,'Country', 'France')
france_three_star.insert(1,'no_of_stars',3)
france_three_star.head()

Unnamed: 0,Country,no_of_stars,name,city,cuisine
0,France,3,Le Louis XV - Alain Ducasse à l'Hôtel de Paris,Monte-Carlo,Mediterranean
1,France,3,Assiette Champenoise,Reims,Creative
2,France,3,Les Prés d'Eugénie - Michel Guérard,Eugénie-les-Bains,Classic cuisine
3,France,3,Georges Blanc,Vonnas,Creative
4,France,3,La Vague d'Or - Cheval Blanc St-Tropez,Saint-Tropez,Creative


In [457]:
#Check the dataframe has all restaurants
france_three_star.shape

(29, 5)

Two star data:

In [458]:
france_two_star_restaurants = []

pages = ['https://guide.michelin.com/fr/en/restaurants/2-stars-michelin', 
        'https://guide.michelin.com/fr/en/restaurants/2-stars-michelin/page/2',
        'https://guide.michelin.com/fr/en/restaurants/2-stars-michelin/page/3',
        'https://guide.michelin.com/fr/en/restaurants/2-stars-michelin/page/4',
        'https://guide.michelin.com/fr/en/restaurants/2-stars-michelin/page/5']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
    
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        france_two_star_restaurants.append(restaurant)

france_two_star = pd.DataFrame.from_dict(france_two_star_restaurants)
france_two_star.insert(0,'Country', 'France')
france_two_star.insert(1,'no_of_stars',2)
france_two_star.head()

Unnamed: 0,Country,no_of_stars,name,city,cuisine
0,France,2,Joël Robuchon Monte-Carlo,Monte-Carlo,Modern cuisine
1,France,2,Auberge du Cheval Blanc,Lembach,Creative
2,France,2,La Table d'Olivier Nasti,Kaysersberg,Modern cuisine
3,France,2,La Table de l'Alpaga,Megève,Modern cuisine
4,France,2,Jean Sulpice,Talloires,Creative


In [333]:
#Check the dataframe has all restaurants
france_two_star.shape

(86, 5)

France one star data:

In [459]:
france_one_star_restaurants = []

pages = ['https://guide.michelin.com/fr/en/restaurants/1-star-michelin', 
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/2',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/3',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/4',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/5',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/6',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/7',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/8',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/9',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/10',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/11',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/12',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/13',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/14',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/15',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/16',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/17',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/18',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/19',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/20',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/21',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/22',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/23',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/24',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/25',
        'https://guide.michelin.com/fr/en/restaurants/1-star-michelin/page/26']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        france_one_star_restaurants.append(restaurant)

france_one_star = pd.DataFrame.from_dict(france_one_star_restaurants)
france_one_star.insert(0,'Country', 'France')
france_one_star.insert(1,'no_of_stars',1)
france_one_star.head()

Unnamed: 0,Country,no_of_stars,name,city,cuisine
0,France,1,Vistamar,Monte-Carlo,Modern cuisine
1,France,1,Le Blue Bay,Monte-Carlo,Creative
2,France,1,Yoshi,Monte-Carlo,Japanese
3,France,1,Le Grill,Monte-Carlo,Classic cuisine
4,France,1,Le Georges,Chartres,Modern cuisine


In [460]:
#Check the dataframe has all restaurants
france_one_star.shape

(513, 5)

Now we will merge the three dataframes and save as a csv file.

In [461]:
france_one_two = pd.merge(france_one_star,france_two_star,how='outer')
france_data = pd.merge(france_one_two,france_three_star, how = 'outer')

print(france_data.head())
print(france_data.shape)

france_data.to_csv('france_michelin_stars_2020.csv')

  Country  no_of_stars         name         city          cuisine
0  France            1     Vistamar  Monte-Carlo   Modern cuisine
1  France            1  Le Blue Bay  Monte-Carlo         Creative
2  France            1        Yoshi  Monte-Carlo         Japanese
3  France            1     Le Grill  Monte-Carlo  Classic cuisine
4  France            1   Le Georges     Chartres   Modern cuisine
(628, 5)


Using the above code, we can now scrape the data for all other countries, saving them all as individual country csv files.

<h3>Spain</h3>

In [342]:
#One star restaurant data
spain_one_restaurants = []

pages = ['https://guide.michelin.com/es/en/restaurants/1-star-michelin', 
        'https://guide.michelin.com/es/en/restaurants/1-star-michelin/page/2',
        'https://guide.michelin.com/es/en/restaurants/1-star-michelin/page/3',
        'https://guide.michelin.com/es/en/restaurants/1-star-michelin/page/4',
        'https://guide.michelin.com/es/en/restaurants/1-star-michelin/page/5',
        'https://guide.michelin.com/es/en/restaurants/1-star-michelin/page/6',
        'https://guide.michelin.com/es/en/restaurants/1-star-michelin/page/7',
        'https://guide.michelin.com/es/en/restaurants/1-star-michelin/page/8',
        'https://guide.michelin.com/es/en/restaurants/1-star-michelin/page/9']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        spain_one_restaurants.append(restaurant)

spain_one_star = pd.DataFrame.from_dict(spain_one_restaurants)
spain_one_star.insert(0,'Country', 'Spain')
spain_one_star.insert(1,'no_of_stars',1)
print(spain_one_star.head())
print(spain_one_star.shape)

#Two star restaurant data
spain_two_restaurants = []

pages = ['https://guide.michelin.com/es/en/restaurants/2-stars-michelin', 
        'https://guide.michelin.com/es/en/restaurants/2-stars-michelin/page/2']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        spain_two_restaurants.append(restaurant)

spain_two_star = pd.DataFrame.from_dict(spain_two_restaurants)
spain_two_star.insert(0,'Country', 'Spain')
spain_two_star.insert(1,'no_of_stars',2)
print(spain_two_star.head())
print(spain_two_star.shape)

#Three star restaurant data
spain_three_restaurants = []

pages = ['https://guide.michelin.com/es/en/restaurants/3-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        spain_three_restaurants.append(restaurant)

spain_three_star = pd.DataFrame.from_dict(spain_three_restaurants)
spain_three_star.insert(0,'Country', 'Spain')
spain_three_star.insert(1,'no_of_stars',3)
print(spain_three_star.head())
print(spain_three_star.shape)

  Country  no_of_stars             name       city              cuisine
0   Spain            1     Maruja Limón       Vigo             Creative
1   Spain            1        Ca l'Arpa   Banyoles  Traditional cuisine
2   Spain            1        Les Moles  Ulldecona       Modern cuisine
3   Spain            1  Rincón de Diego   Cambrils  Traditional cuisine
4   Spain            1         Acánthum     Huelva       Modern cuisine
(173, 5)
  Country  no_of_stars                  name             city   cuisine
0   Spain            2             Disfrutar        Barcelona  Creative
1   Spain            2   Ramón Freixa Madrid           Retiro  Creative
2   Spain            2            Santceloni         Chamberí  Creative
3   Spain            2                  Noor          Córdoba  Creative
4   Spain            2  El Molino de Urdániz  Urdaitz/Urdániz  Creative
(29, 5)
  Country  no_of_stars                   name                      city  \
0   Spain            3                Lasart

In [343]:
spain_one_two = pd.merge(spain_one_star,spain_two_star,how='outer')
spain_data = pd.merge(spain_one_two,spain_three_star, how = 'outer')

print(spain_data.head())
print(spain_data.shape)

spain_data.to_csv('spain_michelin_stars_2020.csv')

  Country  no_of_stars             name       city              cuisine
0   Spain            1     Maruja Limón       Vigo             Creative
1   Spain            1        Ca l'Arpa   Banyoles  Traditional cuisine
2   Spain            1        Les Moles  Ulldecona       Modern cuisine
3   Spain            1  Rincón de Diego   Cambrils  Traditional cuisine
4   Spain            1         Acánthum     Huelva       Modern cuisine
(213, 5)


<h3>Italy</h3>

In [345]:
#One star restaurant data
italy_one_restaurants = []

pages = ['https://guide.michelin.com/it/en/restaurants/1-star-michelin', 
        'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/2',
        'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/3',
        'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/4',
        'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/5',
        'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/6',
        'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/7',
        'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/8',
        'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/9',
         'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/10',
         'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/11',
         'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/12',
         'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/13',
         'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/14',
         'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/15',
         'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/16',
         'https://guide.michelin.com/it/en/restaurants/1-star-michelin/page/17'
        ]

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        italy_one_restaurants.append(restaurant)

italy_one_star = pd.DataFrame.from_dict(italy_one_restaurants)
italy_one_star.insert(0,'Country', 'Italy')
italy_one_star.insert(1,'no_of_stars',1)
print(italy_one_star.head())
print(italy_one_star.shape)

#Two star restaurant data
italy_two_restaurants = []

pages = ['https://guide.michelin.com/it/en/restaurants/2-stars-michelin', 
        'https://guide.michelin.com/it/en/restaurants/2-stars-michelin/page/2']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        italy_two_restaurants.append(restaurant)

italy_two_star = pd.DataFrame.from_dict(italy_two_restaurants)
italy_two_star.insert(0,'Country', 'Italy')
italy_two_star.insert(1,'no_of_stars',2)
print(italy_two_star.head())
print(italy_two_star.shape)

#Three star restaurant data
italy_three_restaurants = []

pages = ['https://guide.michelin.com/it/en/restaurants/3-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        italy_three_restaurants.append(restaurant)

italy_three_star = pd.DataFrame.from_dict(italy_three_restaurants)
italy_three_star.insert(0,'Country', 'Italy')
italy_three_star.insert(1,'no_of_stars',3)
print(italy_three_star.head())
print(italy_three_star.shape)

  Country  no_of_stars                          name               city  \
0   Italy            1                       Caracol             BACOLI   
1   Italy            1                     Villa Naj          Stradella   
2   Italy            1            Umberto De Martino  San Paolo d'Argon   
3   Italy            1              Già Sotto l'Arco          CAROVIGNO   
4   Italy            1  Arnaldo-Clinica Gastronomica            RUBIERA   

          cuisine  
0  Modern cuisine  
1  Modern cuisine  
2   Mediterranean  
3        Creative  
4         Emilian  
(326, 5)
  Country  no_of_stars          name               city         cuisine
0   Italy            2    Agli Amici              GODIA  Modern cuisine
1   Italy            2   daní maison             ISCHIA        Creative
2   Italy            2         Terra  Sarentino/Sarntal        Creative
3   Italy            2  Trenkerstube       Tirol/Tirolo        Creative
4   Italy            2       La Peca             LONIGO     

In [346]:
italy_one_two = pd.merge(italy_one_star,italy_two_star,how='outer')
italy_data = pd.merge(italy_one_two,italy_three_star, how = 'outer')

print(italy_data.head())
print(italy_data.shape)

italy_data.to_csv('italy_michelin_stars_2020.csv')

  Country  no_of_stars                          name               city  \
0   Italy            1                       Caracol             BACOLI   
1   Italy            1                     Villa Naj          Stradella   
2   Italy            1            Umberto De Martino  San Paolo d'Argon   
3   Italy            1              Già Sotto l'Arco          CAROVIGNO   
4   Italy            1  Arnaldo-Clinica Gastronomica            RUBIERA   

          cuisine  
0  Modern cuisine  
1  Modern cuisine  
2   Mediterranean  
3        Creative  
4         Emilian  
(372, 5)


<h3>United Kingdom</h3>

In [347]:
#One star restaurant data
uk_one_restaurants = []

pages = ['https://guide.michelin.com/gb/en/restaurants/1-star-michelin', 
        'https://guide.michelin.com/gb/en/restaurants/1-star-michelin/page/2',
        'https://guide.michelin.com/gb/en/restaurants/1-star-michelin/page/3',
        'https://guide.michelin.com/gb/en/restaurants/1-star-michelin/page/4',
        'https://guide.michelin.com/gb/en/restaurants/1-star-michelin/page/5',
        'https://guide.michelin.com/gb/en/restaurants/1-star-michelin/page/6',
        'https://guide.michelin.com/gb/en/restaurants/1-star-michelin/page/7',
        'https://guide.michelin.com/gb/en/restaurants/1-star-michelin/page/8'
        ]

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        uk_one_restaurants.append(restaurant)

uk_one_star = pd.DataFrame.from_dict(uk_one_restaurants)
uk_one_star.insert(0,'Country', 'UK')
uk_one_star.insert(1,'no_of_stars',1)
print(uk_one_star.head())
print(uk_one_star.shape)

#Two star restaurant data
uk_two_restaurants = []

pages = ['https://guide.michelin.com/gb/en/restaurants/2-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        uk_two_restaurants.append(restaurant)

uk_two_star = pd.DataFrame.from_dict(uk_two_restaurants)
uk_two_star.insert(0,'Country', 'UK')
uk_two_star.insert(1,'no_of_stars',2)
print(uk_two_star.head())
print(uk_two_star.shape)

#Three star restaurant data
uk_three_restaurants = []

pages = ['https://guide.michelin.com/gb/en/restaurants/3-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        uk_three_restaurants.append(restaurant)

uk_three_star = pd.DataFrame.from_dict(uk_three_restaurants)
uk_three_star.insert(0,'Country', 'UK')
uk_three_star.insert(1,'no_of_stars',3)
print(uk_three_star.head())
print(uk_three_star.shape)

  Country  no_of_stars                        name        city  \
0      UK            1                  Olive Tree        Bath   
1      UK            1                        Brat  Shoreditch   
2      UK            1                 The Neptune  Hunstanton   
3      UK            1                  The Cellar  Anstruther   
4      UK            1  The Man Behind The Curtain       Leeds   

               cuisine  
0       Modern cuisine  
1  Traditional British  
2       Modern cuisine  
3       Modern cuisine  
4             Creative  
(144, 5)
  Country  no_of_stars                  name            city           cuisine
0      UK            2           Le Gavroche         Mayfair            French
1      UK            2       The Dining Room      Malmesbury  Asian influences
2      UK            2        La Dame de Pic  City of London     Modern French
3      UK            2             L'Enclume         Cartmel          Creative
4      UK            2  Restaurant Sat Bains     

In [348]:
uk_one_two = pd.merge(uk_one_star,uk_two_star,how='outer')
uk_data = pd.merge(uk_one_two,uk_three_star, how = 'outer')

print(uk_data.head())
print(uk_data.shape)

uk_data.to_csv('uk_michelin_stars_2020.csv')

  Country  no_of_stars                        name        city  \
0      UK            1                  Olive Tree        Bath   
1      UK            1                        Brat  Shoreditch   
2      UK            1                 The Neptune  Hunstanton   
3      UK            1                  The Cellar  Anstruther   
4      UK            1  The Man Behind The Curtain       Leeds   

               cuisine  
0       Modern cuisine  
1  Traditional British  
2       Modern cuisine  
3       Modern cuisine  
4             Creative  
(169, 5)


<h3> California, Chicago, New York State, Washington DC </h3>

In [3]:
#One star restaurant data
us_one_restaurants = []

pages = ['https://guide.michelin.com/us/en/california/restaurants/1-star-michelin',
        'https://guide.michelin.com/us/en/california/restaurants/1-star-michelin/page/2',
        'https://guide.michelin.com/us/en/california/restaurants/1-star-michelin/page/3',
        'https://guide.michelin.com/us/en/california/restaurants/1-star-michelin/page/4',
        'https://guide.michelin.com/us/en/illinois/chicago/restaurants/1-star-michelin',
        'https://guide.michelin.com/us/en/illinois/chicago/restaurants/1-star-michelin/page/2',
        'https://guide.michelin.com/us/en/new-york-state/restaurants/1-star-michelin',
        'https://guide.michelin.com/us/en/new-york-state/restaurants/1-star-michelin/page/2',
        'https://guide.michelin.com/us/en/new-york-state/restaurants/1-star-michelin/page/3',
        'https://guide.michelin.com/us/en/washington/washington-dc/restaurants/1-star-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        us_one_restaurants.append(restaurant)

us_one_star = pd.DataFrame.from_dict(us_one_restaurants)
us_one_star.insert(0,'Country', 'United States')
us_one_star.insert(1,'no_of_stars',1)
print(us_one_star.head())
print(us_one_star.shape)

#Two star restaurant data
us_two_restaurants = []

pages = ['https://guide.michelin.com/us/en/california/restaurants/2-stars-michelin',
        'https://guide.michelin.com/us/en/illinois/chicago/restaurants/2-stars-michelin',
        'https://guide.michelin.com/us/en/new-york-state/restaurants/2-stars-michelin',
        'https://guide.michelin.com/us/en/washington/washington-dc/restaurants/2-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        us_two_restaurants.append(restaurant)

us_two_star = pd.DataFrame.from_dict(us_two_restaurants)
us_two_star.insert(0,'Country', 'United States')
us_two_star.insert(1,'no_of_stars',2)
print(us_two_star.head())
print(us_two_star.shape)

#Three star restaurant data
us_three_restaurants = []

pages = ['https://guide.michelin.com/us/en/california/restaurants/3-stars-michelin',
        'https://guide.michelin.com/us/en/illinois/chicago/restaurants/3-stars-michelin',
        'https://guide.michelin.com/us/en/new-york-state/restaurants/3-stars-michelin',
        'https://guide.michelin.com/us/en/washington/washington-dc/restaurants/3-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        us_three_restaurants.append(restaurant)

us_three_star = pd.DataFrame.from_dict(us_three_restaurants)
us_three_star.insert(0,'Country', 'United States')
us_three_star.insert(1,'no_of_stars',3)
print(us_three_star.head())
print(us_three_star.shape)

         Country  no_of_stars          name           city       cuisine
0  United States            1    Shin Sushi         Encino      Japanese
1  United States            1        Hayato    Los Angeles      Japanese
2  United States            1        Angler  San Francisco  Contemporary
3  United States            1  Harbor House            Elk   Californian
4  United States            1       Addison      San Diego  Contemporary
(160, 5)
         Country  no_of_stars                 name           city  \
0  United States            2           Vespertine    Culver City   
1  United States            2               n/naka    Los Angeles   
2  United States            2  Sushi Ginza Onodera      Hollywood   
3  United States            2           Providence      Hollywood   
4  United States            2                Somni  Beverly Hills   

        cuisine  
0  Contemporary  
1  Contemporary  
2      Japanese  
3       Seafood  
4  Contemporary  
(33, 5)
         Country  no_o

In [4]:
us_one_two = pd.merge(us_one_star,us_two_star,how='outer')
us_data = pd.merge(us_one_two,us_three_star, how = 'outer')

print(us_data.head())
print(us_data.shape)
us_data.to_csv('united_states_michelin_stars_2020.csv')

         Country  no_of_stars          name           city       cuisine
0  United States            1    Shin Sushi         Encino      Japanese
1  United States            1        Hayato    Los Angeles      Japanese
2  United States            1        Angler  San Francisco  Contemporary
3  United States            1  Harbor House            Elk   Californian
4  United States            1       Addison      San Diego  Contemporary
(207, 5)


<h3>Hong Kong </h3>

In [357]:
#One star restaurant data
hongkong_one_restaurants = []

pages = ['https://guide.michelin.com/hk/en/hong-kong-region/hong-kong/restaurants/1-star-michelin',
        'https://guide.michelin.com/hk/en/hong-kong-region/hong-kong/restaurants/1-star-michelin/page/2',
        'https://guide.michelin.com/hk/en/hong-kong-region/hong-kong/restaurants/1-star-michelin/page/3']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        hongkong_one_restaurants.append(restaurant)

hongkong_one_star = pd.DataFrame.from_dict(hongkong_one_restaurants)
hongkong_one_star.insert(0,'Country', 'Hong Kong')
hongkong_one_star.insert(1,'no_of_stars',1)
print(hongkong_one_star.head())
print(hongkong_one_star.shape)

#Two star restaurant data
hongkong_two_restaurants = []

pages = ['https://guide.michelin.com/hk/en/hong-kong-region/hong-kong/restaurants/2-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        hongkong_two_restaurants.append(restaurant)

hongkong_two_star = pd.DataFrame.from_dict(hongkong_two_restaurants)
hongkong_two_star.insert(0,'Country', 'Hong Kong')
hongkong_two_star.insert(1,'no_of_stars',2)
print(hongkong_two_star.head())
print(hongkong_two_star.shape)

#Three star restaurant data
hongkong_three_restaurants = []

pages = ['https://guide.michelin.com/hk/en/hong-kong-region/hong-kong/restaurants/3-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        hongkong_three_restaurants.append(restaurant)

hongkong_three_star = pd.DataFrame.from_dict(hongkong_three_restaurants)
hongkong_three_star.insert(0,'Country', 'Hong Kong')
hongkong_three_star.insert(1,'no_of_stars',3)
print(hongkong_three_star.head())
print(hongkong_three_star.shape)

     Country  no_of_stars               name       city              cuisine
0  Hong Kong            1  Liu Yuan Pavilion  Hong Kong         Shanghainese
1  Hong Kong            1                Vea  Hong Kong           Innovative
2  Hong Kong            1              Épure  Hong Kong  French contemporary
3  Hong Kong            1     Pang's Kitchen  Hong Kong            Cantonese
4  Hong Kong            1    New Punjab Club  Hong Kong               Indian
(51, 5)
     Country  no_of_stars           name       city              cuisine
0  Hong Kong            2         Pierre  Hong Kong  French contemporary
1  Hong Kong            2      Kashiwaya  Hong Kong             Japanese
2  Hong Kong            2  Bo Innovation  Hong Kong           Innovative
3  Hong Kong            2   Sun Tung Lok  Hong Kong            Cantonese
4  Hong Kong            2       Écriture  Hong Kong  French contemporary
(12, 5)
     Country  no_of_stars                          name       city  \
0  Hong Kong  

In [358]:
hongkong_one_two = pd.merge(hongkong_one_star,hongkong_two_star,how='outer')
hongkong_data = pd.merge(hongkong_one_two,hongkong_three_star, how = 'outer')

print(hongkong_data.head())
print(hongkong_data.shape)
hongkong_data.to_csv('hong_kong_michelin_stars_2020.csv')

     Country  no_of_stars               name       city              cuisine
0  Hong Kong            1  Liu Yuan Pavilion  Hong Kong         Shanghainese
1  Hong Kong            1                Vea  Hong Kong           Innovative
2  Hong Kong            1              Épure  Hong Kong  French contemporary
3  Hong Kong            1     Pang's Kitchen  Hong Kong            Cantonese
4  Hong Kong            1    New Punjab Club  Hong Kong               Indian
(70, 5)


<h3>South Korea</h3>

In [359]:
#One star restaurant data
southkorea_one_restaurants = []

pages = ['https://guide.michelin.com/kr/en/restaurants/1-star-michelin',
        'https://guide.michelin.com/kr/en/restaurants/1-star-michelin/page/2'
        ]

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        southkorea_one_restaurants.append(restaurant)

southkorea_one_star = pd.DataFrame.from_dict(southkorea_one_restaurants)
southkorea_one_star.insert(0,'Country', 'South Korea')
southkorea_one_star.insert(1,'no_of_stars',1)
print(southkorea_one_star.head())
print(southkorea_one_star.shape)

#Two star restaurant data
southkorea_two_restaurants = []

pages = ['https://guide.michelin.com/kr/en/restaurants/2-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        southkorea_two_restaurants.append(restaurant)

southkorea_two_star = pd.DataFrame.from_dict(southkorea_two_restaurants)
southkorea_two_star.insert(0,'Country', 'South Korea')
southkorea_two_star.insert(1,'no_of_stars',2)
print(southkorea_two_star.head())
print(southkorea_two_star.shape)

#Three star restaurant data
southkorea_three_restaurants = []

pages = ['https://guide.michelin.com/kr/en/restaurants/3-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        southkorea_three_restaurants.append(restaurant)

southkorea_three_star = pd.DataFrame.from_dict(southkorea_three_restaurants)
southkorea_three_star.insert(0,'Country', 'South Korea')
southkorea_three_star.insert(1,'no_of_stars',3)
print(southkorea_three_star.head())
print(southkorea_three_star.shape)

       Country  no_of_stars             name   city              cuisine
0  South Korea            1  Pierre Gagnaire  Seoul  French contemporary
1  South Korea            1           Onjium  Seoul               Korean
2  South Korea            1            Evett  Seoul           Innovative
3  South Korea            1     Votre Maison  Seoul               French
4  South Korea            1            Myomi  Seoul  Korean contemporary
(22, 5)
       Country  no_of_stars          name   city              cuisine
0  South Korea            2  L'Impression  Seoul           Innovative
1  South Korea            2          Mosu  Seoul           Innovative
2  South Korea            2       Mingles  Seoul  Korean contemporary
3  South Korea            2   Kwonsooksoo  Seoul               Korean
4  South Korea            2        Kojima  Seoul                Sushi
(7, 5)
       Country  no_of_stars     name   city cuisine
0  South Korea            3  La Yeon  Seoul  Korean
1  South Korea         

In [360]:
southkorea_one_two = pd.merge(southkorea_one_star,southkorea_two_star,how='outer')
southkorea_data = pd.merge(southkorea_one_two,southkorea_three_star, how = 'outer')

print(southkorea_data.head())
print(southkorea_data.shape)
southkorea_data.to_csv('south_korea_michelin_stars_2020.csv')

       Country  no_of_stars             name   city              cuisine
0  South Korea            1  Pierre Gagnaire  Seoul  French contemporary
1  South Korea            1           Onjium  Seoul               Korean
2  South Korea            1            Evett  Seoul           Innovative
3  South Korea            1     Votre Maison  Seoul               French
4  South Korea            1            Myomi  Seoul  Korean contemporary
(31, 5)


<h3>Singapore</h3>

In [361]:
#One star restaurant data
singapore_one_restaurants = []

pages = ['https://guide.michelin.com/sg/en/restaurants/1-star-michelin',
        'https://guide.michelin.com/sg/en/restaurants/1-star-michelin/page/2'
        ]

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        singapore_one_restaurants.append(restaurant)

singapore_one_star = pd.DataFrame.from_dict(singapore_one_restaurants)
singapore_one_star.insert(0,'Country', 'Singapore')
singapore_one_star.insert(1,'no_of_stars',1)
print(singapore_one_star.head())
print(singapore_one_star.shape)

#Two star restaurant data
singapore_two_restaurants = []

pages = ['https://guide.michelin.com/sg/en/restaurants/2-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        singapore_two_restaurants.append(restaurant)

singapore_two_star = pd.DataFrame.from_dict(singapore_two_restaurants)
singapore_two_star.insert(0,'Country', 'Singapore')
singapore_two_star.insert(1,'no_of_stars',2)
print(singapore_two_star.head())
print(singapore_two_star.shape)

#Three star restaurant data
singapore_three_restaurants = []

pages = ['https://guide.michelin.com/sg/en/restaurants/3-stars-michelin']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.col-md-6.col-lg-6.col-xl-3'):
  
        restaurant = {              
           'name': restaurant_content.select_one('.card__menu-content--title a').text.replace('\n','').strip(),
           'city': restaurant_content.select_one('.card__menu-footer--location').text.replace('\n','').strip(),
           'cuisine': restaurant_content.select_one('.card__menu-footer--price').text.replace('\n','').strip(),
        }
        singapore_three_restaurants.append(restaurant)

singapore_three_star = pd.DataFrame.from_dict(singapore_three_restaurants)
singapore_three_star.insert(0,'Country', 'Singapore')
singapore_three_star.insert(1,'no_of_stars',3)
print(singapore_three_star.head())
print(singapore_three_star.shape)

     Country  no_of_stars            name       city                cuisine
0  Singapore            1  Vianney Massot  Singapore    French contemporary
1  Singapore            1         Table65  Singapore  European contemporary
2  Singapore            1            Meta  Singapore             Innovative
3  Singapore            1          Lerouy  Singapore    French contemporary
4  Singapore            1             Jag  Singapore    French contemporary
(37, 5)
     Country  no_of_stars           name       city                cuisine
0  Singapore            2            Zén  Singapore  European contemporary
1  Singapore            2      Waku Ghin  Singapore  Japanese contemporary
2  Singapore            2      Shoukouwa  Singapore                  Sushi
3  Singapore            2  Shisen Hanten  Singapore      Cantonese-Sichuan
4  Singapore            2   Saint Pierre  Singapore    French contemporary
(5, 5)
     Country  no_of_stars      name       city              cuisine
0  Singapor

In [362]:
singapore_one_two = pd.merge(singapore_one_star,singapore_two_star,how='outer')
singapore_data = pd.merge(singapore_one_two,singapore_three_star, how = 'outer')

print(singapore_data.head())
print(singapore_data.shape)
singapore_data.to_csv('singapore_michelin_stars_2020.csv')

     Country  no_of_stars            name       city                cuisine
0  Singapore            1  Vianney Massot  Singapore    French contemporary
1  Singapore            1         Table65  Singapore  European contemporary
2  Singapore            1            Meta  Singapore             Innovative
3  Singapore            1          Lerouy  Singapore    French contemporary
4  Singapore            1             Jag  Singapore    French contemporary
(44, 5)


<h3> Belgium </h3>

Belgium, along with the countries which follow below, have a different web page format, therefore the code has been adapted to accommodate the new layout.
We will need to concatenate these dataframes together first, so that we can modify the city column, before then
merging with the previous dataframes.

In [388]:
#One star
belgium_one_restaurants = []
    
pages = ['https://fr.viamichelin.be/web/Restaurants/Restaurants-Belgique?stars=1',
        'https://fr.viamichelin.be/web/Restaurants/Restaurants-Belgique?stars=1&page=2',
        'https://fr.viamichelin.be/web/Restaurants/Restaurants-Belgique?stars=1&page=3']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            belgium_one_restaurants.append(restaurant)
        
belgium_one_star = pd.DataFrame.from_dict(belgium_one_restaurants)
belgium_one_star.insert(0,'Country', 'Belgium')
belgium_one_star.insert(1,'no_of_stars',1)
print(belgium_one_star.head())
print(belgium_one_star.shape)

#Two stars

belgium_two_restaurants = []

pages = ['https://fr.viamichelin.be/web/Restaurants/Restaurants-Belgique?stars=2']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            belgium_two_restaurants.append(restaurant)
        
belgium_two_star = pd.DataFrame.from_dict(belgium_two_restaurants)
belgium_two_star.insert(0,'Country', 'Belgium')
belgium_two_star.insert(1,'no_of_stars',2)
print(belgium_two_star.head())
print(belgium_two_star.shape)

#Three stars

belgium_three_restaurants = []

pages = ['https://fr.viamichelin.be/web/Restaurants/Restaurants-Belgique?stars=3']

for page in pages:
    r = requests.get(page)
    soup = BeautifulSoup(r.content,'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            belgium_three_restaurants.append(restaurant)
        
belgium_three_star = pd.DataFrame.from_dict(belgium_three_restaurants)
belgium_three_star.insert(0,'Country', 'Belgium')
belgium_three_star.insert(1,'no_of_stars',3)
print(belgium_three_star.head())
print(belgium_three_star.shape)

   Country  no_of_stars               name  \
0  Belgium            1  The Butcher's son   
1  Belgium            1      Chai Gourmand   
2  Belgium            1    Brasserie Julie   
3  Belgium            1      Hof Ter Hulst   
4  Belgium            1             Nathan   

                                      city  
0        Boomgaardstraat 1, 2018 Antwerpen  
1            Rue Chainisse 45, 5030 Beuzet  
2  Dorpsplein 3, 1700 Sint-Martens-Bodegem  
3             Kerkstraat 33, 2235 Hulshout  
4  Lange Koepoortstraat 13, 2000 Antwerpen  
(106, 4)
   Country  no_of_stars                   name  \
0  Belgium            2             L'Eau Vive   
1  Belgium            2              La Source   
2  Belgium            2                bon bon   
3  Belgium            2  Le Chalet de la Forêt   
4  Belgium            2     La Table de Maxime   

                                             city  
0                route de Floreffe 37, 5170 Arbre  
1                Paalsteenlaan 90, 3620

In [384]:
belgium_one_two = pd.merge(belgium_one_star,belgium_two_star,how='outer')
belgium_data = pd.merge(belgium_one_two,belgium_three_star, how = 'outer')

print(belgium_data.head())
print(belgium_data.shape)

belgium_data.to_csv('belgium_michelin_stars_2020.csv')

   Country  no_of_stars                name  \
0  Belgium            1        Little Paris   
1  Belgium            1  Aux petits oignons   
2  Belgium            1            Carcasse   
3  Belgium            1            Da Mimmo   
4  Belgium            1                 JER   

                                                city  
0            chaussée de Bruxelles 89, 1410 Waterloo  
1           chaussée de Tirlemont 260, 1370 Jodoigne  
2         Henri Christiaenlaan 5, 8670 Sint-Idesbald  
3  avenue du Roi Chevalier 24, 1200 Woluwe-Saint-...  
4                     Persoonstraat 16, 3500 Hasselt  
(72, 4)


<h3> Germany </h3>

In [389]:
#One star
germany_one_restaurants = []
    
pages = ['https://www.viamichelin.de/web/Suchen_Restaurants/Restaurants-Deutschland?stars=1',
        'https://www.viamichelin.de/web/Suchen_Restaurants/Restaurants-Deutschland?stars=1&page=2',
        'https://www.viamichelin.de/web/Suchen_Restaurants/Restaurants-Deutschland?stars=1&page=3',
        'https://www.viamichelin.de/web/Suchen_Restaurants/Restaurants-Deutschland?stars=1&page=4',
        'https://www.viamichelin.de/web/Suchen_Restaurants/Restaurants-Deutschland?stars=1&page=5',
        'https://www.viamichelin.de/web/Suchen_Restaurants/Restaurants-Deutschland?stars=1&page=6']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            germany_one_restaurants.append(restaurant)
        
germany_one_star = pd.DataFrame.from_dict(germany_one_restaurants)
germany_one_star.insert(0,'Country', 'Germany')
germany_one_star.insert(1,'no_of_stars',1)
print(germany_one_star.head())
print(germany_one_star.shape)

#Two stars
germany_two_restaurants = []
    
pages = ['https://www.viamichelin.de/web/Suchen_Restaurants/Restaurants-Deutschland?stars=2']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            germany_two_restaurants.append(restaurant)
        
germany_two_star = pd.DataFrame.from_dict(germany_two_restaurants)
germany_two_star.insert(0,'Country', 'Germany')
germany_two_star.insert(1,'no_of_stars',2)
print(germany_two_star.head())
print(germany_two_star.shape)

#Three stars
germany_three_restaurants = []
    
pages = ['https://www.viamichelin.de/web/Suchen_Restaurants/Restaurants-Deutschland?stars=3']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            germany_three_restaurants.append(restaurant)
        
germany_three_star = pd.DataFrame.from_dict(germany_three_restaurants)
germany_three_star.insert(0,'Country', 'Germany')
germany_three_star.insert(1,'no_of_stars',3)
print(germany_three_star.head())
print(germany_three_star.shape)

   Country  no_of_stars                      name  \
0  Germany            1       Schranners Waldhorn   
1  Germany            1  Camers Schlossrestaurant   
2  Germany            1                      Tian   
3  Germany            1             Landhaus Köpp   
4  Germany            1                     Ernst   

                                           city  
0  Schönbuchstr. 49, 72074 Tübingen-Bebenhausen  
1             Schlossstr. 25, 85411 HOHENKAMMER  
2                   Frauenstr. 4, 80469 München  
3        Husenweg 147, 46509 Xanten-Obermörmter  
4          Gerichtstr. 54, 13347 Berlin-Wedding  
(254, 4)
   Country  no_of_stars                                  name  \
0  Germany            2  ammolite - The Lighthouse Restaurant   
1  Germany            2            Schwarzenstein Nils Henkel   
2  Germany            2                   Keilings Restaurant   
3  Germany            2                                 FACIL   
4  Germany            2                        

In [390]:
germany_one_two = pd.merge(germany_one_star,germany_two_star,how='outer')
germany_data = pd.merge(germany_one_two,germany_three_star, how = 'outer')

print(germany_data.head())
print(germany_data.shape)

germany_data.to_csv('germany_michelin_stars_2020.csv')

   Country  no_of_stars                      name  \
0  Germany            1       Schranners Waldhorn   
1  Germany            1  Camers Schlossrestaurant   
2  Germany            1                      Tian   
3  Germany            1             Landhaus Köpp   
4  Germany            1                     Ernst   

                                           city  
0  Schönbuchstr. 49, 72074 Tübingen-Bebenhausen  
1             Schlossstr. 25, 85411 HOHENKAMMER  
2                   Frauenstr. 4, 80469 München  
3        Husenweg 147, 46509 Xanten-Obermörmter  
4          Gerichtstr. 54, 13347 Berlin-Wedding  
(302, 4)


<h3>Luxembourg</h3>

In [391]:
#One star
luxembourg_one_restaurants = []
    
pages = ['https://fr.viamichelin.be/web/Restaurants/Restaurants-Luxembourg?stars=1']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            luxembourg_one_restaurants.append(restaurant)
        
luxembourg_one_star = pd.DataFrame.from_dict(luxembourg_one_restaurants)
luxembourg_one_star.insert(0,'Country', 'Luxembourg')
luxembourg_one_star.insert(1,'no_of_stars',1)
print(luxembourg_one_star.head())
print(luxembourg_one_star.shape)

#Two stars
luxembourg_two_restaurants = []
    
pages = ['https://fr.viamichelin.be/web/Restaurants/Restaurants-Luxembourg?stars=2']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            luxembourg_two_restaurants.append(restaurant)
        
luxembourg_two_star = pd.DataFrame.from_dict(luxembourg_two_restaurants)
luxembourg_two_star.insert(0,'Country', 'Luxembourg')
luxembourg_two_star.insert(1,'no_of_stars',2)
print(luxembourg_two_star.head())
print(luxembourg_two_star.shape)


      Country  no_of_stars                 name  \
0  Luxembourg            1     Guillou Campagne   
1  Luxembourg            1                 Fani   
2  Luxembourg            1              Mosconi   
3  Luxembourg            1      La Cristallerie   
4  Luxembourg            1  Les Jardins d'Anaïs   

                                        city  
0  17 rue de la Résistance, 4996 Schouweiler  
1                  51 Grand Rue, 3394 Roeser  
2            13 rue Münster, 2160 Luxembourg  
3          18 place d'Armes, 1136 Luxembourg  
4  2 place Sainte Cunégonde, 1367 Luxembourg  
(8, 4)
      Country  no_of_stars              name                         city
0  Luxembourg            2  Ma Langue Sourit  1 rue Remich, 5331 Oetrange
(1, 4)


In [393]:
luxembourg_data = pd.merge(luxembourg_one_star,luxembourg_two_star,how='outer')

print(luxembourg_data.head())
print(luxembourg_data.shape)

luxembourg_data.to_csv('luxembourg_michelin_stars_2020.csv')

      Country  no_of_stars                 name  \
0  Luxembourg            1     Guillou Campagne   
1  Luxembourg            1                 Fani   
2  Luxembourg            1              Mosconi   
3  Luxembourg            1      La Cristallerie   
4  Luxembourg            1  Les Jardins d'Anaïs   

                                        city  
0  17 rue de la Résistance, 4996 Schouweiler  
1                  51 Grand Rue, 3394 Roeser  
2            13 rue Münster, 2160 Luxembourg  
3          18 place d'Armes, 1136 Luxembourg  
4  2 place Sainte Cunégonde, 1367 Luxembourg  
(9, 4)


<h3>Netherlands</h3>

In [394]:
#One star
netherlands_one_restaurants = []
    
pages = ['https://www.viamichelin.nl/web/Restaurants/Restaurants-Nederland?stars=1',
        'https://www.viamichelin.nl/web/Restaurants/Restaurants-Nederland?stars=1&page=2']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            netherlands_one_restaurants.append(restaurant)
        
netherlands_one_star = pd.DataFrame.from_dict(netherlands_one_restaurants)
netherlands_one_star.insert(0,'Country', 'Netherlands')
netherlands_one_star.insert(1,'no_of_stars',1)
print(netherlands_one_star.head())
print(netherlands_one_star.shape)

#Two stars
netherlands_two_restaurants = []
    
pages = ['https://www.viamichelin.nl/web/Restaurants/Restaurants-Nederland?stars=2']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            netherlands_two_restaurants.append(restaurant)
        
netherlands_two_star = pd.DataFrame.from_dict(netherlands_two_restaurants)
netherlands_two_star.insert(0,'Country', 'Netherlands')
netherlands_two_star.insert(1,'no_of_stars',2)
print(netherlands_two_star.head())
print(netherlands_two_star.shape)

#Three stars
netherlands_three_restaurants = []
    
pages = ['https://www.viamichelin.nl/web/Restaurants/Restaurants-Nederland?stars=3']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            netherlands_three_restaurants.append(restaurant)
        
netherlands_three_star = pd.DataFrame.from_dict(netherlands_three_restaurants)
netherlands_three_star.insert(0,'Country', 'Netherlands')
netherlands_three_star.insert(1,'no_of_stars',3)
print(netherlands_three_star.head())
print(netherlands_three_star.shape)

       Country  no_of_stars                   name  \
0  Netherlands            1    Kaatje bij de Sluis   
1  Netherlands            1              Derozario   
2  Netherlands            1  De Karpendonkse Hoeve   
3  Netherlands            1              Wollerich   
4  Netherlands            1               Basiliek   

                                  city  
0   Brouwerstraat 20, 8356 DV Blokzijl  
1          Steenweg 8, 5707 CG Helmond  
2     Sumatralaan 3, 5631 AA Eindhoven  
3    Heuvel 23, 5492 AC Sint-Oedenrode  
4  Vischmarkt 57 L, 3841 BE Harderwijk  
(92, 4)
       Country  no_of_stars                  name  \
0  Netherlands            2                  Fred   
1  Netherlands            2            Parkheuvel   
2  Netherlands            2     De Treeswijkhoeve   
3  Netherlands            2  FG - François Geurds   
4  Netherlands            2               &Moshik   

                                   city  
0   Honingerdijk 263, 3063 AM Rotterdam  
1      Heuvellaan 

In [395]:
netherlands_one_two = pd.merge(netherlands_one_star,netherlands_two_star,how='outer')
netherlands_data = pd.merge(netherlands_one_two,netherlands_three_star, how = 'outer')

print(netherlands_data.head())
print(netherlands_data.shape)

netherlands_data.to_csv('netherlands_michelin_stars_2020.csv')

       Country  no_of_stars                   name  \
0  Netherlands            1    Kaatje bij de Sluis   
1  Netherlands            1              Derozario   
2  Netherlands            1  De Karpendonkse Hoeve   
3  Netherlands            1              Wollerich   
4  Netherlands            1               Basiliek   

                                  city  
0   Brouwerstraat 20, 8356 DV Blokzijl  
1          Steenweg 8, 5707 CG Helmond  
2     Sumatralaan 3, 5631 AA Eindhoven  
3    Heuvel 23, 5492 AC Sint-Oedenrode  
4  Vischmarkt 57 L, 3841 BE Harderwijk  
(111, 4)


<h3>Portugal</h3>

In [396]:
#One star
portugal_one_restaurants = []
    
pages = ['https://www.viamichelin.pt/web/Restaurantes/Restaurantes-Portugal?stars=1']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            portugal_one_restaurants.append(restaurant)
        
portugal_one_star = pd.DataFrame.from_dict(portugal_one_restaurants)
portugal_one_star.insert(0,'Country', 'Portugal')
portugal_one_star.insert(1,'no_of_stars',1)
print(portugal_one_star.head())
print(portugal_one_star.shape)

#Two stars
portugal_two_restaurants = []
    
pages = ['https://www.viamichelin.pt/web/Restaurantes/Restaurantes-Portugal?stars=2']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            portugal_two_restaurants.append(restaurant)
        
portugal_two_star = pd.DataFrame.from_dict(portugal_two_restaurants)
portugal_two_star.insert(0,'Country', 'Portugal')
portugal_two_star.insert(1,'no_of_stars',2)
print(portugal_two_star.head())
print(portugal_two_star.shape)


    Country  no_of_stars           name  \
0  Portugal            1         Midori   
1  Portugal            1  Fifty Seconds   
2  Portugal            1         Eleven   
3  Portugal            1          Vista   
4  Portugal            1        Bon Bon   

                                                city  
0                                    2714-511 Sintra  
1  Cais das Naus, Lote 2.21.01 (Parque das Nações...  
2          Rua Marquês de Fronteira, 1070-051 Lisboa  
3          Avenida Tomás Cabreira, 8500-802 Portimão  
4  Estrada de Sesmarias - Urbanização Cabeço de P...  
(20, 4)
    Country  no_of_stars                     name  \
0  Portugal            2              The Yeatman   
1  Portugal            2                 Belcanto   
2  Portugal            2           Il Gallo d'Oro   
3  Portugal            2  Casa de Chá da Boa Nova   
4  Portugal            2                    Ocean   

                                                city  
0  Rua do Choupelo (Santa Mar

In [397]:
portugal_data = pd.merge(portugal_one_star,portugal_two_star,how='outer')

print(portugal_data.head())
print(portugal_data.shape)

portugal_data.to_csv('portugal_michelin_stars_2020.csv')

    Country  no_of_stars           name  \
0  Portugal            1         Midori   
1  Portugal            1  Fifty Seconds   
2  Portugal            1         Eleven   
3  Portugal            1          Vista   
4  Portugal            1        Bon Bon   

                                                city  
0                                    2714-511 Sintra  
1  Cais das Naus, Lote 2.21.01 (Parque das Nações...  
2          Rua Marquês de Fronteira, 1070-051 Lisboa  
3          Avenida Tomás Cabreira, 8500-802 Portimão  
4  Estrada de Sesmarias - Urbanização Cabeço de P...  
(27, 4)


<h3>Switzerland</h3>

In [398]:
#One star
switzerland_one_restaurants = []
    
pages = ['https://de.viamichelin.ch/web/Restaurants/Restaurants-Schweiz?stars=1',
        'https://de.viamichelin.ch/web/Restaurants/Restaurants-Schweiz?stars=1&page=2',
        'https://de.viamichelin.ch/web/Restaurants/Restaurants-Schweiz?stars=1&page=3']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            switzerland_one_restaurants.append(restaurant)
        
switzerland_one_star = pd.DataFrame.from_dict(switzerland_one_restaurants)
switzerland_one_star.insert(0,'Country', 'Switzerland')
switzerland_one_star.insert(1,'no_of_stars',1)
print(switzerland_one_star.head())
print(switzerland_one_star.shape)

#Two stars
switzerland_two_restaurants = []
    
pages = ['https://de.viamichelin.ch/web/Restaurants/Restaurants-Schweiz?stars=2']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            switzerland_two_restaurants.append(restaurant)
        
switzerland_two_star = pd.DataFrame.from_dict(switzerland_two_restaurants)
switzerland_two_star.insert(0,'Country', 'Switzerland')
switzerland_two_star.insert(1,'no_of_stars',2)
print(switzerland_two_star.head())
print(switzerland_two_star.shape)

#Three stars
switzerland_three_restaurants = []
    
pages = ['https://de.viamichelin.ch/web/Restaurants/Restaurants-Schweiz?stars=3']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip(),
               'city': restaurant_content.select_one('.poi-item-details.truncate').text.replace('\n','').strip()
            }
            switzerland_three_restaurants.append(restaurant)
        
switzerland_three_star = pd.DataFrame.from_dict(switzerland_three_restaurants)
switzerland_three_star.insert(0,'Country', 'Switzerland')
switzerland_three_star.insert(1,'no_of_stars',3)
print(switzerland_three_star.head())
print(switzerland_three_star.shape)

       Country  no_of_stars                                name  \
0  Switzerland            1                               roots   
1  Switzerland            1                    Talvo By Dalsass   
2  Switzerland            1                      Regina Montium   
3  Switzerland            1  Gasthaus Zur Fernsicht - Incantare   
4  Switzerland            1                 Le Berceau des Sens   

                                     city  
0           Mühlhauserstr. 17, 4056 Basel  
1            Via Gunels 15, 7512 Champfèr  
2   Staffelhöhenweg 61, 6356 Rigi Kaltbad  
3                Seeallee 10, 9410 Heiden  
4  Route de Cojonnex 18, 1000 Lausanne 25  
(101, 4)
       Country  no_of_stars                     name  \
0  Switzerland            2                Adelboden   
1  Switzerland            2                     Ecco   
2  Switzerland            2                    focus   
3  Switzerland            2  Stucki - Tanja Grandits   
4  Switzerland            2         Einstein

In [399]:
switzerland_one_two = pd.merge(switzerland_one_star,switzerland_two_star,how='outer')
switzerland_data = pd.merge(switzerland_one_two,switzerland_three_star, how = 'outer')

print(switzerland_data.head())
print(switzerland_data.shape)

switzerland_data.to_csv('switzerland_michelin_stars_2020.csv')

       Country  no_of_stars                                name  \
0  Switzerland            1                               roots   
1  Switzerland            1                    Talvo By Dalsass   
2  Switzerland            1                      Regina Montium   
3  Switzerland            1  Gasthaus Zur Fernsicht - Incantare   
4  Switzerland            1                 Le Berceau des Sens   

                                     city  
0           Mühlhauserstr. 17, 4056 Basel  
1            Via Gunels 15, 7512 Champfèr  
2   Staffelhöhenweg 61, 6356 Rigi Kaltbad  
3                Seeallee 10, 9410 Heiden  
4  Route de Cojonnex 18, 1000 Lausanne 25  
(124, 4)


<h3>Shanghai</h3>

In [454]:
#One star
shanghai_one_restaurants = []
    
pages = ['https://www.viamichelin.co.uk/web/Restaurants-search/Restaurants-Shanghai-_-Shanghai-China?stars=1',
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip()
            }
            shanghai_one_restaurants.append(restaurant)
        
shanghai_one_star = pd.DataFrame.from_dict(shanghai_one_restaurants)
shanghai_one_star.insert(0,'Country', 'China')
shanghai_one_star.insert(1,'no_of_stars',1)
shanghai_one_star.insert(3,'city','Shanghai')
print(shanghai_one_star.head())
print(shanghai_one_star.shape)

#Two stars
shanghai_two_restaurants = []
    
pages = ['https://www.viamichelin.co.uk/web/Restaurants-search/Restaurants-Shanghai-_-Shanghai-China?stars=2']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip()
            }
            shanghai_two_restaurants.append(restaurant)
        
shanghai_two_star = pd.DataFrame.from_dict(shanghai_two_restaurants)
shanghai_two_star.insert(0,'Country', 'China')
shanghai_two_star.insert(1,'no_of_stars',2)
shanghai_two_star.insert(3,'city','Shanghai')
print(shanghai_two_star.shape)

#Three stars
shanghai_three_restaurants = []
    
pages = ['https://www.viamichelin.co.uk/web/Restaurants-search/Restaurants-Shanghai-_-Shanghai-China?stars=3']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.poi-item.poi-item-restaurant'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.poi-item-name a').text.replace('\n','').strip()
            }
            shanghai_three_restaurants.append(restaurant)
        
shanghai_three_star = pd.DataFrame.from_dict(shanghai_three_restaurants)
shanghai_three_star.insert(0,'Country', 'China')
shanghai_three_star.insert(1,'no_of_stars',3)
shanghai_three_star.insert(3,'city','Shanghai')
print(shanghai_three_star.shape)

  Country  no_of_stars                  name      city
0   China            1             Fu He Hui  Shanghai
1   China            1                Phénix  Shanghai
2   China            1      Da Dong (Jingan)  Shanghai
3   China            1  Seventh Son (Jingan)  Shanghai
4   China            1            Yu Zhi Lan  Shanghai
(31, 4)
(8, 4)
(1, 4)


In [455]:
shanghai_one_two = pd.merge(shanghai_one_star,shanghai_two_star,how='outer')
shanghai_data = pd.merge(shanghai_one_two,shanghai_three_star, how = 'outer')

print(shanghai_data.head())
print(shanghai_data.shape)

shanghai_data.to_csv('shanghai_michelin_stars_2020.csv')

  Country  no_of_stars                  name      city
0   China            1             Fu He Hui  Shanghai
1   China            1                Phénix  Shanghai
2   China            1      Da Dong (Jingan)  Shanghai
3   China            1  Seventh Son (Jingan)  Shanghai
4   China            1            Yu Zhi Lan  Shanghai
(40, 4)


<h3>Japan</h3>

The next two countries have a third web format, so an altered code will be used for these.

In [412]:
#One star
#Tokyo
tokyo_one_restaurants = []
    
pages = ['https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p2/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p3/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p4/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p5/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p6/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p7/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p8/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p9/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p10/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p11/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p12/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p13/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p14/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p15/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p16/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/1star/p17/']

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            tokyo_one_restaurants.append(restaurant)
        
tokyo_one_star = pd.DataFrame.from_dict(tokyo_one_restaurants)
tokyo_one_star.insert(0,'Country', 'Japan')
tokyo_one_star.insert(1,'no_of_stars',1)
tokyo_one_star.insert(3,'city','Tokyo')
print(tokyo_one_star.head())
print(tokyo_one_star.shape)

  Country  no_of_stars             name   city   cuisine
0   Japan            1     Adachi Naoto  Tokyo  Japanese
1   Japan            1  Les Alchimistes  Tokyo    French
2   Japan            1     Aroma Fresca  Tokyo   Italian
3   Japan            1        Ishibashi  Tokyo     Unagi
4   Japan            1        Ishibashi  Tokyo  Sukiyaki
(167, 5)


In [414]:
#Two star
#Tokyo
tokyo_two_restaurants = []
    
pages = ['https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/2star/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/2star/p2/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/2star/p3/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/2star/p4/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/2star/p5/'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            tokyo_two_restaurants.append(restaurant)
        
tokyo_two_star = pd.DataFrame.from_dict(tokyo_two_restaurants)
tokyo_two_star.insert(0,'Country', 'Japan')
tokyo_two_star.insert(1,'no_of_stars',2)
tokyo_two_star.insert(3,'city','Tokyo')
print(tokyo_two_star.head())
print(tokyo_two_star.shape)

  Country  no_of_stars                    name   city   cuisine
0   Japan            2      Usukifugu Yamadaya  Tokyo      Fugu
1   Japan            2      Tentempura Uchitsu  Tokyo   Tempura
2   Japan            2                Esquisse  Tokyo    French
3   Japan            2  Édition Koji Shimomura  Tokyo    French
4   Japan            2                 Okamoto  Tokyo  Japanese
(48, 5)


In [415]:
#Three star
#Tokyo
tokyo_three_restaurants = []
    
pages = ['https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/3star/',
        'https://gm.gnavi.co.jp/restaurant/list/tokyo/all_area/all_small_area/all_food/3star/p2/'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            tokyo_three_restaurants.append(restaurant)
        
tokyo_three_star = pd.DataFrame.from_dict(tokyo_three_restaurants)
tokyo_three_star.insert(0,'Country', 'Japan')
tokyo_three_star.insert(1,'no_of_stars',3)
tokyo_three_star.insert(3,'city','Tokyo')
print(tokyo_three_star.head())
print(tokyo_three_star.shape)

  Country  no_of_stars                                               name  \
0   Japan            3                                Kagurazaka Ishikawa   
1   Japan            3  KadowakiRestaurant promoted from 1 to 2 stars ...   
2   Japan            3                                              Kanda   
3   Japan            3                                       Quintessence   
4   Japan            3                                             Kohaku   

    city   cuisine  
0  Tokyo  Japanese  
1  Tokyo  Japanese  
2  Tokyo  Japanese  
3  Tokyo    French  
4  Tokyo  Japanese  
(11, 5)


In [416]:
#One star
#Kyoto
kyoto_one_restaurants = []
    
pages = ['https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/1star/',
        'https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/1star/p2/',
        'https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/1star/p3/',
        'https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/1star/p4/',
        'https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/1star/p5/',
        'https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/1star/p6/',
        'https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/1star/p7/',
        'https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/1star/p8/'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            kyoto_one_restaurants.append(restaurant)
        
kyoto_one_star = pd.DataFrame.from_dict(kyoto_one_restaurants)
kyoto_one_star.insert(0,'Country', 'Japan')
kyoto_one_star.insert(1,'no_of_stars',1)
kyoto_one_star.insert(3,'city','Kyoto')
print(kyoto_one_star.head())
print(kyoto_one_star.shape)

  Country  no_of_stars              name   city   cuisine
0   Japan            1              Akai  Kyoto  Japanese
1   Japan            1  Kamigamo Akiyama  Kyoto  Japanese
2   Japan            1     Aji Fukushima  Kyoto  Japanese
3   Japan            1       Aji Rakuzan  Kyoto  Japanese
4   Japan            1           Iwasaki  Kyoto  Japanese
(76, 5)


In [417]:
#Two star
#Kyoto
kyoto_two_restaurants = []
    
pages = ['https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/2star/',
        'https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/2star/p2/',
        'https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/2star/p3/'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            kyoto_two_restaurants.append(restaurant)
        
kyoto_two_star = pd.DataFrame.from_dict(kyoto_two_restaurants)
kyoto_two_star.insert(0,'Country', 'Japan')
kyoto_two_star.insert(1,'no_of_stars',2)
kyoto_two_star.insert(3,'city','Kyoto')
print(kyoto_two_star.head())
print(kyoto_two_star.shape)

  Country  no_of_stars                 name   city   cuisine
0   Japan            2   Sumibi Kappo Ifuki  Kyoto  Japanese
1   Japan            2         Gion Owatari  Kyoto  Japanese
2   Japan            2                Ogata  Kyoto  Japanese
3   Japan            2         Kikunoi Roan  Kyoto  Japanese
4   Japan            2  Kyokaiseki Kichisen  Kyoto  Japanese
(21, 5)


In [420]:
#Three star
#Kyoto
kyoto_three_restaurants = []
    
pages = ['https://gm.gnavi.co.jp/restaurant/list/kyoto/all_area/all_small_area/all_food/3star/',
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            kyoto_three_restaurants.append(restaurant)
        
kyoto_three_star = pd.DataFrame.from_dict(kyoto_three_restaurants)
kyoto_three_star.insert(0,'Country', 'Japan')
kyoto_three_star.insert(1,'no_of_stars',3)
kyoto_three_star.insert(3,'city','Kyoto')
print(kyoto_three_star.head())
print(kyoto_three_star.shape)

  Country  no_of_stars                                               name  \
0   Japan            3                                               Iida   
1   Japan            3                                     Kikunoi Honten   
2   Japan            3                           Kitcho Arashiyama Honten   
3   Japan            3  Gion SasakiRestaurant promoted from 1 to 2 sta...   
4   Japan            3                                Isshisoden Nakamura   

    city   cuisine  
0  Kyoto  Japanese  
1  Kyoto  Japanese  
2  Kyoto  Japanese  
3  Kyoto  Japanese  
4  Kyoto  Japanese  
(8, 5)


In [421]:
#One star
#Osaka
osaka_one_restaurants = []
    
pages = ['https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/1star/',
        'https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/1star/p2/',
        'https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/1star/p3/',
        'https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/1star/p4/',
        'https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/1star/p5/',
        'https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/1star/p6/',
        'https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/1star/p7/',
        'https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/1star/p8/'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            osaka_one_restaurants.append(restaurant)
        
osaka_one_star = pd.DataFrame.from_dict(osaka_one_restaurants)
osaka_one_star.insert(0,'Country', 'Japan')
osaka_one_star.insert(1,'no_of_stars',1)
osaka_one_star.insert(3,'city','Osaka')
print(osaka_one_star.head())
print(osaka_one_star.shape)

  Country  no_of_stars                name   city   cuisine
0   Japan            1   Ajikitcho Bumbuan  Osaka  Japanese
1   Japan            1  Ajikitcho Horieten  Osaka  Japanese
2   Japan            1   Sushidokoro Amano  Osaka     Sushi
3   Japan            1             Ayamuya  Osaka  Yakitori
4   Japan            1             Ayamedo  Osaka      Soba
(78, 5)


In [423]:
#Two star
#Osaka
osaka_two_restaurants = []
    
pages = ['https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/2star/',
        'https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/2star/p2/'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            osaka_two_restaurants.append(restaurant)
        
osaka_two_star = pd.DataFrame.from_dict(osaka_two_restaurants)
osaka_two_star.insert(0,'Country', 'Japan')
osaka_two_star.insert(1,'no_of_stars',2)
osaka_two_star.insert(3,'city','Osaka')
print(osaka_two_star.head())
print(osaka_two_star.shape)

  Country  no_of_stars                       name   city     cuisine
0   Japan            2                       Aoki  Osaka    Japanese
1   Japan            2  Ichiju Nisai Ueno Minoten  Osaka    Japanese
2   Japan            2                      Kasho  Osaka    Japanese
3   Japan            2                     Kahala  Osaka  Innovative
4   Japan            2                  Kitahachi  Osaka        Fugu
(15, 5)


In [424]:
#Three star
#Osaka
osaka_three_restaurants = []
    
pages = ['https://gm.gnavi.co.jp/restaurant/list/osaka/all_area/all_small_area/all_food/3star/'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            osaka_three_restaurants.append(restaurant)
        
osaka_three_star = pd.DataFrame.from_dict(osaka_three_restaurants)
osaka_three_star.insert(0,'Country', 'Japan')
osaka_three_star.insert(1,'no_of_stars',3)
osaka_three_star.insert(3,'city','Osaka')
print(osaka_three_star.head())
print(osaka_three_star.shape)

  Country  no_of_stars       name   city     cuisine
0   Japan            3  Kashiwaya  Osaka    Japanese
1   Japan            3      Taian  Osaka    Japanese
2   Japan            3     Hajime  Osaka  Innovative
(3, 5)


In [433]:
#Toyama
#One star
toyama_one_restaurants = []
r = requests.get('https://gmtoyamakanazawa.gnavi.co.jp/restaurant/list/toyama/all_area/all_small_area/all_food/1star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    toyama_one_restaurants.append(restaurant)

toyama_one_star = pd.DataFrame.from_dict(toyama_one_restaurants)
toyama_one_star.insert(0,'Country', 'Japan')
toyama_one_star.insert(1,'no_of_stars',1)
toyama_one_star.insert(3,'city','Toyama')
        
#Three star
toyama_three_restaurants = []
r = requests.get('https://gmtoyamakanazawa.gnavi.co.jp/restaurant/list/toyama/all_area/all_small_area/all_food/3star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    toyama_three_restaurants.append(restaurant)

toyama_three_star = pd.DataFrame.from_dict(toyama_three_restaurants)
toyama_three_star.insert(0,'Country', 'Japan')
toyama_three_star.insert(1,'no_of_stars',3)
toyama_three_star.insert(3,'city','Toyama')

print(toyama_one_star.shape)
print(toyama_three_star.shape)

(7, 5)
(1, 5)


In [434]:
#Aichi
#One star

aichi_one_restaurants = []
    
pages = ['https://gmagm.gnavi.co.jp/restaurant/list/aichi/all_area/all_small_area/all_food/1star/',
         'https://gmagm.gnavi.co.jp/restaurant/list/aichi/all_area/all_small_area/all_food/1star/p2/',
         'https://gmagm.gnavi.co.jp/restaurant/list/aichi/all_area/all_small_area/all_food/1star/p3/',
         'https://gmagm.gnavi.co.jp/restaurant/list/aichi/all_area/all_small_area/all_food/1star/p4/'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            aichi_one_restaurants.append(restaurant)
        
aichi_one_star = pd.DataFrame.from_dict(aichi_one_restaurants)
aichi_one_star.insert(0,'Country', 'Japan')
aichi_one_star.insert(1,'no_of_stars',1)
aichi_one_star.insert(3,'city','Aichi')
       
#Two star
aichi_two_restaurants = []
r = requests.get('https://gmagm.gnavi.co.jp/restaurant/list/aichi/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    aichi_two_restaurants.append(restaurant)

aichi_two_star = pd.DataFrame.from_dict(aichi_two_restaurants)
aichi_two_star.insert(0,'Country', 'Japan')
aichi_two_star.insert(1,'no_of_stars',2)
aichi_two_star.insert(3,'city','Aichi')
        
#Three star
aichi_three_restaurants = []
r = requests.get('https://gmagm.gnavi.co.jp/restaurant/list/aichi/all_area/all_small_area/all_food/3star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    aichi_three_restaurants.append(restaurant)

aichi_three_star = pd.DataFrame.from_dict(aichi_three_restaurants)
aichi_three_star.insert(0,'Country', 'Japan')
aichi_three_star.insert(1,'no_of_stars',3)
aichi_three_star.insert(3,'city','Aichi')

print(aichi_one_star.shape)
print(aichi_two_star.shape)
print(aichi_three_star.shape)

(34, 5)
(9, 5)
(2, 5)


In [436]:
#Gifu
#One star
gifu_one_restaurants = []
r = requests.get('https://gmagm.gnavi.co.jp/restaurant/list/gifu/all_area/all_small_area/all_food/1star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    gifu_one_restaurants.append(restaurant)

gifu_one_star = pd.DataFrame.from_dict(gifu_one_restaurants)
gifu_one_star.insert(0,'Country', 'Japan')
gifu_one_star.insert(1,'no_of_stars',1)
gifu_one_star.insert(3,'city','Gifu')

#Two star
gifu_two_restaurants = []
r = requests.get('https://gmagm.gnavi.co.jp/restaurant/list/gifu/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    gifu_two_restaurants.append(restaurant)

gifu_two_star = pd.DataFrame.from_dict(gifu_two_restaurants)
gifu_two_star.insert(0,'Country', 'Japan')
gifu_two_star.insert(1,'no_of_stars',2)
gifu_two_star.insert(3,'city','Gifu')

print(gifu_one_star.shape)
print(gifu_two_star.shape)

(9, 5)
(3, 5)


In [437]:
#Mie
#One star
mie_one_restaurants = []
r = requests.get('https://gmagm.gnavi.co.jp/restaurant/list/mie/all_area/all_small_area/all_food/1star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    mie_one_restaurants.append(restaurant)

mie_one_star = pd.DataFrame.from_dict(mie_one_restaurants)
mie_one_star.insert(0,'Country', 'Japan')
mie_one_star.insert(1,'no_of_stars',1)
mie_one_star.insert(3,'city','Mie')

#Two star
mie_two_restaurants = []
r = requests.get('https://gmagm.gnavi.co.jp/restaurant/list/mie/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    mie_two_restaurants.append(restaurant)

mie_two_star = pd.DataFrame.from_dict(mie_two_restaurants)
mie_two_star.insert(0,'Country', 'Japan')
mie_two_star.insert(1,'no_of_stars',2)
mie_two_star.insert(3,'city','Mie')

#Three star
mie_three_restaurants = []
r = requests.get('https://gmagm.gnavi.co.jp/restaurant/list/mie/all_area/all_small_area/all_food/3star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    mie_three_restaurants.append(restaurant)

mie_three_star = pd.DataFrame.from_dict(mie_three_restaurants)
mie_three_star.insert(0,'Country', 'Japan')
mie_three_star.insert(1,'no_of_stars',3)
mie_three_star.insert(3,'city','Mie')

print(mie_one_star.shape)
print(mie_two_star.shape)
print(mie_three_star.shape)

(8, 5)
(2, 5)
(1, 5)


In [438]:
#Tottori - 2019 data - need to check still valid
#One star
tottori_one_restaurants = []
r = requests.get('https://gmtottori.gnavi.co.jp/restaurant/list/tottori/all_area/all_small_area/all_food/1star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    tottori_one_restaurants.append(restaurant)

tottori_one_star = pd.DataFrame.from_dict(tottori_one_restaurants)
tottori_one_star.insert(0,'Country', 'Japan')
tottori_one_star.insert(1,'no_of_stars',1)
tottori_one_star.insert(3,'city','Tottori')

#Two star
tottori_two_restaurants = []
r = requests.get('https://gmtottori.gnavi.co.jp/restaurant/list/tottori/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    tottori_two_restaurants.append(restaurant)

tottori_two_star = pd.DataFrame.from_dict(tottori_two_restaurants)
tottori_two_star.insert(0,'Country', 'Japan')
tottori_two_star.insert(1,'no_of_stars',2)
tottori_two_star.insert(3,'city','Tottori')

print(tottori_one_star.shape)
print(tottori_two_star.shape)

(4, 5)
(2, 5)


In [440]:
#Hiroshima - 2018 data
#One star
hiroshima_one_restaurants = []
    
pages = ['https://gmhiroshimaehime.gnavi.co.jp/restaurant/list/hiroshima/all_area/all_small_area/all_food/1star/',
         'https://gmhiroshimaehime.gnavi.co.jp/restaurant/list/hiroshima/all_area/all_small_area/all_food/1star/p2',
         'https://gmhiroshimaehime.gnavi.co.jp/restaurant/list/hiroshima/all_area/all_small_area/all_food/1star/p3/',
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            hiroshima_one_restaurants.append(restaurant)
        
hiroshima_one_star = pd.DataFrame.from_dict(hiroshima_one_restaurants)
hiroshima_one_star.insert(0,'Country', 'Japan')
hiroshima_one_star.insert(1,'no_of_stars',1)
hiroshima_one_star.insert(3,'city','Hiroshima')

#Two star
hiroshima_two_restaurants = []
r = requests.get('https://gmhiroshimaehime.gnavi.co.jp/restaurant/list/hiroshima/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    hiroshima_two_restaurants.append(restaurant)

hiroshima_two_star = pd.DataFrame.from_dict(hiroshima_two_restaurants)
hiroshima_two_star.insert(0,'Country', 'Japan')
hiroshima_two_star.insert(1,'no_of_stars',2)
hiroshima_two_star.insert(3,'city','Hiroshima')

#Three star
hiroshima_three_restaurants = []
r = requests.get('https://gmhiroshimaehime.gnavi.co.jp/restaurant/list/hiroshima/all_area/all_small_area/all_food/3star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    hiroshima_three_restaurants.append(restaurant)

hiroshima_three_star = pd.DataFrame.from_dict(hiroshima_three_restaurants)
hiroshima_three_star.insert(0,'Country', 'Japan')
hiroshima_three_star.insert(1,'no_of_stars',3)
hiroshima_three_star.insert(3,'city','Hiroshima')

print(hiroshima_one_star.shape)
print(hiroshima_two_star.shape)
print(hiroshima_three_star.shape)

(29, 5)
(4, 5)
(1, 5)


In [441]:
#Ehime - 2018 data
#One star
ehime_one_restaurants = []
r = requests.get('https://gmhiroshimaehime.gnavi.co.jp/restaurant/list/ehime/all_area/all_small_area/all_food/1star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    ehime_one_restaurants.append(restaurant)

ehime_one_star = pd.DataFrame.from_dict(ehime_one_restaurants)
ehime_one_star.insert(0,'Country', 'Japan')
ehime_one_star.insert(1,'no_of_stars',1)
ehime_one_star.insert(3,'city','Ehime')

#Two star
ehime_two_restaurants = []
r = requests.get('https://gmhiroshimaehime.gnavi.co.jp/restaurant/list/ehime/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    ehime_two_restaurants.append(restaurant)

ehime_two_star = pd.DataFrame.from_dict(ehime_two_restaurants)
ehime_two_star.insert(0,'Country', 'Japan')
ehime_two_star.insert(1,'no_of_stars',2)
ehime_two_star.insert(3,'city','Ehime')

print(ehime_one_star.shape)
print(ehime_two_star.shape)

(9, 5)
(2, 5)


In [443]:
#Fukuoka
#One star
fukuoka_one_restaurants = []
    
pages = ['https://gmfsn.gnavi.co.jp/restaurant/list/fukuoka/all_area/all_small_area/all_food/1star/',
         'https://gmfsn.gnavi.co.jp/restaurant/list/fukuoka/all_area/all_small_area/all_food/1star/p2/',
         'https://gmfsn.gnavi.co.jp/restaurant/list/fukuoka/all_area/all_small_area/all_food/1star/p3/',
         'https://gmfsn.gnavi.co.jp/restaurant/list/fukuoka/all_area/all_small_area/all_food/1star/p4',
         'https://gmfsn.gnavi.co.jp/restaurant/list/fukuoka/all_area/all_small_area/all_food/1star/p5'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            fukuoka_one_restaurants.append(restaurant)
        
fukuoka_one_star = pd.DataFrame.from_dict(fukuoka_one_restaurants)
fukuoka_one_star.insert(0,'Country', 'Japan')
fukuoka_one_star.insert(1,'no_of_stars',1)
fukuoka_one_star.insert(3,'city','Fukuoka')

#Two star
fukuoka_two_restaurants = []
r = requests.get('https://gmfsn.gnavi.co.jp/restaurant/list/fukuoka/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    fukuoka_two_restaurants.append(restaurant)

fukuoka_two_star = pd.DataFrame.from_dict(fukuoka_two_restaurants)
fukuoka_two_star.insert(0,'Country', 'Japan')
fukuoka_two_star.insert(1,'no_of_stars',2)
fukuoka_two_star.insert(3,'city','Fukuoka')

#Three star
fukuoka_three_restaurants = []
r = requests.get('https://gmfsn.gnavi.co.jp/restaurant/list/fukuoka/all_area/all_small_area/all_food/3star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    fukuoka_three_restaurants.append(restaurant)

fukuoka_three_star = pd.DataFrame.from_dict(fukuoka_three_restaurants)
fukuoka_three_star.insert(0,'Country', 'Japan')
fukuoka_three_star.insert(1,'no_of_stars',3)
fukuoka_three_star.insert(3,'city','Fukuoka')

print(fukuoka_one_star.shape)
print(fukuoka_two_star.shape)
print(fukuoka_three_star.shape)

(46, 5)
(10, 5)
(2, 5)


In [444]:
#Saga
#One star
saga_one_restaurants = []
r = requests.get('https://gmfsn.gnavi.co.jp/restaurant/list/saga/all_area/all_small_area/all_food/1star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    saga_one_restaurants.append(restaurant)

saga_one_star = pd.DataFrame.from_dict(saga_one_restaurants)
saga_one_star.insert(0,'Country', 'Japan')
saga_one_star.insert(1,'no_of_stars',1)
saga_one_star.insert(3,'city','Saga')

#Two star
saga_two_restaurants = []
r = requests.get('https://gmfsn.gnavi.co.jp/restaurant/list/saga/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    saga_two_restaurants.append(restaurant)

saga_two_star = pd.DataFrame.from_dict(saga_two_restaurants)
saga_two_star.insert(0,'Country', 'Japan')
saga_two_star.insert(1,'no_of_stars',2)
saga_two_star.insert(3,'city','Saga')

print(saga_one_star.shape)
print(saga_two_star.shape)

(10, 5)
(1, 5)


In [445]:
#Nagasaki
#One star
nagasaki_one_restaurants = []
r = requests.get('https://gmfsn.gnavi.co.jp/restaurant/list/nagasaki/all_area/all_small_area/all_food/1star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    nagasaki_one_restaurants.append(restaurant)

nagasaki_one_star = pd.DataFrame.from_dict(nagasaki_one_restaurants)
nagasaki_one_star.insert(0,'Country', 'Japan')
nagasaki_one_star.insert(1,'no_of_stars',1)
nagasaki_one_star.insert(3,'city','Nagasaki')

#Two star
nagasaki_two_restaurants = []
r = requests.get('https://gmfsn.gnavi.co.jp/restaurant/list/nagasaki/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    nagasaki_two_restaurants.append(restaurant)

nagasaki_two_star = pd.DataFrame.from_dict(nagasaki_two_restaurants)
nagasaki_two_star.insert(0,'Country', 'Japan')
nagasaki_two_star.insert(1,'no_of_stars',2)
nagasaki_two_star.insert(3,'city','Nagasaki')

print(nagasaki_one_star.shape)
print(nagasaki_two_star.shape)

(10, 5)
(2, 5)


In [446]:
#Kumamoto
#One star
kumamoto_one_restaurants = []
    
pages = ['https://gmkumamotooita.gnavi.co.jp/restaurant/list/kumamoto/all_area/all_small_area/all_food/1star/',
         'https://gmkumamotooita.gnavi.co.jp/restaurant/list/kumamoto/all_area/all_small_area/all_food/1star/p2'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            kumamoto_one_restaurants.append(restaurant)
        
kumamoto_one_star = pd.DataFrame.from_dict(kumamoto_one_restaurants)
kumamoto_one_star.insert(0,'Country', 'Japan')
kumamoto_one_star.insert(1,'no_of_stars',1)
kumamoto_one_star.insert(3,'city','Kumamoto')

#Two star
kumamoto_two_restaurants = []
r = requests.get('https://gmkumamotooita.gnavi.co.jp/restaurant/list/kumamoto/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    kumamoto_two_restaurants.append(restaurant)

kumamoto_two_star = pd.DataFrame.from_dict(kumamoto_two_restaurants)
kumamoto_two_star.insert(0,'Country', 'Japan')
kumamoto_two_star.insert(1,'no_of_stars',2)
kumamoto_two_star.insert(3,'city','Kumamoto')

print(kumamoto_one_star.shape)
print(kumamoto_two_star.shape)

(11, 5)
(3, 5)


In [448]:
#Oita
#One star
oita_one_restaurants = []
r = requests.get('https://gmkumamotooita.gnavi.co.jp/restaurant/list/oita/all_area/all_small_area/all_food/1star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    oita_one_restaurants.append(restaurant)

oita_one_star = pd.DataFrame.from_dict(oita_one_restaurants)
oita_one_star.insert(0,'Country', 'Japan')
oita_one_star.insert(1,'no_of_stars',1)
oita_one_star.insert(3,'city','Oita')

#Two star
oita_two_restaurants = []
r = requests.get('https://gmkumamotooita.gnavi.co.jp/restaurant/list/oita/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    oita_two_restaurants.append(restaurant)

oita_two_star = pd.DataFrame.from_dict(oita_two_restaurants)
oita_two_star.insert(0,'Country', 'Japan')
oita_two_star.insert(1,'no_of_stars',2)
oita_two_star.insert(3,'city','Oita')

print(oita_one_star.shape)
print(oita_two_star.shape)

(6, 5)
(2, 5)


In [449]:
#Miyagi
#One star
miyagi_one_restaurants = []
    
pages = ['https://gmmiyagi.gnavi.co.jp/restaurant/list/miyagi/all_area/all_small_area/all_food/1star/',
         'https://gmmiyagi.gnavi.co.jp/restaurant/list/miyagi/all_area/all_small_area/all_food/1star/p2'
        ]

for page in pages:
    
    r = requests.get(page)
    soup = BeautifulSoup(r.content, 'html.parser')

    for restaurant_content in soup.select('.part.clfix'):
  
            restaurant = {              
               'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
               'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
            }
            miyagi_one_restaurants.append(restaurant)
        
miyagi_one_star = pd.DataFrame.from_dict(miyagi_one_restaurants)
miyagi_one_star.insert(0,'Country', 'Japan')
miyagi_one_star.insert(1,'no_of_stars',1)
miyagi_one_star.insert(3,'city','Miyagi')

#Two star
miyagi_two_restaurants = []
r = requests.get('https://gmmiyagi.gnavi.co.jp/restaurant/list/miyagi/all_area/all_small_area/all_food/2star/')
soup = BeautifulSoup(r.content, 'html.parser')

for restaurant_content in soup.select('.part.clfix'):
  
    restaurant = {              
        'name': restaurant_content.select_one('.rname.px14 a').text.replace('\n','').strip(),
        'cuisine': restaurant_content.select_one('.rinfo.px12 li.cuisine').text.replace('\n','').strip()
    }
    miyagi_two_restaurants.append(restaurant)

miyagi_two_star = pd.DataFrame.from_dict(miyagi_two_restaurants)
miyagi_two_star.insert(0,'Country', 'Japan')
miyagi_two_star.insert(1,'no_of_stars',2)
miyagi_two_star.insert(3,'city','Miyagi')

print(miyagi_one_star.shape)
print(miyagi_two_star.shape)

(11, 5)
(1, 5)


In [451]:
japan_list = [tokyo_one_star, tokyo_two_star, tokyo_three_star, osaka_one_star, osaka_two_star, osaka_three_star,\
             kyoto_one_star, kyoto_two_star, kyoto_three_star, toyama_one_star, toyama_three_star, aichi_one_star,\
             aichi_two_star, aichi_three_star, gifu_one_star, gifu_two_star, mie_one_star, mie_two_star,\
             mie_three_star, tottori_one_star, tottori_two_star, hiroshima_one_star, hiroshima_two_star,\
             hiroshima_three_star, ehime_one_star, ehime_two_star, fukuoka_one_star, fukuoka_two_star,\
             fukuoka_three_star, saga_one_star, saga_two_star, nagasaki_one_star, nagasaki_two_star,\
             kumamoto_one_star, kumamoto_two_star, oita_one_star, oita_two_star, miyagi_one_star, miyagi_two_star]
japan_data = pd.concat(japan_list)

print(japan_data.head())
print(japan_data.shape)

japan_data.to_csv('japan_michelin_stars_2020.csv')


  Country  no_of_stars             name   city   cuisine
0   Japan            1     Adachi Naoto  Tokyo  Japanese
1   Japan            1  Les Alchimistes  Tokyo    French
2   Japan            1     Aroma Fresca  Tokyo   Italian
3   Japan            1        Ishibashi  Tokyo     Unagi
4   Japan            1        Ishibashi  Tokyo  Sukiyaki
(669, 5)


<h2>World's 50 best restaurants</h2>

Now we will hscrape data from the world's best restaurants list.


In [2]:
#Webscraping to gather data.


webpage = requests.get('https://www.theworlds50best.com/list/1-50', 'html.parser')
soup = BeautifulSoup(webpage.content)


top_50 = soup.find_all(attrs={'class':'row list visible-list'})

name = soup.find_all('h2')

for restaurant in top_50:
    if restaurant: #This is to check that all elements have h2 tags, otherwise an error occurs
        restaurants = restaurant.get_text(('|'))

print(restaurants)   

1|Mirazur|Menton, France|2|Noma|Copenhagen, Denmark|3|Asador Etxebarri|Axpe, Spain|4|Gaggan|Bangkok, Thailand|5|Geranium|Copenhagen, Denmark|6|Central|Lima, Peru|7|Mugaritz|San Sebastián, Spain|8|Arpège|Paris, France|9|Disfrutar|Barcelona, Spain|10|Maido|Lima, Peru|11|Den|Tokyo, Japan|12|Pujol|Mexico City, Mexico|13|White Rabbit|Moscow, Russia|14|Azurmendi|Larrabetzu, Spain|15|Septime|Paris, France|16|Alain Ducasse au Plaza Athénée|Paris, France|17|Steirereck|Vienna, Austria|18|Odette|Singapore|19|Twins Garden|Moscow, Russia|20|Tickets|Barcelona, Spain|21|Frantzén|Stockholm, Sweden|22|Narisawa|Tokyo, Japan|23|Cosme|New York, USA|24|Quintonil|Mexico City, Mexico|25|Alléno Paris au Pavillon Ledoyen|Paris, France|26|Boragó|Santiago, Chile|27|The Clove Club|London, UK|28|Blue Hill at Stone Barns|Pocantico Hills, USA|29|Piazza Duomo|Alba, Italy|30|Elkano|Getaria, Spain|31|Le Calandre|Rubano, Italy|32|Nerua Guggenheim Bilbao|Bilbao, Spain|33|Lyle's|London, UK|34|Don Julio|Buenos Aires, Argen

In [3]:
#Split on the '|'' character, then split into smaller chunks based on range which returns a zip type of tuples. 
#Then convert each tuple into a list.

data = restaurants
data = [list(item) for item in zip(*[data.split('|')[i::3] for i in range(3)])]
pprint.pprint (data)

[['1', 'Mirazur', 'Menton, France'],
 ['2', 'Noma', 'Copenhagen, Denmark'],
 ['3', 'Asador Etxebarri', 'Axpe, Spain'],
 ['4', 'Gaggan', 'Bangkok, Thailand'],
 ['5', 'Geranium', 'Copenhagen, Denmark'],
 ['6', 'Central', 'Lima, Peru'],
 ['7', 'Mugaritz', 'San Sebastián, Spain'],
 ['8', 'Arpège', 'Paris, France'],
 ['9', 'Disfrutar', 'Barcelona, Spain'],
 ['10', 'Maido', 'Lima, Peru'],
 ['11', 'Den', 'Tokyo, Japan'],
 ['12', 'Pujol', 'Mexico City, Mexico'],
 ['13', 'White Rabbit', 'Moscow, Russia'],
 ['14', 'Azurmendi', 'Larrabetzu, Spain'],
 ['15', 'Septime', 'Paris, France'],
 ['16', 'Alain Ducasse au Plaza Athénée', 'Paris, France'],
 ['17', 'Steirereck', 'Vienna, Austria'],
 ['18', 'Odette', 'Singapore'],
 ['19', 'Twins Garden', 'Moscow, Russia'],
 ['20', 'Tickets', 'Barcelona, Spain'],
 ['21', 'Frantzén', 'Stockholm, Sweden'],
 ['22', 'Narisawa', 'Tokyo, Japan'],
 ['23', 'Cosme', 'New York, USA'],
 ['24', 'Quintonil', 'Mexico City, Mexico'],
 ['25', 'Alléno Paris au Pavillon Ledoyen'

In [4]:
#Create a datframe from the above data
worldbest=pd.DataFrame(data, columns = ['Rating', 'Restaurant_Name', 'Location'])
worldbest.head()

Unnamed: 0,Rating,Restaurant_Name,Location
0,1,Mirazur,"Menton, France"
1,2,Noma,"Copenhagen, Denmark"
2,3,Asador Etxebarri,"Axpe, Spain"
3,4,Gaggan,"Bangkok, Thailand"
4,5,Geranium,"Copenhagen, Denmark"


It will be easier to analyse the data if we split out the location column into city and country first.

In [5]:
newcolumns = worldbest['Location'].str.split(',', n = 1, expand = True) 
worldbest['City']= newcolumns[0] 
worldbest['Country']= newcolumns[1] 
worldbest.drop([2])
print(worldbest.head())

#Save as csv file
worldbest.to_csv('worlds_best_restaurants_2019.csv')

  Rating   Restaurant_Name             Location        City    Country
0      1           Mirazur       Menton, France      Menton     France
1      2              Noma  Copenhagen, Denmark  Copenhagen    Denmark
2      3  Asador Etxebarri          Axpe, Spain        Axpe      Spain
3      4            Gaggan    Bangkok, Thailand     Bangkok   Thailand
4      5          Geranium  Copenhagen, Denmark  Copenhagen    Denmark


<h3>2018 results</h3>
Webscraping 2018 results of top 50 restaurants

In [42]:
webpage = requests.get('https://www.theworlds50best.com/previous-list/2018', 'html.parser')
soup = BeautifulSoup(webpage.content)

top_50_18 = soup.find_all(attrs={'class':'row-background the-list no-image cd-h-timeline__event-description color-contrast-medium'})

for restaurant in top_50_18:
    if restaurant: #This is to check that all elements have h2 tags, otherwise an error occurs
        restaurants_18 = restaurant.get_text(('|'))

#Remove the title from the string
restaurants_18_new = restaurants_18.replace("The World's 50 Best Restaurants 2018|", "")

print(restaurants_18_new) 


1|Osteria Francescana|Modena, Italy|2|El Celler de Can Roca|Girona, Spain|3|Mirazur|Menton, France|4|Eleven Madison Park|New York, USA|5|Gaggan|Bangkok, Thailand|6|Central|Lima, Peru|7|Maido|Lima, Peru|8|Arpège|Paris, France|9|Mugaritz|San Sebastian, Spain|10|Asador Etxebarri|Axpe, Spain|11|Quintonil|Mexico City, Mexico|12|Blue Hill at Stone Barns|Pocantico Hills, USA|13|Pujol|Mexico City, Mexico|14|Steirereck|Vienna, Austria|15|White Rabbit|Moscow, Russia|16|Piazza Duomo|Alba, Italy|17|Den|Tokyo, Japan|18|Disfrutar|Barcelona, Spain|19|Geranium|Copenhagen, Denmark|20|Attica|Melbourne, Australia|21|Alain Ducasse au Plaza Athénée|Paris, France|22|Narisawa|Tokyo, Japan|23|Le Calandre|Rubano, Italy|24|Ultraviolet by Paul Pairet|Shanghai, China|25|Cosme|New York, USA|26|Le Bernardin|New York, USA|27|Boragó|Santiago, Chile|28|Odette|Singapore|29|Alléno Paris au Pavillon Ledoyen|Paris, France|30|D.O.M|São Paulo, Brazil|31|Arzak|San Sebastian, Spain|32|Tickets|Barcelona, Spain|33|The Clove Clu

In [43]:
data = restaurants_18_new
data = [list(item) for item in zip(*[data.split('|')[i::3] for i in range(3)])]
pprint.pprint (data)

[['1', 'Osteria Francescana', 'Modena, Italy'],
 ['2', 'El Celler de Can Roca', 'Girona, Spain'],
 ['3', 'Mirazur', 'Menton, France'],
 ['4', 'Eleven Madison Park', 'New York, USA'],
 ['5', 'Gaggan', 'Bangkok, Thailand'],
 ['6', 'Central', 'Lima, Peru'],
 ['7', 'Maido', 'Lima, Peru'],
 ['8', 'Arpège', 'Paris, France'],
 ['9', 'Mugaritz', 'San Sebastian, Spain'],
 ['10', 'Asador Etxebarri', 'Axpe, Spain'],
 ['11', 'Quintonil', 'Mexico City, Mexico'],
 ['12', 'Blue Hill at Stone Barns', 'Pocantico Hills, USA'],
 ['13', 'Pujol', 'Mexico City, Mexico'],
 ['14', 'Steirereck', 'Vienna, Austria'],
 ['15', 'White Rabbit', 'Moscow, Russia'],
 ['16', 'Piazza Duomo', 'Alba, Italy'],
 ['17', 'Den', 'Tokyo, Japan'],
 ['18', 'Disfrutar', 'Barcelona, Spain'],
 ['19', 'Geranium', 'Copenhagen, Denmark'],
 ['20', 'Attica', 'Melbourne, Australia'],
 ['21', 'Alain Ducasse au Plaza Athénée', 'Paris, France'],
 ['22', 'Narisawa', 'Tokyo, Japan'],
 ['23', 'Le Calandre', 'Rubano, Italy'],
 ['24', 'Ultraviolet

In [44]:
worldbest18=pd.DataFrame(data, columns = ['Rating', 'Restaurant_Name','Location'])

worldbest18.head()

Unnamed: 0,Rating,Restaurant_Name,Location
0,1,Osteria Francescana,"Modena, Italy"
1,2,El Celler de Can Roca,"Girona, Spain"
2,3,Mirazur,"Menton, France"
3,4,Eleven Madison Park,"New York, USA"
4,5,Gaggan,"Bangkok, Thailand"


In [45]:
newcolumns18 = worldbest18['Location'].str.split(',', n = 1, expand = True) 
worldbest18['City']= newcolumns18[0] 
worldbest18['Country']= newcolumns18[1] 
worldbest18.drop([0])
print(worldbest18.head())

#Save as csv file
worldbest18.to_csv('worlds_best_restaurants_2018.csv')

  Rating        Restaurant_Name           Location      City    Country
0      1    Osteria Francescana      Modena, Italy    Modena      Italy
1      2  El Celler de Can Roca      Girona, Spain    Girona      Spain
2      3                Mirazur     Menton, France    Menton     France
3      4    Eleven Madison Park      New York, USA  New York        USA
4      5                 Gaggan  Bangkok, Thailand   Bangkok   Thailand


<h3>2017 results</h3>

In [70]:
webpage = requests.get('https://www.theworlds50best.com/previous-list/2016').content
soup = BeautifulSoup(webpage, 'html.parser')

top_50_16 = soup.find_all(attrs={'class':'row'})

for restaurant2 in top_50_16:
    if restaurant2: #This is to check that all elements have h2 tags, otherwise an error occurs
        restaurants_16 = restaurant2.get_text(('|'))

#Remove the title from the string
#restaurants_16_new = restaurants_16.replace("The World's 50 Best Restaurants 2016|", "")

restaurants_16


"1|Osteria Francescana|Modena, Italy|2|El Celler de Can Roca|Girona, Spain|3|Mirazur|Menton, France|4|Eleven Madison Park|New York, USA|5|Gaggan|Bangkok, Thailand|6|Central|Lima, Peru|7|Maido|Lima, Peru|8|Arpège|Paris, France|9|Mugaritz|San Sebastian, Spain|10|Asador Etxebarri|Axpe, Spain|11|Quintonil|Mexico City, Mexico|12|Blue Hill at Stone Barns|Pocantico Hills, USA|13|Pujol|Mexico City, Mexico|14|Steirereck|Vienna, Austria|15|White Rabbit|Moscow, Russia|16|Piazza Duomo|Alba, Italy|17|Den|Tokyo, Japan|18|Disfrutar|Barcelona, Spain|19|Geranium|Copenhagen, Denmark|20|Attica|Melbourne, Australia|21|Alain Ducasse au Plaza Athénée|Paris, France|22|Narisawa|Tokyo, Japan|23|Le Calandre|Rubano, Italy|24|Ultraviolet by Paul Pairet|Shanghai, China|25|Cosme|New York, USA|26|Le Bernardin|New York, USA|27|Boragó|Santiago, Chile|28|Odette|Singapore|29|Alléno Paris au Pavillon Ledoyen|Paris, France|30|D.O.M|São Paulo, Brazil|31|Arzak|San Sebastian, Spain|32|Tickets|Barcelona, Spain|33|The Clove Cl