## Scraping "Guide Michelin" with Selenium

In [None]:
%pip install selenium

In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv
import re

In [12]:
class SeleniumScraper:
    def __init__(self, query, max_pages):
        self.base_url = "https://guide.michelin.com/"
        self.query = query
        self.max_pages = max_pages
        
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        self.driver = webdriver.Chrome(options=chrome_options)
        self.restaurants = []
        self.rate_limiter_delay = 1.5

    def scrape(self):
        page = 1
        while page <= self.max_pages:
            print(f"Scraping page: {page}")
            url = f"{self.base_url}{self.query}{page}"
            self.driver.get(url)
            
            try:
                # Wait for cards to load dynamically
                WebDriverWait(self.driver, 10).until(
                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".card__menu"))
                )
                
                # Extract restaurant data
                self.extract_restaurants()
                page += 1

                # Rate limiter
                time.sleep(self.rate_limiter_delay)
            except Exception as e:
                print(f"No more pages or error: {e}")
                break

        self.driver.quit()
        
    def _extract_number_stars(self, element):
        if element == None:
            return 0 # Just no stars
        attribute = element.get_attribute("src")
        img_name = attribute.split("/")[-1].split('.')[0]
        match = re.findall(r'\d', img_name)
        if len(match) > 0:
            return match[0] # Nb of stars in the pictures' file name
            
        return 0 # Bib Gourmand
           
    def extract_restaurants(self):
        cards = self.driver.find_elements(By.CSS_SELECTOR, ".card__menu")

        for card in cards:
            try:
                try:
                    # Attempt to find the element
                    rating_container = card.find_element(By.CSS_SELECTOR, ".flex-fill div div div div span img")
                except Exception:
                    # If not found, assign a default value
                    rating_container = None

                rating = self._extract_number_stars(rating_container)
                id_ = card.get_attribute("data-id")
                img = card.find_element(By.CSS_SELECTOR, ".card__menu-image a").get_attribute("ci-bg-url")
                name = card.find_element(By.CSS_SELECTOR, ".card__menu-content--title a").text.strip()
                link = card.find_element(By.CSS_SELECTOR, ".card__menu-content--title a").get_attribute("href")
                lat = float(card.get_attribute("data-lat"))
                lon = float(card.get_attribute("data-lng"))
                
                data_container = card.find_element(By.CSS_SELECTOR, ".card__menu-image .card__menu-image--top div div .js-favorite-restaurant")
                location = data_container.get_attribute("data-dtm-city")
                country = data_container.get_attribute("data-restaurant-selection")
                type_ = data_container.get_attribute("data-cooking-type")
                price_category = len(data_container.get_attribute("data-dtm-price"))


                self.restaurants.append({
                    "id": id_,
                    "name": name,
                    "location": location,
                    "country": country,
                    "latitude": lat,
                    "longitude": lon,
                    "rating": rating,
                    "priceCategory": price_category,
                    "type": type_,
                    #"year": year,
                    "img": img,
                    "link": link,
                })
            except Exception as e:
                print(f"Error extracting card: {e}")

    def save_to_csv(self, filename="restaurants.csv"):
        headers = ["id", "name", "location", "country", "latitude", "longitude", "rating", "priceCategory", "type", "img", "link"]

        try:
            with open(filename, mode="w", encoding="utf-8", newline="") as file:
                writer = csv.DictWriter(file, fieldnames=headers)
                writer.writeheader()
                for restaurant in self.restaurants:
                    writer.writerow(restaurant)
            print(f"Data saved to {filename}")
        except Exception as e:
            print(f"Error saving to CSV: {e}")

In [None]:
scraper = SeleniumScraper(query="us/en/selection/france/restaurants/page/", max_pages=500)
scraper.scrape()
scraper.save_to_csv("../data/raw/michelin_restaurants.csv")

Scraping page: 1
Scraping page: 2
Scraping page: 3
Scraping page: 4
Scraping page: 5
Scraping page: 6
Scraping page: 7
Scraping page: 8
Scraping page: 9
Error extracting card: Message: stale element reference: stale element not found in the current frame
  (Session info: chrome=131.0.6778.108); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
	GetHandleVerifier [0x00007FF7F1CB6CF5+28821]
	(No symbol) [0x00007FF7F1C23880]
	(No symbol) [0x00007FF7F1AC578A]
	(No symbol) [0x00007FF7F1ACC2ED]
	(No symbol) [0x00007FF7F1ACF308]
	(No symbol) [0x00007FF7F1B606B0]
	(No symbol) [0x00007FF7F1B3F2FA]
	(No symbol) [0x00007FF7F1B5F412]
	(No symbol) [0x00007FF7F1B3F0A3]
	(No symbol) [0x00007FF7F1B0A778]
	(No symbol) [0x00007FF7F1B0B8E1]
	GetHandleVerifier [0x00007FF7F1FEFCED+3408013]
	GetHandleVerifier [0x00007FF7F200745F+3504127]
	GetHandleVerifier [0x00007FF7F1FFB63D+3455453]
	GetHandl

### Avoid `stale element reference` error

In [10]:
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

class SeleniumScraper:
    def __init__(self, query, max_pages):
        self.base_url = "https://guide.michelin.com/"
        self.query = query
        self.max_pages = max_pages
        
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        self.driver = webdriver.Chrome(options=chrome_options)
        self.restaurants = []
        self.rate_limiter_delay = 1.5

    def _safely_get_element(self, container, by, value, timeout=10, max_attempts=3):
        """
        Safely retrieve a web element, handling stale element references.
        
        :param container: Parent WebElement or WebDriver to search within
        :param by: Selenium By locator (e.g., By.CSS_SELECTOR, By.XPATH)
        :param value: Locator value
        :param timeout: Maximum wait time for element
        :param max_attempts: Maximum number of retry attempts
        :return: Web element or None if not found
        """
        for attempt in range(max_attempts):
            try:
                # Find the element
                element = container.find_element(by, value)
                
                # Additional check to ensure element is not stale
                element.tag_name
                return element
            
            except (StaleElementReferenceException, Exception):
                # If it's not the last attempt, continue to retry
                if attempt < max_attempts - 1:
                    print(f"Attempt {attempt + 1} failed. Retrying element location...")
                    time.sleep(timeout)
                    continue
                else:
                    print(f"Failed to locate element after {max_attempts} attempts")
                    return None

    def scrape(self):
        page = 1
        while page <= self.max_pages:
            print(f"Scraping page: {page}")
            url = f"{self.base_url}{self.query}{page}"
            self.driver.get(url)
            
            try:
                # Wait for cards to load dynamically
                WebDriverWait(self.driver, 40).until(
                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".card__menu"))
                )
                
                # Extract restaurant data
                self._extract_restaurants()
                page += 1

                # Rate limiter
                time.sleep(self.rate_limiter_delay)
            except Exception as e:
                print(f"No more pages or error: {e}")
                break

        self.driver.quit()
        
    def _extract_number_stars(self, element):
        try:
            images = element.find_elements(By.TAG_NAME, 'img')
            if len(images) - 1 == 0:
                return -1 # Just no stars
            else:
                nb_stars = 0 # Bib Gourmand
                for img in images:
                    attribute = img.get_attribute("src")
                    if "1star.svg" in attribute:
                        nb_stars += 1 # Nb of stars in the container
                return nb_stars
        except Exception:
            pass
           
    def _extract_image_url(self, card):
        """
        Robust method to extract image URL from the Michelin Guide card element
        
        Args:
            card (WebElement): The card element to extract image from
        
        Returns:
            str: Image URL or empty string if not found
        """
        try:
            # First, try the ci-bg-url attribute on the image wrapper
            image_wrapper = self._safely_get_element(
                card, 
                By.CSS_SELECTOR, 
                ".card__menu-image .image-wrapper[ci-bg-url]"
            )
            
            if image_wrapper:
                url = image_wrapper.get_attribute("ci-bg-url")
                if url and url.strip():
                    return url
            
            # Fallback: try finding the image element with data-gallery-image
            image_element = self._safely_get_element(
                card, 
                By.CSS_SELECTOR, 
                ".card__menu-image .icon-box img[data-gallery-image]"
            )
            
            if image_element:
                gallery_images = image_element.get_attribute("data-gallery-image")
                if gallery_images:
                    # Split gallery images and return the first one
                    image_urls = gallery_images.split(',')
                    return image_urls[0] if image_urls else ""
            
            # Fallback: try to get background image via JavaScript
            url = self.driver.execute_script("""
                var card = arguments[0];
                var imageWrapper = card.querySelector('.card__menu-image .image-wrapper[ci-bg-url]');
                if (imageWrapper) {
                    return imageWrapper.getAttribute('ci-bg-url');
                }
                return '';
            """, card)
            
            if url and url.strip():
                return url
        
        except Exception as e:
            print(f"Error extracting image URL: {e}")
        
        print("No image URL found for this card")
        return ""
    
    def _extract_restaurants(self):
        try:
            # Use WebDriverWait to ensure cards are loaded and stable
            cards = WebDriverWait(self.driver, 40).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".card__menu"))
            )

            for card in cards:
                try:
                    # Use safe element retrieval for each extraction
                    try:
                        rating_container = self._safely_get_element(
                            card, 
                            By.CSS_SELECTOR, 
                            ".flex-fill div div div div span"
                        )
                    except Exception:
                        rating_container = None

                    rating = self._extract_number_stars(rating_container)
                    
                    # Safely get each element with retry mechanism
                    
                    img_link = self._extract_image_url(card)
                    if not img_link:
                        print(f"Could not extract image for restaurant: {name}")
                    
                    name_element = self._safely_get_element(
                        card, 
                        By.CSS_SELECTOR, 
                        ".card__menu-content--title a"
                    )
                    
                    name = name_element.text.strip()
                    link = name_element.get_attribute("href")
                    
                    id_ = card.get_attribute("data-id")
                    lat = float(card.get_attribute("data-lat"))
                    lon = float(card.get_attribute("data-lng"))
                    
                    data_container = self._safely_get_element(
                        card, 
                        By.CSS_SELECTOR, 
                        ".card__menu-image .card__menu-image--top div div .js-favorite-restaurant"
                    )
                    
                    if name == None:
                        name = data_container.get_attribute("data-restaurant-name")
                    location = data_container.get_attribute("data-dtm-city")
                    country = data_container.get_attribute("data-restaurant-selection")
                    type_ = data_container.get_attribute("data-cooking-type")
                    price_category = len(data_container.get_attribute("data-dtm-price"))

                    self.restaurants.append({
                        "id": id_,
                        "name": name,
                        "location": location,
                        "country": country,
                        "latitude": lat,
                        "longitude": lon,
                        "rating": rating,
                        "priceCategory": price_category,
                        "type": type_,
                        "img": img_link,
                        "link": link,
                    })
                except Exception as e:
                    print(f"Error extracting individual card: {e}")
        except Exception as e:
            print(f"Error in extracting restaurant cards: {e}")

    def save_to_csv(self, filename="restaurants.csv"):
        headers = ["id", "name", "location", "country", "latitude", "longitude", "rating", "priceCategory", "type", "img", "link"]

        try:
            with open(filename, mode="w", encoding="utf-8", newline="") as file:
                writer = csv.DictWriter(file, fieldnames=headers)
                writer.writeheader()
                for restaurant in self.restaurants:
                    writer.writerow(restaurant)
            print(f"Data saved to {filename}")
        except Exception as e:
            print(f"Error saving to CSV: {e}")

### Michelin's Green Star

![green_star_michelin](https://d3h1lg3ksw6i6b.cloudfront.net/media/image/2022/10/11/31d3d763e68745dca54c19db6978db5f_Green-Star-hero-image.jpg)

> The MICHELIN Green Star is an annual award which highlights restaurants at the forefront of the industry when it comes to their sustainable practices. They hold themselves accountable for both their ethical and environmental standards, and work with sustainable producers and suppliers to avoid waste and reduce or even remove plastic and other non-recyclable materials from their supply chain.
> 
> These restaurants offer dining experiences that combine culinary excellence with outstanding eco-friendly commitments and are a source of inspiration both for keen foodies and the hospitality industry as a whole.

[What is a MICHELIN Green Star?](https://guide.michelin.com/kr/en/article/features/what-is-a-michelin-green-star-kr)

In [11]:
%%time
scraper = SeleniumScraper(query="us/en/restaurants/sustainable_gastronomy/page/", max_pages=500)
scraper.scrape()
scraper.save_to_csv("../data/raw/green_star_michelin_restaurants.csv")

Scraping page: 1
Scraping page: 2
Scraping page: 3
Scraping page: 4
Scraping page: 5
Scraping page: 6
Scraping page: 7
Scraping page: 8
Scraping page: 9
Attempt 1 failed. Retrying element location...
Attempt 2 failed. Retrying element location...
Failed to locate element after 3 attempts
Attempt 1 failed. Retrying element location...
Attempt 2 failed. Retrying element location...
Failed to locate element after 3 attempts
No image URL found for this card
Could not extract image for restaurant: Caffè La Crepa
Scraping page: 10
Scraping page: 11
Scraping page: 12
Scraping page: 13
Scraping page: 14
Scraping page: 15
Scraping page: 16
Scraping page: 17
Scraping page: 18
Scraping page: 19
Scraping page: 20
Scraping page: 21
Scraping page: 22
Scraping page: 23
Scraping page: 24
Scraping page: 25
Scraping page: 26
Scraping page: 27
Scraping page: 28
Scraping page: 29
Scraping page: 30
Scraping page: 31
No more pages or error: Message: 

Data saved to ../data/raw/green_star_michelin_restaurant

KeyboardInterrupt: 

## API Fetching to get Reviews

### [Google Places API](https://developers.google.com/maps/documentation/places/web-service/op-overview?hl=fr)

The HTTP request to send to the API is the following
```http
POST https://places.googleapis.com/v1/places:searchText?fields=places.id%2Cplaces.formattedAddress%2Cplaces.websiteUri%2Cplaces.rating%2Cplaces.reviews%2Cplaces.regularOpeningHours.periods%2Cplaces.googleMapsLinks.directionsUri HTTP/1.1

Authorization: Bearer [YOUR_ACCESS_TOKEN]
Accept: application/json
Content-Type: application/json

{
  "textQuery": "Choko Ona,Espelette",
  "includedType": "restaurant"
}
```

In [13]:
import pandas as pd

In [14]:
restaurants = pd.read_csv("../data/raw/green_star_michelin_restaurants.csv")
restaurants.head()

Unnamed: 0,id,name,location,country,latitude,longitude,rating,priceCategory,type,img,link
0,110681,One White Street,New York,USA,40.719355,-74.006101,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...
1,110644,Dirt Candy,New York,USA,40.71794,-73.99087,1,4,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...
2,110607,Family Meal at Blue Hill,New York,USA,40.731971,-73.999639,1,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...
3,110478,Blue Hill at Stone Barns,Tarrytown,USA,41.100715,-73.829297,2,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...
4,110338,Oyster Oyster,Washington,USA,38.909302,-77.023015,1,3,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/district-of-c...


In [15]:
restaurants.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 581 entries, 0 to 580
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             581 non-null    int64  
 1   name           581 non-null    object 
 2   location       581 non-null    object 
 3   country        581 non-null    object 
 4   latitude       581 non-null    float64
 5   longitude      581 non-null    float64
 6   rating         581 non-null    int64  
 7   priceCategory  581 non-null    int64  
 8   type           581 non-null    object 
 9   img            580 non-null    object 
 10  link           581 non-null    object 
dtypes: float64(2), int64(3), object(6)
memory usage: 50.1+ KB


I keep having an issue with that one image, I don't get why... So let's manually declare it

In [16]:
restaurants[restaurants['img'].isnull()]

Unnamed: 0,id,name,location,country,latitude,longitude,rating,priceCategory,type,img,link
176,105897,Ahimè,Bologna,Italia,44.496621,11.337969,0,2,Country cooking,,https://guide.michelin.com/us/en/emilia-romagn...


In [18]:
restaurants.at[176, 'img'] = "https://d3h1lg3ksw6i6b.cloudfront.net/guide/placeholder/pic_poilist_default_3.jpg"
restaurants.iloc[176]

id                                                          105897
name                                                         Ahimè
location                                                   Bologna
country                                                     Italia
latitude                                                 44.496621
longitude                                                11.337969
rating                                                           0
priceCategory                                                    2
type                                               Country cooking
img              https://d3h1lg3ksw6i6b.cloudfront.net/guide/pl...
link             https://guide.michelin.com/us/en/emilia-romagn...
Name: 176, dtype: object

#### GET [`v1/places:searchText`](https://developers.google.com/maps/documentation/places/web-service/text-search?hl=fr)
To get the necessary `PLACE_ID` for the `v1/places/PLACE_ID` endpoint

In [57]:
# places.id,places.formattedAddress,places.websiteUri,places.rating,places.reviews,places.regularOpeningHours.periods,places.googleMapsLinks.directionsUri
GOOGLE_PLACES_API_URL = "https://places.googleapis.com/v1/places:searchText?fields=places.id%2Cplaces.types%2Cplaces.formattedAddress%2Cplaces.websiteUri%2Cplaces.rating%2Cplaces.reviews%2Cplaces.regularOpeningHours.periods%2Cplaces.googleMapsLinks.directionsUri"

In [59]:
import requests
import os

def get_location_types(row):
    return f"{row['name']}, {row['location']} {row['country']}"

def fetch_place_details(row):
    """Fetches nearby places data either from a test file or the Google Places API.

    Returns:
        list: A list of places fetched from the API or test file.
    """
        
    try:
        headers = {
            # "Authorization": f"Bearer { os.environ["GOOGLE_PLACES_API_KEY"] }",
            "X-Goog-Api-Key": os.environ["GOOGLE_PLACES_API_KEY"],
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        
        req_data = {
            "textQuery": f"{row['name']}, {row['location']}, {row['country']}",
            # "includedType": "restaurant"
        }
        
        # Make a request to the Google Places API
        response = requests.post(url=GOOGLE_PLACES_API_URL, json=req_data, headers=headers)
        response.raise_for_status()
        if response.status_code == 200:
            # Return the places
            data = response.json()
            return data["places"][0]
        else:
            print(f"Response {response.status_code} [{response.reason}] Returning None.")
            return None        

    except requests.RequestException as e:
        return print(f"error: {e}")

In [60]:
places_with_google = restaurants.copy()
places_with_google.head()

Unnamed: 0,id,name,location,country,latitude,longitude,rating,priceCategory,type,img,link
0,110681,One White Street,New York,USA,40.719355,-74.006101,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...
1,110644,Dirt Candy,New York,USA,40.71794,-73.99087,1,4,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...
2,110607,Family Meal at Blue Hill,New York,USA,40.731971,-73.999639,1,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...
3,110478,Blue Hill at Stone Barns,Tarrytown,USA,41.100715,-73.829297,2,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...
4,110338,Oyster Oyster,Washington,USA,38.909302,-77.023015,1,3,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/district-of-c...


In [61]:
from tqdm import tqdm

tqdm.pandas()
places_with_google["google_details"] = places_with_google.progress_apply(fetch_place_details, axis=1)

100%|██████████| 581/581 [04:31<00:00,  2.14it/s]


Forgot to remove `types` from the fields, but Google monitors the number of API requests, so I will not fix that right now.

In [142]:
places_with_google.head()

Unnamed: 0,id,name,location,country,latitude,longitude,rating,priceCategory,type,img,link,google_details
0,110681,One White Street,New York,USA,40.719355,-74.006101,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"""\""{\\\""id\\\"": \\\""ChIJISXTwWJZwokRIok4xyMeb6..."
1,110644,Dirt Candy,New York,USA,40.71794,-73.99087,1,4,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"""\""{\\\""id\\\"": \\\""ChIJBXlte51ZwokRXzXtdaBG9x..."
2,110607,Family Meal at Blue Hill,New York,USA,40.731971,-73.999639,1,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"""\""{\\\""id\\\"": \\\""ChIJi9BURJFZwokRDcqvIgxdXd..."
3,110478,Blue Hill at Stone Barns,Tarrytown,USA,41.100715,-73.829297,2,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"""\""{\\\""id\\\"": \\\""ChIJExcPIfK_wokRtc6K_3jkhg..."
4,110338,Oyster Oyster,Washington,USA,38.909302,-77.023015,1,3,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/district-of-c...,"""\""{\\\""id\\\"": \\\""ChIJQcHOEK-3t4kRbvsIP3ccIt..."


In [140]:
import json

places_with_google["google_details"] = places_with_google["google_details"].apply(json.dumps)
places_with_google.to_csv("../data/raw/restaurants_google_places_api_raw.csv", index=False, encoding="utf8", quoting=1)

### Expand Specific Fields

In [143]:
df = pd.read_csv("../data/raw/restaurants_google_places_api_raw.csv")
df.head()

Unnamed: 0,id,name,location,country,latitude,longitude,rating,priceCategory,type,img,link,google_details
0,110681,One White Street,New York,USA,40.719355,-74.006101,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{""id"": ""ChIJISXTwWJZwokRIok4xyMeb6g"", ""types"":..."
1,110644,Dirt Candy,New York,USA,40.71794,-73.99087,1,4,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{""id"": ""ChIJBXlte51ZwokRXzXtdaBG9xM"", ""types"":..."
2,110607,Family Meal at Blue Hill,New York,USA,40.731971,-73.999639,1,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{""id"": ""ChIJi9BURJFZwokRDcqvIgxdXds"", ""types"":..."
3,110478,Blue Hill at Stone Barns,Tarrytown,USA,41.100715,-73.829297,2,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{""id"": ""ChIJExcPIfK_wokRtc6K_3jkhgM"", ""types"":..."
4,110338,Oyster Oyster,Washington,USA,38.909302,-77.023015,1,3,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/district-of-c...,"{""id"": ""ChIJQcHOEK-3t4kRbvsIP3ccItA"", ""types"":..."


In [151]:
df["google_details"] = df["google_details"].apply(json.loads)

In [152]:
df.iloc[0]["google_details"]

{'id': 'ChIJISXTwWJZwokRIok4xyMeb6g',
 'types': ['restaurant', 'food', 'point_of_interest', 'establishment'],
 'formattedAddress': '1 White St, New York, NY 10013, USA',
 'rating': 4.4,
 'websiteUri': 'https://www.onewhitestreetnyc.com/',
 'regularOpeningHours': {'periods': [{'open': {'day': 0,
     'hour': 17,
     'minute': 0},
    'close': {'day': 0, 'hour': 21, 'minute': 0}},
   {'open': {'day': 1, 'hour': 17, 'minute': 0},
    'close': {'day': 1, 'hour': 21, 'minute': 0}},
   {'open': {'day': 2, 'hour': 17, 'minute': 0},
    'close': {'day': 2, 'hour': 21, 'minute': 0}},
   {'open': {'day': 3, 'hour': 17, 'minute': 0},
    'close': {'day': 3, 'hour': 21, 'minute': 0}},
   {'open': {'day': 4, 'hour': 17, 'minute': 0},
    'close': {'day': 4, 'hour': 21, 'minute': 0}},
   {'open': {'day': 5, 'hour': 17, 'minute': 0},
    'close': {'day': 5, 'hour': 22, 'minute': 0}},
   {'open': {'day': 6, 'hour': 17, 'minute': 0},
    'close': {'day': 6, 'hour': 22, 'minute': 0}}]},
 'reviews': [{'

In [153]:
# df = places_with_google.copy()

def parse_google_details(details):
    if details:
        return {
            "website_uri": details.get("websiteUri"),
            "rating": details.get("rating"),
            "reviews": details.get("reviews", []),
            "formatted_address": details.get("formattedAddress"),
            "opening_hours": details.get("regularOpeningHours", {}).get("periods", []),
            "directions_link": details.get("googleMapsLinks", {}).get("directionsUri")
        }
    return {}

df["parsed_google_details"] = df["google_details"].apply(parse_google_details)
df.head()


Unnamed: 0,id,name,location,country,latitude,longitude,rating,priceCategory,type,img,link,google_details,parsed_google_details
0,110681,One White Street,New York,USA,40.719355,-74.006101,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{'id': 'ChIJISXTwWJZwokRIok4xyMeb6g', 'types':...",{'website_uri': 'https://www.onewhitestreetnyc...
1,110644,Dirt Candy,New York,USA,40.71794,-73.99087,1,4,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{'id': 'ChIJBXlte51ZwokRXzXtdaBG9xM', 'types':...",{'website_uri': 'http://www.dirtcandynyc.com/'...
2,110607,Family Meal at Blue Hill,New York,USA,40.731971,-73.999639,1,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{'id': 'ChIJi9BURJFZwokRDcqvIgxdXds', 'types':...",{'website_uri': 'https://www.bluehillfarm.com/...
3,110478,Blue Hill at Stone Barns,Tarrytown,USA,41.100715,-73.829297,2,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{'id': 'ChIJExcPIfK_wokRtc6K_3jkhgM', 'types':...",{'website_uri': 'https://www.bluehillfarm.com/...
4,110338,Oyster Oyster,Washington,USA,38.909302,-77.023015,1,3,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/district-of-c...,"{'id': 'ChIJQcHOEK-3t4kRbvsIP3ccItA', 'types':...",{'website_uri': 'http://www.oysteroysterdc.com...


In [154]:
enriched_restaurants = df.copy()

enriched_restaurants["website_uri"] = enriched_restaurants["parsed_google_details"].apply(lambda x: x.get("website_uri"))
enriched_restaurants["google_rating"] = enriched_restaurants["parsed_google_details"].apply(lambda x: x.get("rating"))
enriched_restaurants["google_reviews"] = enriched_restaurants["parsed_google_details"].apply(lambda x: x.get("reviews"))
enriched_restaurants["google_address"] = enriched_restaurants["parsed_google_details"].apply(lambda x: x.get("formatted_address"))
enriched_restaurants["google_opening_hours"] = enriched_restaurants["parsed_google_details"].apply(lambda x: x.get("opening_hours"))
enriched_restaurants["google_directions_link"] = enriched_restaurants["parsed_google_details"].apply(lambda x: x.get("directions_link"))

enriched_restaurants.head()

Unnamed: 0,id,name,location,country,latitude,longitude,rating,priceCategory,type,img,link,google_details,parsed_google_details,website_uri,google_rating,google_reviews,google_address,google_opening_hours,google_directions_link
0,110681,One White Street,New York,USA,40.719355,-74.006101,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{'id': 'ChIJISXTwWJZwokRIok4xyMeb6g', 'types':...",{'website_uri': 'https://www.onewhitestreetnyc...,https://www.onewhitestreetnyc.com/,4.4,[{'name': 'places/ChIJISXTwWJZwokRIok4xyMeb6g/...,"1 White St, New York, NY 10013, USA","[{'open': {'day': 0, 'hour': 17, 'minute': 0},...",https://www.google.com/maps/dir//''/data=!4m7!...
1,110644,Dirt Candy,New York,USA,40.71794,-73.99087,1,4,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{'id': 'ChIJBXlte51ZwokRXzXtdaBG9xM', 'types':...",{'website_uri': 'http://www.dirtcandynyc.com/'...,http://www.dirtcandynyc.com/,4.6,[{'name': 'places/ChIJBXlte51ZwokRXzXtdaBG9xM/...,"86 Allen St, New York, NY 10002, USA","[{'open': {'day': 2, 'hour': 17, 'minute': 30}...",https://www.google.com/maps/dir//''/data=!4m7!...
2,110607,Family Meal at Blue Hill,New York,USA,40.731971,-73.999639,1,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{'id': 'ChIJi9BURJFZwokRDcqvIgxdXds', 'types':...",{'website_uri': 'https://www.bluehillfarm.com/...,https://www.bluehillfarm.com/,4.6,[{'name': 'places/ChIJi9BURJFZwokRDcqvIgxdXds/...,"75 Washington Pl, New York, NY 10011, USA","[{'open': {'day': 0, 'hour': 11, 'minute': 30}...",https://www.google.com/maps/dir//''/data=!4m7!...
3,110478,Blue Hill at Stone Barns,Tarrytown,USA,41.100715,-73.829297,2,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/new-york-stat...,"{'id': 'ChIJExcPIfK_wokRtc6K_3jkhgM', 'types':...",{'website_uri': 'https://www.bluehillfarm.com/...,https://www.bluehillfarm.com/,4.7,[{'name': 'places/ChIJExcPIfK_wokRtc6K_3jkhgM/...,"630 Bedford Rd, Tarrytown, NY 10591, USA","[{'open': {'day': 0, 'hour': 11, 'minute': 30}...",https://www.google.com/maps/dir//''/data=!4m7!...
4,110338,Oyster Oyster,Washington,USA,38.909302,-77.023015,1,3,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://guide.michelin.com/us/en/district-of-c...,"{'id': 'ChIJQcHOEK-3t4kRbvsIP3ccItA', 'types':...",{'website_uri': 'http://www.oysteroysterdc.com...,http://www.oysteroysterdc.com/,4.6,[{'name': 'places/ChIJQcHOEK-3t4kRbvsIP3ccItA/...,"1440 8th St NW, Washington, DC 20001, USA","[{'open': {'day': 2, 'hour': 17, 'minute': 30}...",https://www.google.com/maps/dir//''/data=!4m7!...


In [155]:
enriched_restaurants.drop(columns=["link", "google_details", "parsed_google_details"], inplace=True)
enriched_restaurants.rename(columns={"rating": "distinctions"}, inplace=True)
enriched_restaurants.head()

Unnamed: 0,id,name,location,country,latitude,longitude,distinctions,priceCategory,type,img,website_uri,google_rating,google_reviews,google_address,google_opening_hours,google_directions_link
0,110681,One White Street,New York,USA,40.719355,-74.006101,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.onewhitestreetnyc.com/,4.4,[{'name': 'places/ChIJISXTwWJZwokRIok4xyMeb6g/...,"1 White St, New York, NY 10013, USA","[{'open': {'day': 0, 'hour': 17, 'minute': 0},...",https://www.google.com/maps/dir//''/data=!4m7!...
1,110644,Dirt Candy,New York,USA,40.71794,-73.99087,1,4,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://www.dirtcandynyc.com/,4.6,[{'name': 'places/ChIJBXlte51ZwokRXzXtdaBG9xM/...,"86 Allen St, New York, NY 10002, USA","[{'open': {'day': 2, 'hour': 17, 'minute': 30}...",https://www.google.com/maps/dir//''/data=!4m7!...
2,110607,Family Meal at Blue Hill,New York,USA,40.731971,-73.999639,1,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.bluehillfarm.com/,4.6,[{'name': 'places/ChIJi9BURJFZwokRDcqvIgxdXds/...,"75 Washington Pl, New York, NY 10011, USA","[{'open': {'day': 0, 'hour': 11, 'minute': 30}...",https://www.google.com/maps/dir//''/data=!4m7!...
3,110478,Blue Hill at Stone Barns,Tarrytown,USA,41.100715,-73.829297,2,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.bluehillfarm.com/,4.7,[{'name': 'places/ChIJExcPIfK_wokRtc6K_3jkhgM/...,"630 Bedford Rd, Tarrytown, NY 10591, USA","[{'open': {'day': 0, 'hour': 11, 'minute': 30}...",https://www.google.com/maps/dir//''/data=!4m7!...
4,110338,Oyster Oyster,Washington,USA,38.909302,-77.023015,1,3,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://www.oysteroysterdc.com/,4.6,[{'name': 'places/ChIJQcHOEK-3t4kRbvsIP3ccItA/...,"1440 8th St NW, Washington, DC 20001, USA","[{'open': {'day': 2, 'hour': 17, 'minute': 30}...",https://www.google.com/maps/dir//''/data=!4m7!...


In [157]:
enriched_restaurants["google_reviews"] = enriched_restaurants["google_reviews"].apply(json.dumps)
enriched_restaurants["google_opening_hours"] = enriched_restaurants["google_opening_hours"].apply(json.dumps)
enriched_restaurants.to_csv("../data/processed/expanded_restaurants_google_places.csv", index=False, encoding="utf8", quoting=1)

The Google Places API, as well as the TripAdvisor, only returns up to 5 reviews per place. So it would be relevant to get reviews from others users through another service. As an important aggregator of communities, Reddit could be a pertinent resource.

## API Fetching to get more reviews on Reddit
with [Reddit API](https://www.reddit.com/dev/api/) or [PRAW](https://praw.readthedocs.io/en/stable/)

subreddits such as [r/food](https://www.reddit.com/r/food/), [r/restaurant](https://www.reddit.com/r/restaurant/) or [r/MichelinStars](https://www.reddit.com/r/MichelinStars/)

In [1]:
import praw
import os
import pandas as pd
from tqdm import tqdm
import time
from requests.exceptions import HTTPError

In [2]:
# Set up Reddit API credentials
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    user_agent="python.webscraping-ml-project"
)

[Lucene Query Syntax Overview](https://sec-api.io/resources/lucene-query-syntax-overview#Querying-Fields)

In [41]:
def fetch_reddit_posts(restaurant_name, subreddit="all", limit=10, retries=3, delay=2):
    """
    Fetches Reddit posts related to the given restaurant name.
    
    Args:
        restaurant_name (str): The name of the restaurant to search for.
        subreddit (str): The subreddit to search in (default is "all").
        limit (int): Maximum number of posts to fetch.
        retries (int): Number of retries in case of rate limiting.
        delay (int): Delay between retries in seconds.

    Returns:
        list: List of posts with title, body, and comments.
    """
    posts = []
    attempt = 0
    while attempt < retries:
        try:
            query = f"title:{restaurant_name} OR selftext:{restaurant_name}"
            # query = f"comments.body:{restaurant_name}"
            for submission in reddit.subreddit(subreddit).search(query, syntax='plain', limit=limit):
                submission.comments.replace_more(limit=0)  # Load all comments
                
                # comments = []
                # if restaurant_name.lower() in submission.title.lower() or restaurant_name.lower() in submission.selftext.lower():
                #     comments = [comment.body for comment in submission.comments]
                # else:
                #     # Check comments for restaurant name if not in title or body
                #     for comment in submission.comments:
                #         if restaurant_name.lower() in comment.body.lower():
                #             comments.append(comment.body)
                
                posts.append({
                    "title": submission.title,
                    "selftext": submission.selftext,
                    "score": submission.score,
                    "comments": [comment.body for comment in submission.comments]
                })
            break  # Exit loop if successful
        except Exception as e:
            print(f"Error fetching posts: {e}")
            print(f"Rate limited for fetching {restaurant_name} in r/{subreddit}. Retrying in {delay} seconds...")
            time.sleep(delay)
            attempt += 1
    if attempt >= retries:
        print(f"Failed to fetch {restaurant_name} in r/{subreddit} after {retries} attempts.")
    return posts

In [42]:
# Fetch posts for a restaurant in r/finedining
posts = fetch_reddit_posts('La Paix', subreddit="finedining", limit=5)
print(f"Number of posts: {len(posts)}")
print([post for post in posts])

Number of posts: 0
[]


In [43]:
subreddits = ["food", "restaurant", "MichelinStars", "finedining"]

def fetch_from_all_subreddits(restaurant_name, subreddits, limit=5):
    all_posts = []
    for subreddit in subreddits:
        posts = fetch_reddit_posts(restaurant_name, subreddit=subreddit, limit=limit)
        all_posts.extend(posts)
    return all_posts

In [44]:
# Fetch posts for "Ahimè" in multiple subreddits
all_posts = fetch_from_all_subreddits("Ahimè", subreddits)
print(f"Number of posts: {len(all_posts)}")
print(all_posts)

Number of posts: 1
[{'title': 'Italy Honeymoon Recommendations (Venice/Verona/Garda/Emilia Romagna/Tuscany/Umbria/Rome/Naples)', 'selftext': "Hello, I’m starting to plan my honeymoon (may 2025) and would love to hear dining and experience recommendations for: **Venice, Verona, Lake Garda, Bergamo, Parma, Modena, Bologna, Florence, Montepeluciano, Orvietto, Rome and Naples.**\n\nI know this is r/finedinning \xa0and any fine dining recommendations are welcomed (more so if it’s something not to be missed), although I’d love to hear more recommendations of local/traditional spots that have amazing food/experiences rather than typical michelin starred places. Opinions regarding the restaurants I already have on this list are also welcomed.\n\nOn the fine dining category, so far I have:\n\n·\xa0\xa0\xa0\xa0\xa0 **Lido84** (had the chance to try Ricardo Camanini food before and wouldn’t miss it), plan to stay at Lake Garda for a night so any other recommendations are welcomed.\n\n·\xa0\xa0\xa

In [45]:
def fetch_reddit_reviews(row):
    subreddits = ["food", "restaurant", "MichelinStars", "finedining"]
    return fetch_from_all_subreddits(row['name'], subreddits)

In [8]:
# Load the existing restaurants data
restaurants = pd.read_csv("../data/processed/expanded_restaurants_google_places.csv")

In [46]:
# Fetch Reddit reviews for the restaurant at index 14 ("The Wolf's Tailor")
fetch_reddit_reviews(restaurants.iloc[0])

[{'title': 'NYC - (*) One White Street ',
  'selftext': "one white street (*) \n\nThe walk in is very nice experience.\nBeautiful three stories of dining rooms. \nThe service was amazing and attentive. The Somm was knowledgeable and friendly to speak with, answering all questions from simple novice questions to more complex curious questions. \n\n1. Foie Gras - citrus explosion. The liver was rich and paired perfectly with the bottle of wine we chose after speaking with the Somm. The fat and citrus really paired well with each other, cutting down on the foie's lingering effect on the mouth\n\n2. American Wagyu with a Fried puff potato. I had a taste of this from my Fiancé. To me this really was a case of order regret. I wanted to switch out my plate with hers. The steak was cooked perfectly as to not disturb the middle of the marbling during cooking. just right enough heat to get the inside juices flowing. Delicate inside fat flavor yet strong flavor on the charred outside. The fried p

In [47]:
# Fetch Reddit reviews for the restaurant at index 14 ("The Wolf's Tailor")
fetch_reddit_reviews(restaurants.iloc[14])

[{'title': "The Wolf's Tailor in Denver",
  'selftext': '',
  'score': 301,
  'comments': ["I'll start with saying this was my first Michelin star experience and tasting menu fwiw. RE the flowers I'd say they're a net neutral on taste but did make the dishes beautiful. They're pulled from their own gardens and they have a theme of fresh and locally sourced so it does tie in somewhat. Aside from that everything tasted great, and the cocktail pairings were incredible and spot on.",
   'That’s a ton of edible flowers. How did that all taste / enhance the dishes?',
   'Edible flowers is such a shortcut food trope. It never brings anything tastewise to the dish.  This may be more curated, but we have a local “nice” restaurant here that sticks an orchid flower on every dish. I refuse to go anymore, it’s so dumb. We aren’t even in an area that has that flower naturally.',
   'Way too many flowers here.',
   'What the hell is pic 11??',
   'This looks incredible. The little candle at the end i

In [48]:
# Fetch Reddit reviews for each restaurant
tqdm.pandas()
restaurants["reddit_reviews"] = restaurants.progress_apply(fetch_reddit_reviews, axis=1)
restaurants.head()

 28%|██▊       | 163/581 [05:04<23:26,  3.37s/it]

Error fetching posts: received 429 HTTP response
Rate limited for fetching [àbitat] in r/food. Retrying in 2 seconds...


 74%|███████▎  | 428/581 [35:00<06:03,  2.38s/it]    

Error fetching posts: received 429 HTTP response
Rate limited for fetching Mirazur in r/finedining. Retrying in 2 seconds...


100%|██████████| 581/581 [44:05<00:00,  4.55s/it]


Unnamed: 0,id,name,location,country,latitude,longitude,distinctions,priceCategory,type,img,website_uri,google_rating,google_reviews,google_address,google_opening_hours,google_directions_link,reddit_reviews
0,110681,One White Street,New York,USA,40.719355,-74.006101,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.onewhitestreetnyc.com/,4.4,"""[{\""name\"": \""places/ChIJISXTwWJZwokRIok4xyMe...","1 White St, New York, NY 10013, USA","""[{\""open\"": {\""day\"": 0, \""hour\"": 17, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,"[{'title': 'NYC - (*) One White Street ', 'sel..."
1,110644,Dirt Candy,New York,USA,40.71794,-73.99087,1,4,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://www.dirtcandynyc.com/,4.6,"""[{\""name\"": \""places/ChIJBXlte51ZwokRXzXtdaBG...","86 Allen St, New York, NY 10002, USA","""[{\""open\"": {\""day\"": 2, \""hour\"": 17, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[{'title': '[I Ate] Hand-Pulled Kale Noodles f...
2,110607,Family Meal at Blue Hill,New York,USA,40.731971,-73.999639,1,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.bluehillfarm.com/,4.6,"""[{\""name\"": \""places/ChIJi9BURJFZwokRDcqvIgxd...","75 Washington Pl, New York, NY 10011, USA","""[{\""open\"": {\""day\"": 0, \""hour\"": 11, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[{'title': 'NYC Dinner Rec - Friend’s Birthday...
3,110478,Blue Hill at Stone Barns,Tarrytown,USA,41.100715,-73.829297,2,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.bluehillfarm.com/,4.7,"""[{\""name\"": \""places/ChIJExcPIfK_wokRtc6K_3jk...","630 Bedford Rd, Tarrytown, NY 10591, USA","""[{\""open\"": {\""day\"": 0, \""hour\"": 11, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[{'title': 'A farm-to-table feast like no othe...
4,110338,Oyster Oyster,Washington,USA,38.909302,-77.023015,1,3,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://www.oysteroysterdc.com/,4.6,"""[{\""name\"": \""places/ChIJQcHOEK-3t4kRbvsIP3cc...","1440 8th St NW, Washington, DC 20001, USA","""[{\""open\"": {\""day\"": 2, \""hour\"": 17, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[{'title': '[homemade] Rigatoni with Black Gar...


In [49]:
restaurants[restaurants['reddit_reviews'].apply(lambda x: x == [])]

Unnamed: 0,id,name,location,country,latitude,longitude,distinctions,priceCategory,type,img,website_uri,google_rating,google_reviews,google_address,google_opening_hours,google_directions_link,reddit_reviews
7,107792,Emmer & Rye,Austin,USA,30.256456,-97.739818,0,3,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://www.emmerandrye.com/,4.4,"""[{\""name\"": \""places/ChIJPWOnS6q1RIYRDOnUe3r5...","SkyHouse, 51 Rainey St UNIT 110, Austin, TX 78...","""[{\""open\"": {\""day\"": 0, \""hour\"": 17, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[]
8,107745,Dai Due,Austin,USA,30.284752,-97.716712,0,2,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://www.daidue.com/,4.7,"""[{\""name\"": \""places/ChIJsx9cWey1RIYRpeZVdPR4...","2406 Manor Rd, Austin, TX 78722, USA","""[{\""open\"": {\""day\"": 0, \""hour\"": 10, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[]
9,105594,The Chastain,Atlanta,USA,33.873170,-84.396488,-1,3,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://www.thechastainatl.com/,4.4,"""[{\""name\"": \""places/ChIJ1Wi-NlMP9YgRCVbGo4Zg...","4320 Powers Ferry Rd NW, Atlanta, GA 30342, USA","""[{\""open\"": {\""day\"": 0, \""hour\"": 8, \""minut...",https://www.google.com/maps/dir//''/data=!4m7!...,[]
11,103553,Bramble & Hare,Boulder,USA,40.018145,-105.278568,-1,3,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://brambleandhare.com/,4.5,"""[{\""name\"": \""places/ChIJj3ygJSbsa4cRNDo5pyr8...","1964 13th St, Boulder, CO 80302, USA","""[{\""open\"": {\""day\"": 1, \""hour\"": 17, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[]
13,103542,Blackbelly Market,Boulder,USA,40.014836,-105.227953,-1,3,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.blackbelly.com/,4.6,"""[{\""name\"": \""places/ChIJYZ6vWdrta4cR5BtaxL6q...","1606 Conestoga St Suite 1, Boulder, CO 80301, USA","""[{\""open\"": {\""day\"": 0, \""hour\"": 7, \""minut...",https://www.google.com/maps/dir//''/data=!4m7!...,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
570,90170,Casa Vigil,Mendoza,Argentina,-33.044624,-68.721394,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.instagram.com/casavigilpalmares/?h...,4.4,"""[{\""name\"": \""places/ChIJ9VrSl0wKfpYRu2m2gfDc...","RP82 2650 local D18, M5501 Godoy Cruz, Mendoza...","""[{\""open\"": {\""day\"": 0, \""hour\"": 12, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[]
571,90166,El Preferido de Palermo,Buenos Aires,Argentina,-34.585531,-58.425389,-1,2,Traditional Cuisine,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://elpreferido.meitre.com/,4.4,"""[{\""name\"": \""places/ChIJRazshIa1vJURczNt-pu4...","Jorge Luis Borges 2108, C1425 FFD, Cdad. Autón...","""[{\""open\"": {\""day\"": 0, \""hour\"": 11, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[]
572,90165,Riccitelli Bistró,Mendoza,Argentina,-33.033974,-68.959131,-1,3,Seasonal Cuisine,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://matiasriccitelli.com/,4.4,"""[{\""name\"": \""places/ChIJM0cASSrefZYRGsl8S18r...","Callejon Nicolas de la Reta 750, Las Compuerta...","""[{\""open\"": {\""day\"": 1, \""hour\"": 9, \""minut...",https://www.google.com/maps/dir//''/data=!4m7!...,[]
574,90159,Zonda Cocina de Paisaje,Mendoza,Argentina,-33.023072,-68.872733,1,3,Traditional Cuisine,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.lagarde.com.ar/inicio,5.0,"""[{\""name\"": \""places/ChIJRbFYF3B1fpYRgCdVcxlE...","San Martín 1745, M5507 Mayor Drummond, Mendoza...","""[{\""open\"": {\""day\"": 2, \""hour\"": 11, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,[]


263 restaurants with no additional reviews found on Reddit, so more than half...

In [50]:
import json

# Format the Reddit reviews as JSON strings
restaurants["reddit_reviews"] = restaurants["reddit_reviews"].apply(json.dumps)

# Saving the data

In [51]:
# Save the enriched data to a new CSV file
restaurants.to_csv("../data/processed/restaurants_with_reddit_reviews.csv", index=False, encoding="utf8", quoting=1)

In [53]:
test = pd.read_csv("../data/processed/restaurants_with_reddit_reviews.csv")
test.head()

Unnamed: 0,id,name,location,country,latitude,longitude,distinctions,priceCategory,type,img,website_uri,google_rating,google_reviews,google_address,google_opening_hours,google_directions_link,reddit_reviews
0,110681,One White Street,New York,USA,40.719355,-74.006101,1,4,Contemporary,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.onewhitestreetnyc.com/,4.4,"""[{\""name\"": \""places/ChIJISXTwWJZwokRIok4xyMe...","1 White St, New York, NY 10013, USA","""[{\""open\"": {\""day\"": 0, \""hour\"": 17, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,"[{""title"": ""NYC - (*) One White Street "", ""sel..."
1,110644,Dirt Candy,New York,USA,40.71794,-73.99087,1,4,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://www.dirtcandynyc.com/,4.6,"""[{\""name\"": \""places/ChIJBXlte51ZwokRXzXtdaBG...","86 Allen St, New York, NY 10002, USA","""[{\""open\"": {\""day\"": 2, \""hour\"": 17, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,"[{""title"": ""[I Ate] Hand-Pulled Kale Noodles f..."
2,110607,Family Meal at Blue Hill,New York,USA,40.731971,-73.999639,1,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.bluehillfarm.com/,4.6,"""[{\""name\"": \""places/ChIJi9BURJFZwokRDcqvIgxd...","75 Washington Pl, New York, NY 10011, USA","""[{\""open\"": {\""day\"": 0, \""hour\"": 11, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,"[{""title"": ""NYC Dinner Rec - Friend\u2019s Bir..."
3,110478,Blue Hill at Stone Barns,Tarrytown,USA,41.100715,-73.829297,2,4,American,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,https://www.bluehillfarm.com/,4.7,"""[{\""name\"": \""places/ChIJExcPIfK_wokRtc6K_3jk...","630 Bedford Rd, Tarrytown, NY 10591, USA","""[{\""open\"": {\""day\"": 0, \""hour\"": 11, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,"[{""title"": ""A farm-to-table feast like no othe..."
4,110338,Oyster Oyster,Washington,USA,38.909302,-77.023015,1,3,Vegetarian,https://axwwgrkdco.cloudimg.io/v7/__gmpics3__/...,http://www.oysteroysterdc.com/,4.6,"""[{\""name\"": \""places/ChIJQcHOEK-3t4kRbvsIP3cc...","1440 8th St NW, Washington, DC 20001, USA","""[{\""open\"": {\""day\"": 2, \""hour\"": 17, \""minu...",https://www.google.com/maps/dir//''/data=!4m7!...,"[{""title"": ""[homemade] Rigatoni with Black Gar..."


In [15]:
json.loads(test.iloc[14]["reddit_reviews"])[1]

{'title': "The Wolf's Tailor (*) - Denver",
 'selftext': 'I had the tasting menu at The Wolf\'s Tailor (A hyper seasonal and locally sourced menu) this past weekend in a party of 2. Did not do the beverage pairing because neither of us were trying to drink that much (alcohol or otherwise) but the variety of cocktails, beer, wine, and other drinks did sound really intriguing. Went with a non-a ginger beer that was great. \n\nThis was my first 1 star restaurant in my sort of backwards journey through starred tasting menus having previously visited Benu (***) and Saison (**) earlier this year. While the star ranking difference is clear and well justified between these, I really thoroughly enjoyed the experience at The Wolf\'s Tailor. And while the service at both of those restaurants were "better," within the context of being in Denver, I don\'t think the experience would have been improved had the service been like it was at either of those places. The one thing that did feel like it cou