## Imports

In [3]:
import pandas as pd 
import numpy as np 
import requests
from selenium import webdriver
import matplotlib.pyplot as plt
import folium
from math import radians, sin, cos, asin, sqrt, ceil
import time
import os

from selenium import webdriver
import chromedriver_binary

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.chrome.options import Options

from bs4 import BeautifulSoup

from dotenv import load_dotenv, find_dotenv

In [4]:
path = find_dotenv()
load_dotenv(path)
yelp_key=os.getenv('YELP_KEY')
google_key=os.getenv('GOOGLE_PLACE_KEY')

## Get list of restaurants from Yelp API

In [3]:
#Haversine Distance function used to compute radius

def haversine_distance(lon1, lat1, lon2, lat2):
    """
    Compute distance between two pairs of coordinates (lon1, lat1, lon2, lat2)
    """
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    return 2 * 6371 * asin(sqrt(a))

In [53]:
#Divide Paris into subzones to request more restaurants from Yelp API

def subzones_paris(n_subzones):
    """
    Divide Paris into n_subzones squares. Returns list of centers (lat,lon)
    and the radius (in meters) to use for the API request
    """
    #lat-lon boundaries for Paris
    up_left=np.array([48.895887171270736, 2.2869284346488987])
    up_right=np.array([48.89319370738818, 2.423966623607285])
    down_right=np.array([48.8205428668887, 2.3981536506898578])
    down_left=np.array([48.83711493743023, 2.2752327599586315])

    coor = np.array([up_left,up_right,down_right,down_left])

    #compute distance
    dist_h=up_right-up_left
    dist_v=down_left-up_left

    #divide in n_subzones

    epsilon_h=dist_h/n_subzones**0.5
    epsilon_v=dist_v/n_subzones**0.5

    centers=[]
    start=up_left

    for i in range(int(n_subzones**0.5)):
        for j in range(int(n_subzones**0.5)):
            start=(i+0.5)*epsilon_v+(j+0.5)*epsilon_h+up_left
            centers.append(start)
            
    
    radius = haversine_distance(up_left[0], up_left[1], centers[0][0], centers[0][1])
    
    return centers,radius*750

In [54]:
def map_centers(centers,radius):
    centers_start=centers[int((len(centers)-1)/2)]
    m = folium.Map(location=list(centers_start), zoom_start=13)
    
    for c in centers:
        folium.Circle(radius=radius,location=list(c),
                      color="crimson",fill=False).add_to(m)
        
    return m 

In [55]:
map_centers(*subzones_paris(36))

In [37]:
def get_restaurants(centers,radius,api_key):
    """
    Returns DataFrame of restaurants in Paris
    """
    headers={'Authorization':f'Bearer {yelp_key}'}
    url = 'https://api.yelp.com/v3/businesses/search'

    data = []
    
    for i,c in enumerate(centers):
        print(f'---------- Requesting API for subzone #{i+1} ----------')
        for offset in range(0, 200, 50):
            print(f'   ------- Requesting API with offset = {offset} -------   ')
            params = {
                'limit': 50, 
                'categories':['restaurants'],
                'sort_by':'review_count',
                'offset': offset,
                'latitude':c[0],
                'longitude':c[1],
                'radius':int(radius)
            }

            response = requests.get(url, headers=headers, params=params)
            if response.status_code == 200:
                data += response.json()['businesses']
            elif response.status_code == 400:
                print('400 Bad Request')
                break
                
    print(f'#####   Request completed, {len(data)} businesses fetched   ###')
    return data

In [38]:
centers,radius = subzones_paris(16)
data = get_restaurants(centers,radius,yelp_key)

---------- Requesting API for subzone #1 ----------
   ------- Requesting API with offset = 0 -------   
   ------- Requesting API with offset = 50 -------   
   ------- Requesting API with offset = 100 -------   
   ------- Requesting API with offset = 150 -------   
---------- Requesting API for subzone #2 ----------
   ------- Requesting API with offset = 0 -------   
   ------- Requesting API with offset = 50 -------   
   ------- Requesting API with offset = 100 -------   
   ------- Requesting API with offset = 150 -------   
---------- Requesting API for subzone #3 ----------
   ------- Requesting API with offset = 0 -------   
   ------- Requesting API with offset = 50 -------   
   ------- Requesting API with offset = 100 -------   
   ------- Requesting API with offset = 150 -------   
---------- Requesting API for subzone #4 ----------
   ------- Requesting API with offset = 0 -------   
   ------- Requesting API with offset = 50 -------   
   ------- Requesting API with off

In [84]:
data

[{'id': 'DNrTjCKllID58vR3OSHVUw',
  'alias': 'le-sud-paris',
  'name': 'Le Sud',
  'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/uAIGHrll-HoOv8v8HkeH0w/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/le-sud-paris?adjust_creative=e_9PvwQK9fB2R-i9GrMSJA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=e_9PvwQK9fB2R-i9GrMSJA',
  'review_count': 124,
  'categories': [{'alias': 'french', 'title': 'French'}],
  'rating': 3.5,
  'coordinates': {'latitude': 48.878948, 'longitude': 2.284763},
  'transactions': [],
  'price': '€€€',
  'location': {'address1': '91 boulevard Gouvion Saint Cyr',
   'address2': None,
   'address3': None,
   'city': 'Paris',
   'zip_code': '75017',
   'country': 'FR',
   'state': '75',
   'display_address': ['91 boulevard Gouvion Saint Cyr',
    '75017 Paris',
    'France']},
  'phone': '+33145740277',
  'display_phone': '+33 1 45 74 02 77',
  'distance': 1723.6253396224263},
 {'id': 'RkoW6QUZ5KlU8nuPseFYYQ',
  'alias': '


## Yelp Reviews Scrapping

In [39]:
### Create DF for Yelp data


def create_df_yelp(data):

    df = pd.DataFrame(columns=[
        'alias', 'name', 'url', 'categories', 'latitude', 'longitude',
        'address', 'zip_code', 'price', 'rating', 'review_count'
    ])

    features_to_loop = [
        'alias', 'name', 'url', 'categories', 'price', 'rating', 'review_count'
    ]

    #populate DF
    #if condition to avoid raising errors in case restaurant doesn't have all informations

    for i, d in enumerate(data):

        for f in features_to_loop:
            if f in d:
                df.loc[i, f] = d[f]
            else:
                df.loc[i, f] = ''

        if 'location' in d:
            if 'latitude' in d['coordinates']:
                df.loc[i, 'latitude'] = d['coordinates']['latitude']
            else:
                df.loc[i, 'latitude'] = ''

            if 'longitude' in d['coordinates']:
                df.loc[i, 'longitude'] = d['coordinates']['longitude']
            else:
                df.loc[i, 'longitude'] = ''

            if 'address1' in d['location']:
                df.loc[i, 'address'] = d['location']['address1']
            else:
                df.loc[i, 'address'] = ''

            if 'zip_code' in d['location']:
                df.loc[i, 'zip_code'] = d['location']['zip_code']
            else:
                df.loc[i, 'zip_code'] = 0

    #clean DF
    #dtypes
    df['latitude'] = df['latitude'].astype(float)
    df['longitude'] = df['longitude'].astype(float)
    df['zip_code'] = df['zip_code'].replace('', 0).astype(int)
    df['rating'] = df['rating'].astype(float)
    df['review_count'] = df['review_count'].astype(float)

    #url
    df['url'] = df['url'].apply(lambda txt: txt.split('?', 1)[0])

    #price
    prices = {'€': '1', '€€': '2', '€€€': '3', '€€€€': '4'}

    for euro, num in prices.items():
        df['price'] = df['price'].replace(euro, num)

    df['price'] = df['price'].replace('', 0).astype(int)

    #categories
    df['categories'] = df['categories'].apply(
        lambda dicts: ', '.join([d['alias'] for d in dicts]))

    return df.drop_duplicates()

In [40]:
df = create_df_yelp(data)

In [43]:
df.sort_values("review_count",ascending=False)

Unnamed: 0,alias,name,url,categories,latitude,longitude,address,zip_code,price,rating,review_count
2000,l-as-du-fallafel-paris,L'As du Fallafel,https://www.yelp.com/biz/l-as-du-fallafel-paris,"kosher, sandwiches, falafel",48.857498,2.359080,34 rue des Rosiers,75004,1,4.5,1812.0
1000,angelina-paris,Angelina,https://www.yelp.com/biz/angelina-paris,"breakfast_brunch, tea, cakeshop",48.865092,2.328464,226 rue de Rivoli,75001,3,4.0,1349.0
1001,le-comptoir-de-la-gastronomie-paris,Le Comptoir de la Gastronomie,https://www.yelp.com/biz/le-comptoir-de-la-gas...,french,48.864516,2.345402,34 rue Montmartre,75001,2,4.5,1107.0
1002,bouillon-chartier-paris,Bouillon Chartier,https://www.yelp.com/biz/bouillon-chartier-paris,french,48.871940,2.343170,7 rue du Faubourg Montmartre,75009,2,3.5,953.0
1803,l-avant-comptoir-paris-3,L'Avant Comptoir,https://www.yelp.com/biz/l-avant-comptoir-paris-3,"tapas, wine_bars",48.852020,2.338800,3 carrefour de l'Odéon,75006,2,4.5,612.0
...,...,...,...,...,...,...,...,...,...,...,...
792,bistro-190-paris,Bistro 190,https://www.yelp.com/biz/bistro-190-paris,french,48.888120,2.391200,190 avenue Jean Jaurès,75019,0,3.0,1.0
793,snack-tacos-paris,Snack Taco's,https://www.yelp.com/biz/snack-tacos-paris,sandwiches,48.887389,2.387217,65 avenue Jean Jaurès,75019,0,4.0,1.0
794,le-cellier-pantin,Le Cellier,https://www.yelp.com/biz/le-cellier-pantin,"french, sud_ouest",48.896914,2.399882,11 Avenue Edouard Vaillant,93500,0,4.0,1.0
796,le-cabochard-pré-saint-gervais-le-3,Le Cabochard,https://www.yelp.com/biz/le-cabochard-pr%C3%A9...,creperies,48.885588,2.402583,16 Rue D'estienne D'orves,93310,0,4.0,1.0


In [57]:
df[df['review_count']>20].shape

(1498, 11)

In [58]:
df.to_csv('final_resto_list.csv')

In [15]:
url_ = df.loc[50,'url']

In [80]:
### SCRAPPING ONE RESTAURANT --- YELP ###

def get_reviews_yelp(url,verbose=0,quiet_mode=True,load_strategy='eager'):
    
    #pages
    
    options=Options()
    if quiet_mode:
        options.add_argument('--headless')
    options.page_load_strategy = load_strategy
    driver = webdriver.Chrome(options=options)
    
    driver.get(url)
    xpath_page="//yelp-react-root/div[1]/div[4]/div/div/div[2]/div/div[1]/div[2]/section/div[2]/div/div[4]/div[2]/span"
    try:
        page = driver.find_element(By.XPATH,xpath_page).text
        n_pages=int(page.split('of ')[1])
        
    except:
        n_pages=1
        
    reviews=[]
    rates=[]
    dates=[]
    alias=[]
    
    a_toappend=url.split('/biz/',1)[1]
    
    print(f'### {n_pages} pages to scrap for {a_toappend}###')

    for n in range(n_pages):

        if verbose>0:
            print(f'--- Fetching reviews of page #{n+1}...')

        #url of the n_page
        url_=f'{url}?start={n*10}'
        driver.get(url_)
        xpath_all_review=f"//yelp-react-root/div[1]/div[4]/div/div/div[2]/div/div[1]/div[2]/section/div[2]/div/ul"   
        
        try:
            review_block = driver.find_element(By.XPATH,xpath_all_review)
        except:
            continue
        try:
            all_reviews = review_block.find_elements(By.TAG_NAME,'li')
        except:
            continue
        for i,r in enumerate(all_reviews):
            alias.append(a_toappend)
            if verbose>1:
                print(f'--Getting reviews #{i+1}...')
            reviews.append(r.text)
            try:
                rate=r.find_element(By.XPATH,'.//div/div[2]/div/div[1]/span/div')
                rates.append(rate.get_attribute('aria-label'))
            except:
                rates.append('NAN')
            try:
                dates.append(r.find_element(By.XPATH,'.//div/div[2]/div/div[2]/span').text)
            except:
                dates.append('NAN')
                

        
    return alias, dates, rates, reviews


In [None]:
yelp_reviews=pd.DataFrame(columns=['date','rate','review'])

In [79]:
alias,dates,rates,reviews = get_reviews_yelp(df.loc[52,'url'],verbose=2)

### 2 pages to scrap for []###
--- Fetching reviews of page #1...
--Getting reviews #1...
--Getting reviews #2...
--Getting reviews #3...
--Getting reviews #4...
--Getting reviews #5...
--Getting reviews #6...
--Getting reviews #7...
--Getting reviews #8...
--Getting reviews #9...
--Getting reviews #10...
--- Fetching reviews of page #2...
--Getting reviews #1...
--Getting reviews #2...
--Getting reviews #3...
--Getting reviews #4...
--Getting reviews #5...


In [61]:

for i in [5,6,7,8]:
    dates,rates,reviews = get_reviews_yelp(df.loc[i,'url'],verbose=2)
    yelp_reviews=yelp_reviews.append(pd.DataFrame({
        'date':dates,
        'rate': rates,
        'review' : reviews
        },),ignore_index=True)

### 2 pages to scrap for None###
--- Fetching reviews of page #1...
--Getting reviews #1...
--Getting reviews #2...
--Getting reviews #3...
--Getting reviews #4...
--Getting reviews #5...
--Getting reviews #6...
--Getting reviews #7...
--Getting reviews #8...
--Getting reviews #9...
--Getting reviews #10...
--- Fetching reviews of page #2...


KeyboardInterrupt: 

In [21]:
yelp_reviews.shape

(252, 3)

In [89]:
essai=pd.DataFrame(columns=['date','rate','review'])

In [143]:
index = np.random.randint(0,len(list_resto))
index

99

In [126]:
dt=pd.read_csv('../fobokiller/data/scrapping.csv',index_col=0)
done=dt['alias'].unique()
len(done)

896

In [151]:
dt=pd.read_csv('../fobokiller/data/scrapping.csv',index_col=0)

### TESTS 

rate_nan = dt[dt['rate']=='NAN']['rate'].sum()
date_nan = dt[dt['date']=='NAN']['date'].sum() 
review_nan = dt[dt['review']=='NAN']['review'].sum()
unique_resto = len(dt['alias'].unique())
unique_reviews = len(dt['review'].unique())

print(f"""
Dataset Yelp reviews scrapped 🎵all night long 🎵: 
{rate_nan} null in rates
{date_nan} null in dates
{review_nan} null in reviews
{unique_resto} unique restaurants
{unique_reviews} unique reviews
""")


Dataset Yelp reviews scrapped 🎵all night long 🎵: 
0 null in rates
0 null in dates
0 null in reviews
982 unique restaurants
45753 unique reviews



In [147]:
dt.tail()

Unnamed: 0,alias,date,rate,review
42629,galette-café-paris-4,1/3/2015,1 star rating,"Mary P.\nWashington, DC\n0\n12\n1/3/2015\nAfte..."
42630,galette-café-paris-4,6/12/2013,5 star rating,"David H.\nMorehead City, NC\n2\n8\n6/12/2013\n..."
42631,galette-café-paris-4,10/17/2015,5 star rating,"Sarah S.\nManhattan, NY\n0\n23\n1\n10/17/2015\..."
42632,galette-café-paris-4,4/2/2015,3 star rating,"Kenia F.\nJersey City, NJ\n0\n25\n4/2/2015\nI ..."
42633,galette-café-paris-4,9/10/2013,5 star rating,"Scott W.\nBend, OR\n0\n8\n9/10/2013\nGo... Jus..."


In [127]:
done[-5:]

array(['le-relais-de-l-entrecôte-paris-4', 'café-de-flore-paris-3',
       'l-atelier-de-joël-robuchon-paris-2', 'eggs-et-co-paris',
       'le-bistro-du-périgord-paris-3'], dtype=object)

In [31]:
list_resto = pd.read_csv('../fobokiller/data/final_resto_list.csv',index_col=0)
list_resto=list_resto[list_resto['review_count']>20][:100]

In [35]:
list_resto.reset_index(drop=True,inplace=True)

In [28]:
list_resto.loc[34,'url']#.split('/biz/', 1)[1]

'https://www.yelp.com/biz/madame-tomate-levallois-perret'

In [36]:
for i in range(100):
    if list_resto.loc[i,'alias'] not in done:
        print(i,'hahahaha')

34 hahahaha
36 hahahaha
56 hahahaha
57 hahahaha
58 hahahaha
59 hahahaha
60 hahahaha
61 hahahaha
62 hahahaha
63 hahahaha
64 hahahaha
65 hahahaha
66 hahahaha
67 hahahaha
68 hahahaha
69 hahahaha
70 hahahaha
71 hahahaha
72 hahahaha
73 hahahaha
74 hahahaha
75 hahahaha
76 hahahaha
77 hahahaha
78 hahahaha
79 hahahaha
80 hahahaha
81 hahahaha
82 hahahaha
83 hahahaha
84 hahahaha
85 hahahaha
86 hahahaha
87 hahahaha
88 hahahaha
89 hahahaha
90 hahahaha
91 hahahaha
92 hahahaha
93 hahahaha
94 hahahaha
95 hahahaha
96 hahahaha
97 hahahaha
98 hahahaha
99 hahahaha


## Google Review Scrapping

In [35]:
## Get place ID 

def get_place_google_id(name,latitude,longitude):

    url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json'
    params={
        'key' : google_key,
        'input' : name,
        'inputtype' : 'textquery',
        'locationbias' : f'point:{latitude},{longitude}'
    }
    
    response = requests.get(url,params=params)
    
    #if conditions to avoid raising errors
    if response.status_code != 200:
        return ''
        
    if 'candidates' in response.json():
        response = response.json()['candidates']
        if len(response)==0:
            return ''
        if 'place_id' in response[0]:
            return response[0]['place_id']
    
    return ''


In [36]:
## id_ for testing purposes -- don't request API multiple times

name = df.loc[0,'name']
lat = df.loc[0,'latitude']
lon = df.loc[0,'longitude']
id_=get_place_google_id(name,lat,lon)

id_

'ChIJ4xutfT5u5kcRaJn2NkiOhPU'

In [37]:
## Get place url

def get_place_google_url(place_id):
    url='https://maps.googleapis.com/maps/api/place/details/json'
    params={
        'key' : google_key,
        'place_id' : place_id,
        'fields' : 'url'
    }
    
    response = requests.get(url,params=params)
    
    #if conditions to avoid raising errors
    if response.status_code != 200:
        return ''
    
    if 'result' in response.json():
        response = response.json()['result']
        if 'url' in response:
            return response['url']
    
    return ''


In [38]:
## url_for_test for testing purposes -- don't request API multiple times

url_for_test=get_place_google_url(id_)
url_for_test

'https://maps.google.com/?cid=17691421677029071208'

In [39]:
### Get all reviews from a Google page


def get_reviews_google(url,scroll_limit=None,quiet_mode=True,return_count=False):
    
    options=Options()
    if quiet_mode:
        options.add_argument('--headless')    
    driver = webdriver.Chrome(options=options)
    driver.get(url)

    
    ###Expand all the reviews using Selenium
        # privacy pop-up
    xpath = "/html/body/c-wiz/div/div/div/div[2]/div[1]/div[4]/form/div[1]/div/button/span"
    driver.find_element_by_xpath(xpath).click()

        #review_count click
    xpath = '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[1]/div[2]/div/div[1]/span[1]/span/span[1]/span[2]'

    review_count = driver.find_element_by_xpath(xpath).text
    review_count=review_count.split(' ', 1)[0]

    driver.find_element_by_xpath(xpath).click()

        #scroll to show all reviews
    time.sleep(2)
    if scroll_limit:
        review_count=scroll_limit
    scrollable_div = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]')
    for i in range(0,(round(int(review_count)/10-1))):
            driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', 
                    scrollable_div)
            time.sleep(2)


    ### Scrap the reviews info using BS      
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    
    #Scrap the reviews text
    reviews_soup = soup.find_all('div', class_='ODSEW-ShBeI NIyLF-haAclf gm2-body-2')
    reviews = [r.text for r in reviews_soup]
    
    #Scrap the reviews rate
    review_rates_soup = [s.find('span',class_='ODSEW-ShBeI-H1e3jb') for s in reviews_soup]
    review_rates = [rr.attrs['aria-label'][1] for rr in review_rates_soup]
    
    #Scrap the reviews date
    review_dates_soup=[s.find('span', class_='ODSEW-ShBeI-RgZmSc-date') for s in reviews_soup]
    review_dates=[rd.text for rd in review_dates_soup]
    
    
    if return_count:
        return review_count,review_dates,review_rates,reviews
    
    return review_dates,review_rates,reviews


In [40]:
review_dates,review_rates,reviews = get_reviews_google(url_for_test,scroll_limit=40,quiet_mode=False)

  driver.find_element_by_xpath(xpath).click()
  review_count = driver.find_element_by_xpath(xpath).text
  driver.find_element_by_xpath(xpath).click()
  scrollable_div = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]')


In [41]:
review_dates

['il y a une semaine',
 'il y a 4\xa0jours',
 'il y a 4\xa0jours',
 'il y a un mois',
 'il y a un mois',
 'il y a 2\xa0mois',
 'il y a 2\xa0mois',
 'il y a une semaine',
 'il y a un mois',
 'il y a 2\xa0mois',
 'il y a 3\xa0mois',
 'il y a 2\xa0mois',
 'il y a 3\xa0mois',
 'il y a 3\xa0mois',
 'il y a 4\xa0mois',
 'il y a 3\xa0semaines',
 'il y a 4\xa0mois',
 'il y a 3\xa0mois',
 'il y a 4\xa0mois',
 'il y a 2\xa0semaines',
 'il y a une semaine',
 'il y a une semaine',
 'il y a une semaine',
 'il y a un mois',
 'il y a un mois',
 'il y a 4\xa0mois',
 'il y a 3\xa0semaines',
 'il y a 3\xa0mois',
 'il y a un mois',
 'il y a 3\xa0mois',
 'il y a une semaine',
 'il y a une semaine',
 'il y a 3\xa0mois',
 'il y a un mois',
 'il y a 4\xa0mois',
 'il y a une semaine',
 'il y a 3\xa0mois',
 'il y a 3\xa0semaines',
 'il y a une semaine',
 'il y a 3\xa0mois']

In [42]:
reviews

["                  Jean-Luc Paredes  Local\xa0Guide6\xa0avis               il y a une semaine Nouveau    Excellent ! Typique ! La vrai brasserie parisienne telle qu'on l'espère ! Un serveur au top. Des voisins de table sympa. Tout était réuni pour passer un agréable moment !       Visité en novembre                J'aime    Partager       ",
 "                  Pascal vacaresse  Local\xa0Guide · 48\xa0avis               il y a 4\xa0jours Nouveau    De passage à Paris pour quelques jours. Une visite dans ce lieu me semblait incontournable. Victime de leur succès 1h30 d'attente pour avoir une table. Restauration convenable et les prix accessibles pour tous. Un peu bruyant. …    Plus      Visité en novembre             +2         J'aime    Partager       ",
 "                  Marie Claude Suchet  Local\xa0Guide · 172\xa0avis               il y a 4\xa0jours Nouveau    C'est vrai c'est vraiment une brasserie hors du temps où vous pouvez manger seul, car c'est un véritable spectacle. Nous 

In [43]:
data_to_clean = pd.DataFrame({'data':review_dates,
                              'rate':review_rates,
                              'review':reviews})
data_to_clean.head()

Unnamed: 0,data,rate,review
0,il y a une semaine,4,Jean-Luc Paredes Local Guid...
1,il y a 4 jours,4,Pascal vacaresse Local Guid...
2,il y a 4 jours,5,Marie Claude Suchet Local G...
3,il y a un mois,4,Michel C. Local Guide · 140...
4,il y a un mois,5,Jackie Ibanez Local Guide ·...


In [44]:
data_to_clean.to_csv('data_to_clean.csv')