In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Notebook-wide display setting: with "all", IPython shows the value of every
# top-level expression statement in a cell instead of only the last one.
InteractiveShell.ast_node_interactivity = "all"

from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import re
import time
import json

In [2]:
import pandas as pd
# Load the list of restaurant page URLs from a CSV path relative to the
# notebook's working directory. The preview below shows `url` and `city` columns.
urls_df = pd.read_csv('./scraped-data/UE-NY-rests-list.csv')
# Preview the first rows as a quick sanity check of what was loaded.
urls_df.head()

Unnamed: 0,url,city
0,https://www.ubereats.com/new-york/food-deliver...,new-york
1,https://www.ubereats.com/new-york/food-deliver...,new-york
2,https://www.ubereats.com/new-york/food-deliver...,new-york
3,https://www.ubereats.com/new-york/food-deliver...,new-york
4,https://www.ubereats.com/new-york/food-deliver...,new-york


In [3]:
def get_rest_details(rest_url, timeout=30):
    """Download a restaurant page and return its embedded JSON-LD metadata.

    The page is fetched with a browser-like User-Agent, parsed with
    BeautifulSoup, and the first ``<script type="application/ld+json">`` tag
    is decoded with ``json.loads``.

    Parameters
    ----------
    rest_url : str
        URL of the restaurant page.
    timeout : float, optional
        Seconds to wait on the network before giving up (default 30), so a
        single stalled connection cannot block a long scraping run.

    Returns
    -------
    dict
        The decoded JSON-LD payload, or an empty dict if anything went wrong
        (bad URL, network error, missing script tag, invalid JSON).

    Notes
    -----
    This is deliberately best-effort: every ``Exception`` is caught, reported
    on stdout together with the URL, and turned into an empty result.
    """
    rest_obj = {}
    try:
        req = Request(rest_url, headers={'User-Agent': 'Mozilla/5.0', 'Accept': 'text/html,application/xhtml+xml,application/xml'})
        # The context manager closes the HTTP response even if read() raises.
        with urlopen(req, timeout=timeout) as response:
            webpage = response.read()
        soup = BeautifulSoup(webpage, 'html.parser')
        ld_json_tag = soup.find("script", type="application/ld+json")
        if ld_json_tag is None:
            # Name the real problem instead of a NoneType AttributeError.
            raise ValueError('no <script type="application/ld+json"> tag found')
        rest_obj = json.loads(ld_json_tag.text)
    except Exception as e:
        print(f"\nerror: {e}\n url: {rest_url}")

    return rest_obj

In [4]:
def scrap_all_rest(urls_df, df, error_rests):
    """Scrape every restaurant listed in ``urls_df`` and store the results in ``df``.

    Parameters
    ----------
    urls_df : pandas.DataFrame
        Must provide ``url`` and ``city`` columns; rows are visited in
        positional order.
    df : pandas.DataFrame
        Destination frame, filled IN PLACE. The row for the i-th input row is
        written at label ``i`` and must match ``df``'s ten columns in order:
        name, city, cuisine, rating, price range, address, coordinates,
        opening hours, phone, url.
    error_rests : list
        Mutated IN PLACE: the URL of every restaurant that could not be
        scraped or stored is appended.

    Returns
    -------
    tuple
        ``(df, error_rests)`` - the same objects that were passed in.

    Notes
    -----
    Rows are written one at a time, so whatever has been collected is still
    available in ``df`` if the run is interrupted part-way.
    """
    start_time = time.time()
    num_rests = urls_df.shape[0]

    for i in range(num_rests):
        curr_rest = urls_df.iloc[i]
        if i % 50 == 0:
            print(f"Progress: {(i*100)//num_rests}%, restaurants done: {i}, time elapsed: {time.time()-start_time}")

        try:
            obj = get_rest_details(curr_rest.url)
            if not obj:
                # get_rest_details already printed the cause and returned an
                # empty result; record the failure without a second, misleading
                # "error: 'name'" message.
                error_rests.append(curr_rest.url)
                continue
            df.loc[i] = [
                obj['name'],  # mandatory: a payload without a name counts as an error
                curr_rest.city,
                obj.get('servesCuisine'),
                obj.get('aggregateRating'),
                obj.get('priceRange'),
                obj.get('address'),
                obj.get('geo'),
                obj.get('openingHoursSpecification'),
                obj.get('telephone'),
                curr_rest.url
            ]
        except Exception as e:
            print(f"\nerror: {e}\n url: {curr_rest.url}")
            error_rests.append(curr_rest.url)

    elapsed_time = time.time()-start_time
    print(f"total time: {elapsed_time}, error count: {len(error_rests)}")
    return df, error_rests

In [5]:
# Empty result frame; its column order must match the row list that
# scrap_all_rest writes for each restaurant.
df = pd.DataFrame(columns=['name', 'city', 'cuisine', 'rating', 'price_range' ,'address', 'coordinates', 'opening_hours', 'phone', 'url'])
# Collects the URLs of restaurants that fail to scrape.
error_rests = []
# Long-running: fetches one page per row of urls_df. Both `df` and
# `error_rests` are filled in place, so the return value is not captured here.
scrap_all_rest(urls_df, df, error_rests)
# curr_rest = urls_df.iloc[0]
# curr_rest.url

Progress: 0%, restaurants done: 0, time elapsed: 0.00019884109497070312
Progress: 1%, restaurants done: 50, time elapsed: 113.98881387710571
Progress: 3%, restaurants done: 100, time elapsed: 239.9572684764862
Progress: 4%, restaurants done: 150, time elapsed: 338.4237627983093
Progress: 6%, restaurants done: 200, time elapsed: 442.3849837779999


KeyboardInterrupt: 

In [6]:
# Display the rows that have been written into `df` by the scraping cell.
df

Unnamed: 0,name,city,cuisine,rating,price_range,address,coordinates,opening_hours,phone,url
0,Vive La Crepe - Colombus &amp; 68th St.,new-york,"[French, Coffee and Tea, Desserts]","{'@type': 'AggregateRating', 'ratingValue': 4....",$,"{'@type': 'PostalAddress', 'addressLocality': ...","{'@type': 'GeoCoordinates', 'latitude': 40.775...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",+6464788008,https://www.ubereats.com/new-york/food-deliver...
1,sweetgreen (Chambers St),new-york,"[Healthy, Salads]","{'@type': 'AggregateRating', 'ratingValue': 4....",$,"{'@type': 'PostalAddress', 'addressLocality': ...","{'@type': 'GeoCoordinates', 'latitude': 40.715...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",,https://www.ubereats.com/new-york/food-deliver...
2,Tacombi - Financial District,new-york,"[Mexican, Healthy, New Mexican]","{'@type': 'AggregateRating', 'ratingValue': 4....",$,"{'@type': 'PostalAddress', 'addressLocality': ...","{'@type': 'GeoCoordinates', 'latitude': 40.704...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",+19172155939,https://www.ubereats.com/new-york/food-deliver...
3,Daily Poke Co.,new-york,"[Hawaiian, Seafood, Salads, Gluten Free Friend...",{},$,"{'@type': 'PostalAddress', 'addressLocality': ...","{'@type': 'GeoCoordinates', 'latitude': 40.759...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",+19703339759,https://www.ubereats.com/new-york/food-deliver...
4,Dough Boys Pizza,new-york,"[Italian, Pizza, Sandwich]","{'@type': 'AggregateRating', 'ratingValue': 4....",$,"{'@type': 'PostalAddress', 'addressLocality': ...","{'@type': 'GeoCoordinates', 'latitude': 40.743...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",+12126861211,https://www.ubereats.com/new-york/food-deliver...
5,Cafe Katja,new-york,"[Cafe, Breakfast and Brunch, Austrian, Pub]",{},$,"{'@type': 'PostalAddress', 'addressLocality': ...","{'@type': 'GeoCoordinates', 'latitude': 40.717...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",+12122199545,https://www.ubereats.com/new-york/food-deliver...
6,My Food House,new-york,"[Chinese, Seafood, Vegetarian Friendly]",{},$$,,"{'@type': 'GeoCoordinates', 'latitude': 40.714...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",+19293200555,https://www.ubereats.com/new-york/food-deliver...
7,Tous Les Jours,new-york,"[American, Breakfast and Brunch, Desserts]",{},$,"{'@type': 'PostalAddress', 'addressLocality': ...","{'@type': 'GeoCoordinates', 'latitude': 40.747...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",+12129679661,https://www.ubereats.com/new-york/food-deliver...
8,Frankie Boys Pizza,new-york,"[Pizza, Salads, Italian]",{},$$,"{'@type': 'PostalAddress', 'addressLocality': ...","{'@type': 'GeoCoordinates', 'latitude': 40.752...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",+12122447444,https://www.ubereats.com/new-york/food-deliver...
9,The Sound Bite - Hell's Kitchen,new-york,"[Soul Food, Burgers, Breakfast and Brunch]",{},$,"{'@type': 'PostalAddress', 'addressLocality': ...","{'@type': 'GeoCoordinates', 'latitude': 40.763...","[{'@type': 'OpeningHoursSpecification', 'dayOf...",+19174095868,https://www.ubereats.com/new-york/food-deliver...
