In [1]:
import pdb

import os

import numpy as np
import pandas as pd

import json
import requests

import re

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from common_definitions import columns_restaurants, columns_menu

In [3]:
# define folders
data_folder = './Data'
folder_rest_details = './Supper_London_Details'

In [5]:

headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Origin': 'https://supper.london',
    'Connection': 'keep-alive',
    'Referer': 'https://supper.london/',
}

params = (
    ('postcode', 'SW1H 0NF'),
)


In [6]:
# get data from the web
get_response = False

if get_response:
    response = requests.get('https://webapi.supper.london/api/restaurants', headers=headers, params=params)

# NB. Original query string below. It seems impossible to parse and
# reproduce query strings 100% accurately so the one below is given
# in case the reproduced version is not "correct".
# response = requests.get('https://webapi.supper.london/api/restaurants?postcode=SW1H%200NF', headers=headers)


In [5]:
# file for saving the dump
fn_victoria_street = 'Supper_London_response_victoria_street.json'

In [6]:
# this is the zip code matching the address used
zip_code_victoria = 'SW1H 0NF'

In [7]:
# basis URL for getting details about a restaurant
base_URL_rest_detail = 'https://webapi.supper.london/api/restaurants/london'

In [10]:
save_response = False

if save_response:
    # save response as json file
    with open(fn_victoria_street, 'w') as jf:
        json.dump(json.loads(response.content), jf)
        jf.close()

In [8]:
# load the previously dumped file
with open(fn_victoria_street, 'r') as jf:
    data_vs = json.load(jf)

In [10]:
key_cuisines = 'Cuisines'
key_rest = 'Restaurants'

In [11]:
# list of cuisines
data_vs[key_cuisines]

[{'CuisineId': 22, 'CuisineName': 'French'},
 {'CuisineId': 24, 'CuisineName': 'British'},
 {'CuisineId': 31, 'CuisineName': 'Wine'},
 {'CuisineId': 33, 'CuisineName': 'Sweet treats'},
 {'CuisineId': 46, 'CuisineName': 'Vegan'},
 {'CuisineId': 1040, 'CuisineName': 'Drinks'},
 {'CuisineId': 1043, 'CuisineName': 'Vegetarian'},
 {'CuisineId': 30, 'CuisineName': 'Peruvian'},
 {'CuisineId': 35, 'CuisineName': 'Seafood'},
 {'CuisineId': 34, 'CuisineName': 'Pizza'},
 {'CuisineId': 20, 'CuisineName': 'Japanese'},
 {'CuisineId': 25, 'CuisineName': 'Italian'},
 {'CuisineId': 23, 'CuisineName': 'Indian'},
 {'CuisineId': 40, 'CuisineName': 'Beers'},
 {'CuisineId': 32, 'CuisineName': 'Grill'},
 {'CuisineId': 28, 'CuisineName': 'Middle Eastern'},
 {'CuisineId': 1042, 'CuisineName': 'Greek'},
 {'CuisineId': 50, 'CuisineName': 'Taiwanese'},
 {'CuisineId': 1039, 'CuisineName': 'Tea'},
 {'CuisineId': 36, 'CuisineName': 'American'},
 {'CuisineId': 26, 'CuisineName': 'Chinese'}]

In [12]:
# list of restaurants
list_restaurants = data_vs[key_rest]

{'Cuisines': [{'CuisineId': 22, 'CuisineName': 'French'},
  {'CuisineId': 24, 'CuisineName': 'British'},
  {'CuisineId': 31, 'CuisineName': 'Wine'},
  {'CuisineId': 33, 'CuisineName': 'Sweet treats'},
  {'CuisineId': 46, 'CuisineName': 'Vegan'},
  {'CuisineId': 1040, 'CuisineName': 'Drinks'},
  {'CuisineId': 1043, 'CuisineName': 'Vegetarian'},
  {'CuisineId': 30, 'CuisineName': 'Peruvian'},
  {'CuisineId': 35, 'CuisineName': 'Seafood'},
  {'CuisineId': 34, 'CuisineName': 'Pizza'},
  {'CuisineId': 20, 'CuisineName': 'Japanese'},
  {'CuisineId': 25, 'CuisineName': 'Italian'},
  {'CuisineId': 23, 'CuisineName': 'Indian'},
  {'CuisineId': 40, 'CuisineName': 'Beers'},
  {'CuisineId': 32, 'CuisineName': 'Grill'},
  {'CuisineId': 28, 'CuisineName': 'Middle Eastern'},
  {'CuisineId': 1042, 'CuisineName': 'Greek'},
  {'CuisineId': 50, 'CuisineName': 'Taiwanese'},
  {'CuisineId': 1039, 'CuisineName': 'Tea'},
  {'CuisineId': 36, 'CuisineName': 'American'},
  {'CuisineId': 26, 'CuisineName': 'Chin

In [15]:
# Mapping: {ours: theirs}

mapper = {'name' : 'RestaurantName',
          'delivery_charge': 'DeliveryFee',
          'waiting_time': 'WaitingTime',
          'min_order': 'MinimumOrder'}

restaurant_URL_substr = 'RestaurantCode'

In [None]:
# for each restaurant, get detailed info
# and dump to file if not already on disk
for rest in list_restaurants:
    print('name: ', rest[mapper['name']])
    
    rest_detail_fn = os.path.join(folder_rest_details, rest[restaurant_URL_substr] + '.json')
    
    if os.path.isfile(rest_detail_fn):
        print('file exists')
        with open(rest_detail_fn, 'r') as jf:
            data_current_rest = json.load(jf)
    else:
        rest_URL = '/'.join([base_URL_rest_detail, rest[restaurant_URL_substr], zip_code_victoria])
        response_rest = requests.get(rest_URL)
    
        # dump json for the restaurant
        # rest_detail_fn + '.json'
        with open(rest_detail_fn, 'w') as jf:
            json.dump(json.loads(response_rest.content), jf)
            jf.close()

In [152]:
# set up lists for collecting the data
names = []
deliv_charge = []
waiting_time = []
cuisines = []
minimum_order = []

restaurant_code = []


In [153]:
# fill the data frame for the non-detailed stuff on the restaurants
for rest in list_restaurants:
    print('name: ', rest[mapper['name']])
    
    names.append(rest[mapper['name']])
    waiting_time.append(rest[mapper['waiting_time']])
    deliv_charge.append(rest[mapper['delivery_charge']])
    minimum_order.append(rest[mapper['min_order']])
    
    restaurant_code.append(rest['RestaurantCode'])
    
    cuis = []
    for elem in rest['Cuisines']:
        cuis.append(elem['CuisineName'])
        
    print(cuis)
    cuisines.append(cuis)

name:  Jean-Georges at The Connaught
['French', 'British', 'Wine', 'Sweet treats']
name:  Planet Organic | Torrington Place
['British', 'Sweet treats', 'Vegan', 'Drinks', 'Vegetarian']
name:  Crudo Cevicheria
['Peruvian', 'Seafood', 'Vegan']
name:  Daylesford Organic | Brompton Cross
['British', 'Sweet treats', 'Pizza', 'Vegan', 'Vegetarian']
name:  Daylesford Organic | Pimlico
['British', 'Wine', 'Sweet treats', 'Vegan', 'Vegetarian']
name:  Fancy Crab
['Seafood']
name:  Sumosan Twiga
['Japanese', 'Italian', 'Vegetarian']
name:  'O ver | St. James's
['Italian', 'Sweet treats', 'Pizza', 'Vegetarian']
name:  C London
['Italian']
name:  Cakes & Bubbles
['Sweet treats', 'Drinks']
name:  Cinnamon Kitchen
['Indian']
name:  Homeslice | Fitzrovia
['Pizza', 'Beers', 'Vegan', 'Vegetarian']
name:  Kahani
['Indian', 'Grill', 'Vegetarian']
name:  PITTABUN
['Middle Eastern', 'Grill', 'Vegan', 'Greek', 'Vegetarian']
name:  The Lebanese Bakery
['Middle Eastern', 'Sweet treats']
name:  Yi Fang | Soho


In [23]:
# # len(list_restaurants)
# print(len(names))
# print(len(cuisines))
# print(len(waiting_time))
# print(len(deliv_charge))

In [154]:
# set up the dataframe
restaurants_df = pd.DataFrame(columns=columns_restaurants)

restaurants_df['name'] = names
restaurants_df['cuisine'] = cuisines
restaurants_df['waiting_time'] = waiting_time
restaurants_df['delivery_charge'] = deliv_charge
restaurants_df['data_source'] = ['Supper'] * len(list_restaurants)
restaurants_df['min_order'] = minimum_order

restaurants_df['restaurant_code'] = restaurant_code

In [155]:
restaurants_df.shape

(27, 27)

In [156]:
restaurants_df.head(2)

Unnamed: 0,name,cuisine,description,price_category,breakfast,lunch,sandwiches,city,post_code,street,...,min_order,payment_cash,payment_card,waiting_time,delivery_time,delivery_postcode,rating_value,rating_count,data_source,restaurant_code
0,Jean-Georges at The Connaught,"[French, British, Wine, Sweet treats]",,,,,,,,,...,20.0,,,0,,,,,Supper,jeangeorgesattheconnaught
1,Planet Organic | Torrington Place,"[British, Sweet treats, Vegan, Drinks, Vegetar...",,,,,,,,,...,15.0,,,0,,,,,Supper,planetorganictorringtonplace


In [28]:
# list_restaurants

In [157]:
# load restaurant details and process data
rest_details_list = os.listdir('./Supper_London_Details/')
rest_details_list = [x for x in rest_details_list if x.endswith('.json')]


In [158]:
# from the detail files, collect menu items,
# and some extra info
menu_items = []
extra_info = []

for rest_detail_fn in rest_details_list:
    with open(os.path.join(folder_rest_details, rest_detail_fn), 'r') as jf:
        data_current_rest = json.load(jf)
        
        latitude = data_current_rest['Locations'][0]['Latitude']
        longitude = data_current_rest['Locations'][0]['Longitude']
        address = data_current_rest['Locations'][0]['Address']
        postcode = data_current_rest['Locations'][0]['Postcode']

        try:
            city = address.split(',')[1]
        except:
            city = None
        
        leading_nr = re.findall('^\d+\s[A-z]+\s[A-z]+', address)
        leading_nr_2 = re.findall('^\d+\s[A-z]+', address)
        if leading_nr or leading_nr_2:
            number = address.split(' ')[0].strip()
            address = ' '.join(address.split(' ')[1:]).strip()
        else:
            number = None
        
        
        restaurant_code = data_current_rest['RestaurantCode']
        
        extra_info.append([restaurant_code, address, postcode, latitude, longitude, number, city])
        
        # get menu data
        list_cat = data_current_rest['DefaultMenu']['Categories']
        
#         current_menu = []
        
        for cat in list_cat:
            menu_items_list = cat['MenuItems']
            
            for elem in menu_items_list:
                name = elem['MenuItemName']
                restaurant_name = elem['RestaurantCode']
                description = elem['Description']
                price = elem['Price']

#                 print('name: ', name)
#                 print('price: ', price)
                
#                 current_menu.append([name, restaurant_name, description, price])
                menu_items.append([name, restaurant_name, description, price])
        
#         print(latitude)
#         print(longitude)
#         print(address)
#         print(postcode)
        

In [159]:
menu_df = pd.DataFrame(menu_items, columns=['name', 'restaurant_name', 'description', 'price'])

In [160]:
menu_df.head(2)

Unnamed: 0,name,restaurant_name,description,price
0,Afternoon Tea,jeangeorgesattheconnaught,"A selection of finger sandwiches, pastries and...",55.0
1,Kale & Roasted Pumpkin Salad,jeangeorgesattheconnaught,"Russet Apples, Red Wine Vinegar Dressing",21.0


In [161]:
extra_df = pd.DataFrame(extra_info, columns=['restaurant_code', 'street', 'post_code', 'latitude', 'longitude', 'number', 'city'])

In [162]:
extra_df.head()

Unnamed: 0,restaurant_code,street,post_code,latitude,longitude,number,city
0,jeangeorgesattheconnaught,"The Connaught, Carlos Place, Mayfair, London",W1K 2AL,0.0,0.0,,Carlos Place
1,clondon,23- 25 Davies Street,W1K3DE,51.511547,-0.147962,,
2,fancycrab,"Wigmore St, Marylebone",W1U 3RD,0.0,0.0,92.0,Marylebone
3,cinnamonkitchen,Arches Lane,SW8 4PP,0.0,0.0,4.0,
4,memoriesofchina,65-69 Ebury Street,SW1W 0NZ,51.494919,-0.147433,,


In [127]:
# restaurants_df.merge(extra_df, how='outer', on='restaurant_code').head(3)

In [164]:
assert restaurants_df.shape[0] == extra_df.shape[0]

In [165]:
restaurants_df.sort_values('restaurant_code', inplace=True)
extra_df.sort_values('restaurant_code', inplace=True)

In [168]:
# fill in the extra data
for col in extra_df.columns[1:]:
    restaurants_df[col] = extra_df[col]

In [167]:
restaurants_df.head(3)

Unnamed: 0,name,cuisine,description,price_category,breakfast,lunch,sandwiches,city,post_code,street,...,min_order,payment_cash,payment_card,waiting_time,delivery_time,delivery_postcode,rating_value,rating_count,data_source,restaurant_code
12,Kahani,"[Indian, Grill, Vegetarian]",,,,,,Belgravia,SW1X 9QB,"Sloane St, Belgravia",...,20.0,,,0,,,,,Supper,Kahani
17,by CHLOE. | Covent Garden,"[British, Sweet treats, American, Vegan, Veget...",,,,,,St James's Market,SW1Y4RJ,"Norris St, St James's Market",...,15.0,,,0,,,,,Supper,bychloe
9,Cakes & Bubbles,"[Sweet treats, Drinks]",,,,,,,W1W6AB,3-8 Bolsover Street,...,20.0,,,0,,,,,Supper,cakesandbubbles


In [175]:
restaurant_menu = pd.merge(restaurants_df, menu_df, how='left', left_on=['restaurant_code'], right_on=['restaurant_name'])

In [177]:
restaurant_menu.head(3)

Unnamed: 0,name_x,cuisine,description_x,price_category,breakfast,lunch,sandwiches,city,post_code,street,...,delivery_time,delivery_postcode,rating_value,rating_count,data_source,restaurant_code,name_y,restaurant_name,description_y,price
0,Kahani,"[Indian, Grill, Vegetarian]",,,,,,Belgravia,SW1X 9QB,"Sloane St, Belgravia",...,,,,,Supper,Kahani,"Beetroot, Peach, Apple",Kahani,"With baby leaves, pinenuts and mustard",8.0
1,Kahani,"[Indian, Grill, Vegetarian]",,,,,,Belgravia,SW1X 9QB,"Sloane St, Belgravia",...,,,,,Supper,Kahani,Black Chicken,Kahani,Chetinaad roasted spice and curry leaves,10.0
2,Kahani,"[Indian, Grill, Vegetarian]",,,,,,Belgravia,SW1X 9QB,"Sloane St, Belgravia",...,,,,,Supper,Kahani,Spiced Chickpeas,Kahani,"With sweetened yoghurt, mint, tamarind chutney...",10.0


In [182]:
restaurant_menu.to_json(os.path.join(data_folder, 'supper_london_with_menu.json'))