In [1]:
import os

import json
import pandas as pd 
import numpy as np 
from pandas import Series
from pandas import DataFrame
import requests

In [2]:
# Data collection paths: folder (relative to the notebook's working directory)
# into which the JSON exports produced later in this notebook are written.
data_folder = './Data/'


#### Foodhub Restaurants

In [11]:
# Request the list of takeaway restaurants for one postcode from the foodhub
# (t2sonline) API. The headers mimic the browser request of the foodhub web page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'de,en-US;q=0.7,en;q=0.3',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://foodhub.co.uk',
    'Connection': 'keep-alive',
    'Referer': 'https://foodhub.co.uk/list/SW9%209TN',
    'TE': 'Trailers',
}

params = (
    # The token can be supplied through the FOODHUB_API_TOKEN environment
    # variable so it does not have to live in the notebook; the previously
    # hard-coded value is kept as the fallback so existing runs behave the same.
    ('api_token', os.environ.get('FOODHUB_API_TOKEN', '99b8ad5d2f9e80889efcd73bc31f7e7b')),
    ('app_name', 'FOODHUB'),
)

# form-encoded request body: the postcode whose restaurants are listed
data = {
  'postcode': 'SW9 9TN'
}

# timeout: without it a stalled connection would block the cell indefinitely
response = requests.post('https://api.t2sonline.com/foodhub/takeaway/list',
                         headers=headers, params=params, data=data, timeout=30)
# fail here on an HTTP error instead of parsing an error body as restaurant data
response.raise_for_status()

In [12]:
# decode the JSON body of the restaurant-list response into Python objects
raw_restaurant_body = response.content
foodhub_restaurants = json.loads(raw_restaurant_body)

In [15]:
# collect, per restaurant, the list of its cuisine names
# (one inner list per entry of foodhub_restaurants['data'], in the same order)
restaurants_cuisines = [
    [cuisine['name'] for cuisine in restaurant['cuisines']]
    for restaurant in foodhub_restaurants['data']
]

In [16]:
# build a dataframe from the restaurant records (one row per restaurant)
restaurant_records = foodhub_restaurants['data']
foodhub_restaurants_df = pd.DataFrame(restaurant_records)

In [17]:
# keep the restaurant ids for the menu requests further down;
# .loc slices by label and includes the end label, so this takes index labels 0..24
id_column = foodhub_restaurants_df[['id']]
stor_list_id = id_column.loc[:24]

In [18]:
# Remove the original 'city' column; the 'town' column is renamed to 'city'
# in the mapper below. drop(..., errors='ignore') keeps this cell idempotent:
# re-running it no longer raises KeyError once the column is already gone.
foodhub_restaurants_df = foodhub_restaurants_df.drop(columns=['city'], errors='ignore')

In [19]:
# column-name mapping: foodhub field name -> name used in the target schema
# NOTE(review): 'rating' -> 'rating_value' together with 'rating_value' -> 'rating_count'
# looks like a shifted pair; confirm against the meaning of the API fields.
mapper = dict(
    town='city',
    postcode='post_code',
    lat='latitude',
    lng='longitude',
    cash_payment='payment_cash',
    card_payment='payment_card',
    rating='rating_value',
    rating_value='rating_count',
)

In [20]:
# apply the mapper to the column labels; the result is a new dataframe
foodhub_restaurants_df_new = foodhub_restaurants_df.rename(columns=mapper)

In [11]:
#foodhub_restaurants_df_new

In [21]:
# attach the per-restaurant cuisine lists as a new 'cuisine' column
cuisine_column = {'cuisine': restaurants_cuisines}
foodhub_restaurants_df_new = foodhub_restaurants_df_new.assign(**cuisine_column)

In [22]:
# add the data-source label plus placeholder columns (all NaN) that the
# target schema expects, then show the 'url' column
new_columns = ['number_addon', 'breakfast', 'lunch', 'sandwiches', 'price_category']
placeholder_values = {column: np.nan for column in new_columns}
foodhub_restaurants_df_new = foodhub_restaurants_df_new.assign(
    data_source='foodhub',
    **placeholder_values,
)
foodhub_restaurants_df_new['url']

0        afandinalebanesecusine.co.uk
1                   bari-pizzeria.com
2                 batterseagarden.com
3                   bebskitchen.co.uk
4                   bigbenpizza.co.uk
5                 bombayinnonline.com
6             bombaykitchenonline.com
7            cardamomclubtakeaway.com
8       crownpointpizzaandgrill.co.uk
9                      curry-base.com
10    dilpasandexpressstreatham.co.uk
11               fornopizzaonline.com
12              fusiongrilllondon.com
13           godfatherpizzaonline.com
14                hottakelondon.co.uk
15                jrjamesonline.co.uk
16             kinggrillbattersea.com
17    laparmapizzeria-battersea.co.uk
18                 londonfastfood.com
19                   mrmaonline.co.uk
20               napurarestaurant.com
21           newmilanopizzaonline.com
22                    orchidpizza.com
23           oregano-leafpizzeria.com
24    oreganoleafpizzeriaonline.co.uk
25                   pizzamania.co.uk
26          

In [23]:
# bring the columns into the target schema order
# (selecting by this list also leaves out every column that is not named in it)
column_order = [
    'name',               # restaurant name
    'cuisine',            # such as 'italian', 'burgers'; NOTE: should this be a list?
    'description',
    'price_category',     # such as '€', '€€', '€€€', mapped to 1, 2, 3, ...
    'breakfast',
    'lunch',
    'sandwiches',
    'city',
    'post_code',
    'street',
    'number',             # house number, like 56
    'number_addon',       # add-on to the house number, like 56/2 or 56-2
    'phone',
    'url',
    'latitude',
    'longitude',
    'delivery_charge',
    'min_order',          # minimum order amount, in pounds
    'payment_cash',
    'payment_card',
    'waiting_time',       # how long after the order pickup is possible [do we need this?]
    'delivery_time',      # typical delivery time to the address
    'delivery_postcode',  # where delivery is possible to
    'rating_value',
    'rating_count',       # how many ratings?
    'data_source',        # such as 'uber eats', 'foodhub', etc.
]
foodhub_restaurants_df_new = foodhub_restaurants_df_new.loc[:, column_order]

In [24]:
# As an intermediate result, save the dataframe that holds only restaurant info.
# The output folder is created first so the export also works when ./Data/
# does not exist yet (to_json does not create missing directories).
os.makedirs(data_folder, exist_ok=True)
foodhub_restaurants_df_new.to_json(os.path.join(data_folder, 'foodhub.json'))

#### Foodhub Restaurant Menu

In [25]:
# query parameters sent with every menu request (this rebinds `params`,
# replacing the tuple used for the restaurant-list request)
# NOTE(review): 'day' and 'date' are fixed to one timestamp and there is an
# empty ('', '') pair; presumably copied from a browser session. Confirm
# whether the API needs current values.
menu_query = {
    'app_name': 'FOODHUB',
    '': '',
    'sref': '795946',
    'day': 'tuesday',
    'date': 'Tue Dec 10 2019 17:04:37 GMT 0100 (Mitteleurop\xE4ische Normalzeit)',
}
# same shape as before: a tuple of (key, value) pairs in insertion order
params = tuple(menu_query.items())

In [26]:
# menu page URLs of 25 restaurants, used as the Referer header of the menu requests;
# every URL shares the same prefix, so only the restaurant-specific tail is listed
order_url_prefix = 'https://foodhub.co.uk/ordernow/'
order_url_tails = (
    'afandina-lebanese-cusine/a/3979730',
    'bari-pizzeria/battersea-park/4025780',
    'battersea-garden/a/4011710',
    'bebs-kitchen/london/3996945',
    'big-ben-pizza/a/4005365',
    'bombay-inn/surrey/3987255',
    'bombay-kitchen/london/3990350',
    'crown-point-pizza-and-grill/west-norwood/3997715',
    'curry-base/battersea/3994275',
    'dilpasand-express/surrey/4014660',
    'forno-pizza/a/4009435',
    'fusion-grill/london/3983855',
    'go-go-gourmet-pizza/london/4026705',
    'godfather-stone-baked-italian-pizza/london/4015190',
    'fresh-gourmet-pizza/london/4014760',
    'hot-take/a/3996930',
    'j-r-james/a/3986275',
    'king-grill/a/4011705',
    'mr-ma-kitchen/london/27330',
    'napura/nunhead/4027200',
    'new-milano-pizza/a/4009280',
    'orchid-pizza/a/4007700',
    'oregano-leaf/london/4017535',
    'pizza-mania/a/4009985',
    'poppy-hana/london/3999055',
)
referer_list = [order_url_prefix + tail for tail in order_url_tails]
len(referer_list)

25

In [27]:
# Build one header dict per menu request: the restaurant id goes into 'Store'
# and the matching menu page URL into 'Referer'.
# Pairing with zip() replaces the hard-coded range(25): if fewer ids (or
# referers) are available, the list is simply shorter instead of raising
# IndexError.
# NOTE(review): ids and referers are paired purely by position. The url column
# displayed earlier lists cardamomclubtakeaway.com at row 7, which has no entry
# in referer_list, so the two sequences may not line up. Confirm.
headers_list = [
    {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'de,en-US;q=0.7,en;q=0.3',
        'Store': str(store_id),
        'Origin': 'https://foodhub.co.uk',
        'Connection': 'keep-alive',
        'Referer': referer,
        'TE': 'Trailers',
    }
    for store_id, referer in zip(stor_list_id['id'], referer_list)
]

In [28]:
#headers_list[11]

In [29]:
# Request the "popular" menu items once per prepared header set and collect
# every returned item in one flat list.
response_list = []  # raw responses, kept so a failing request can be inspected
data_menu = []      # menu-item dicts of all restaurants, in request order
for header in headers_list:
    # timeout: a stalled connection must not hang the whole cell
    response = requests.get('https://api.t2sonline.com/consumer/menu/popular',
                            headers=header, params=params, timeout=30)
    # stop on an HTTP error instead of parsing an error body as menu data
    response.raise_for_status()
    response_list.append(response)
    data_menu.extend(json.loads(response.content)['data'])

In [21]:
#data_menu

In [30]:
# turn the collected menu items into a dataframe (one row per menu item);
# the column selection happens in a later cell
df = pd.DataFrame(data_menu)

In [32]:
# show the first collected menu item to see which fields a record carries
data_menu[0]

{'id': 10685776,
 'host': 'afandinalebanesecusine.co.uk',
 'item_addon_cat': '7395670',
 'name': 'Falafel Special',
 'description': 'Deep fried mixture of ground chickpeas and broad beans with spices served with hummus, salad, lebanese bread and tahina sauce.',
 'information': None,
 'price': '10.50',
 'subcat': 2330863,
 'image': '',
 'aws_image': '',
 'image_backup': '',
 'addon_type': '4',
 'monday': 1,
 'tuesday': 1,
 'wednesday': 1,
 'thursday': 1,
 'friday': 1,
 'saturday': 1,
 'sunday': 1,
 'delivery': 1,
 'collection': 1,
 'show_online': 1,
 'background_color': '0000ff',
 'font_color': 'ffffff',
 'pos': 3,
 'offer': 'NONE',
 'btn_name': '',
 'mip_id': 0,
 'coupon_allowed': 1,
 'collection_discount_allowed': 1,
 'online_discount_allowed': 1,
 'item_code': None,
 'added': '2018-08-10 15:39:24',
 'user_id': 'yohanpradeep',
 'page': 'add_item_insert',
 'modified': '2019-12-04 22:05:53',
 'modified_by': 'Fabinbaptist',
 'modified_page': 'update_description',
 'vat': '1',
 'exclude_f

In [23]:
# keep only the menu fields needed for the merge with the restaurant data
menu_columns = ['host', 'name', 'description', 'price']
menu_df = df.loc[:, menu_columns]

In [24]:
# left-join the menu items onto the restaurants: restaurant 'url' matches menu 'host'
restaurant_menu = foodhub_restaurants_df_new.merge(
    menu_df,
    how='left',
    left_on=['url'],
    right_on=['host'],
)

In [25]:
# drop the rows that have no price (NaN in 'price')
restaurant_menu = restaurant_menu.dropna(subset=['price'])

In [1]:
# Preview the first two rows of the merged restaurant/menu table.
# Author's note on the `city` values: 'Brixton' and 'SURREY' are said not to be
# London, the rest is either London or a part/district of London.
# NOTE(review): verify this statement against the data.
# Features that hold only NaN values are not available from the data source.
restaurant_menu.head(2)

NameError: name 'restaurant_menu' is not defined

In [30]:
# restaurant_menu.to_csv(r'foodData.csv')

In [30]:
# write the merged restaurant/menu table as JSON into the data folder
menu_export_path = os.path.join(data_folder, 'foodhub_with_menu.json')
restaurant_menu.to_json(menu_export_path)