In [29]:
import json
import os

import requests
from sklearn import feature_extraction
from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics.pairwise import linear_kernel

In [30]:
def get_restaurants_info(lat=40.688072, lon=-73.997385, distance=0.3, page=1):
    """Search the us-restaurant-menus RapidAPI endpoint for restaurants near a point.

    Called with no arguments, this issues the same request the notebook
    originally hard-coded (same coordinates, distance and page).

    Parameters
    ----------
    lat, lon : float
        Coordinates sent as the ``lat`` / ``lon`` query parameters.
    distance : float
        Value sent as the ``distance`` query parameter.
    page : int or str
        Result page to request.

    Returns
    -------
    dict
        The decoded JSON body of the response. The HTTP status is not checked,
        so callers should test for the ``'result'`` key before using the payload.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, or if the server does not answer within the timeout.
    json.JSONDecodeError
        If the response body is not valid JSON.
    """
    headers = {
        'x-rapidapi-host': "us-restaurant-menus.p.rapidapi.com",
        # NOTE(review): this key is committed in the notebook and should be
        # rotated. Set RAPIDAPI_KEY in the environment to override it; the
        # literal is kept only so the notebook still runs unchanged.
        'x-rapidapi-key': os.environ.get(
            "RAPIDAPI_KEY",
            "97fe65f894msh2b7de28891987d4p181282jsnf490e829c02a",
        ),
    }
    # Let requests build and escape the query string instead of baking it into the URL.
    querystring = {"lat": lat, "lon": lon, "distance": distance, "page": page}
    url2 = "https://us-restaurant-menus.p.rapidapi.com/restaurants/search/geo"
    # Without a timeout a stalled connection would block the notebook forever.
    response_res = requests.request("GET", url2, headers=headers, params=querystring, timeout=30)
    restaurants_info_json = json.loads(response_res.text)

    return restaurants_info_json

In [31]:
def get_ids_from_resonse(response_info_json):
    """Extract the restaurant ids from a restaurant-search response.

    Parameters
    ----------
    response_info_json : dict
        Decoded search response. Restaurants are expected under
        ``response_info_json['result']['data']``, each entry carrying a
        ``'restaurant_id'`` key.

    Returns
    -------
    list
        One id per entry, in response order. Empty when the response has no
        ``'result'`` key or the result has no ``'data'`` key.

    Raises
    ------
    KeyError
        If an entry in the data list has no ``'restaurant_id'``.
    """
    if 'result' not in response_info_json:
        return []
    # Read from the argument: the previous version read the notebook-level
    # variable `restaurants_info_json` and ignored what the caller passed in.
    restaurants_data_list = response_info_json['result'].get('data', [])
    return [restaurant['restaurant_id'] for restaurant in restaurants_data_list]


In [32]:
def remove_numbers(string):
    """Keep only the purely alphabetic words of ``string``.

    The text is split on whitespace and every token that is not entirely
    letters is dropped. That removes tokens containing digits, and also tokens
    containing punctuation. Each surviving word is emitted with a single
    leading space, so a non-empty result always starts with ``" "``.
    """
    return "".join(" " + token for token in string.split() if token.isalpha())

In [33]:
def get_restaurant_menu_items_from_ids(ids):
    """Build one cleaned menu-text string per restaurant id.

    For each id, in order, the menu text is fetched with
    ``get_menu_items_string`` and then filtered through ``remove_numbers``.
    Returns the resulting strings as a list aligned with ``ids``.
    """
    return [
        remove_numbers(get_menu_items_string(restaurant_id))
        for restaurant_id in ids
    ]

In [34]:
def get_menu_items_string(id):
    """Fetch a restaurant's menu items and join their descriptions into one string.

    Parameters
    ----------
    id : int or str
        Restaurant id; it is interpolated into the ``/restaurant/<id>/menuitems`` URL.

    Returns
    -------
    str
        Every item's ``menu_item_description`` concatenated, each preceded by a
        single space. Items whose description is missing or ``None`` contribute
        only the space. Returns ``""`` when the response has no ``'result'`` key.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, or if the server does not answer within the timeout.
    json.JSONDecodeError
        If the response body is not valid JSON.
    """
    headers = {
        'x-rapidapi-host': "us-restaurant-menus.p.rapidapi.com",
        # NOTE(review): this key is committed in the notebook and should be
        # rotated. Set RAPIDAPI_KEY in the environment to override it; the
        # literal is kept only so the notebook still runs unchanged.
        'x-rapidapi-key': os.environ.get(
            "RAPIDAPI_KEY",
            "97fe65f894msh2b7de28891987d4p181282jsnf490e829c02a",
        ),
    }
    url = "https://us-restaurant-menus.p.rapidapi.com/restaurant/" + str(id) + "/menuitems"
    # Without a timeout a stalled connection would block the notebook forever.
    response_menu = requests.request("GET", url, headers=headers, timeout=30)
    response_menu_json = json.loads(response_menu.text)

    if 'result' not in response_menu_json:
        return ""

    # The item descriptions (not the item names) are concatenated to form the
    # text that describes the restaurant.
    menu_items = response_menu_json['result'].get('data') or []
    return "".join(
        # `or ""` guards against a missing or null description, which would
        # otherwise raise KeyError / TypeError during concatenation.
        " " + (menu_item.get('menu_item_description') or "")
        for menu_item in menu_items
    )

In [35]:
def calc_similarities(restaurant_menus):
    """Score every restaurant's menu text against the first restaurant's.

    Parameters
    ----------
    restaurant_menus : list of str
        One menu-text document per restaurant. The first entry is the target
        that all entries (itself included) are compared with.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, len(restaurant_menus))``; element ``[0][i]`` is
        the similarity between restaurant 0 and restaurant ``i``.

    Side effects
    ------------
    Prints the vocabulary learned by the vectorizer.
    """
    # Bag-of-words counts per restaurant, with English stop words removed.
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform(restaurant_menus)

    # get_feature_names() was removed in scikit-learn 1.2; prefer its
    # replacement when present and fall back on older installations.
    if hasattr(vectorizer, "get_feature_names_out"):
        # .tolist() yields plain Python strings so the printout stays a simple list.
        feature_names = vectorizer.get_feature_names_out().tolist()
    else:
        feature_names = vectorizer.get_feature_names()
    print(feature_names)

    # Re-weight the count matrix with tf-idf.
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(X)
    tfidf_array = tfidf.toarray()

    # TfidfTransformer L2-normalises each row by default, so the plain dot
    # product computed by linear_kernel is the cosine similarity.
    target = tfidf_array[0]
    similarities = linear_kernel([target], tfidf_array)

    return similarities

In [36]:
def rank_similarities(similarities, restaurant_ids):
    """Pair each restaurant id with its similarity score and rank them.

    ``similarities[0]`` is read as the row of scores, where position ``i``
    belongs to ``restaurant_ids[i]``. The result is a list of
    ``(similarity, restaurant_id)`` tuples sorted in descending order. The
    similarity comes first in each tuple so it is the primary sort key; equal
    scores are ordered by descending id.
    """
    scores = similarities[0]
    scored_ids = [
        (scores[position], restaurant_id)
        for position, restaurant_id in enumerate(restaurant_ids)
    ]
    return sorted(scored_ids, reverse=True)

In [37]:

# Query the restaurant geo-search endpoint and print the raw decoded response.
restaurants_info_json = get_restaurants_info()
print(restaurants_info_json)

{'result': {'totalResults': 100, 'data': [{'geo': {'lon': -73.997385, 'lat': 40.688072}, 'hours': 'Mon-Thu: 11:30am-12am  Fri: 11:30am-2am  Sat: 10:30am-2am  Sun: 10:30am-12am', 'address': {'city': 'Brooklyn', 'formatted': '391 Henry St Brooklyn, NY 11201', 'street': '391 Henry St', 'state': 'NY', 'postal_code': '11201'}, 'restaurant_phone': '(718) 243-2522', 'restaurant_id': 274038, 'price_range': '', 'menus': [], 'price_range_100': 0, 'cuisines': ['Alcohol', 'Italian', 'Tapas'], 'restaurant_name': 'Bocca Lupo'}, {'geo': {'lon': -73.997466, 'lat': 40.688151}, 'hours': 'Mon-Thu: 11am-11pm  Fri-Sat: 11am-11:30am  Sun: 12pm-11pm', 'address': {'city': 'Brooklyn', 'formatted': '402 Henry St Brooklyn, NY 11201', 'street': '402 Henry St', 'state': 'NY', 'postal_code': '11201'}, 'restaurant_phone': '(718) 625-8583', 'restaurant_id': 354824, 'price_range': '$$', 'menus': [], 'price_range_100': 2, 'cuisines': ['Chinese'], 'restaurant_name': "Chan's Golden City"}, {'geo': {'lon': -73.997757, 'la

In [38]:
# Pull the restaurant ids out of the search response, in response order.
restaurant_ids = get_ids_from_resonse(restaurants_info_json)
print(restaurant_ids)

[274038, 354824, 379292, 364909, 358697, 273462, 421671, 250281, 478584, 250282, 483640, 445454, 478620, 478396, 386818, 250289, 250287, 490879, 365781, 329617, 388881, 373386, 358606, 305774, 504923]


In [39]:
# Build one cleaned menu-text document per restaurant id (one API request per id).
restaurant_menus = get_restaurant_menu_items_from_ids(restaurant_ids)
print(restaurant_menus)

[' fresh ricotta pin nuts arugula sliced tomato pesto rotating seasonal drafts bottle selection sardinian cous lemon cherry parsley pomegranate molasses hazelnut mousse bulleit ginger medium fresh orange refreshing tangy orange creamy yellow floral medium stone fruits fresh green apple refreshing bright smooth finish medium fruit spicy round shaved lemon vinaigrette with truffle cheese garlic toasted almonds pancetta chivas year angostura biters roasted fresh nutmeg solid crisp herbal wild red morello delicate refreshing', ' cheese wonton french fries or fried rice plain french fries or fried rice with chicken or pork fried rice beef with mixed vegetable in brown with garlic sauce with meat with bean curd soup', ' Hot and Served with Served with choice of rice and fried Served with soft Flat Thin Served with white Hot and Hot and Hot and Hot and Hot and Hot and Served with', ' grilled chicken organic chicken breast fresh herb thyme parsley traditional family organic chicken house fresh

In [40]:
# Score every menu document against the first restaurant's menu.
# calc_similarities also prints the learned vocabulary before the scores appear.
similairties = calc_similarities(restaurant_menus)
print(similairties)

['add', 'agave', 'aged', 'aioli', 'albarino', 'ale', 'alioli', 'almond', 'almonds', 'amber', 'american', 'anchovies', 'andaluz', 'andoluz', 'angostura', 'angustora', 'ann', 'apple', 'apples', 'applewood', 'apricot', 'argentinean', 'aromatic', 'aromatics', 'arugula', 'asparagus', 'assorted', 'available', 'avocado', 'babaghanouj', 'baby', 'bacon', 'baguette', 'baked', 'balsamic', 'balthazar', 'banana', 'basil', 'basmati', 'bavarian', 'bbq', 'bean', 'beef', 'beefsteak', 'belgian', 'belly', 'best', 'big', 'billion', 'bit', 'biters', 'bitters', 'black', 'blanc', 'blanco', 'blended', 'blistered', 'blood', 'blossom', 'board', 'boiled', 'boqueria', 'bottle', 'bouquet', 'bourbon', 'bowl', 'braised', 'branston', 'bread', 'breaded', 'breast', 'brew', 'bright', 'brioche', 'broiled', 'broken', 'brooklyn', 'broth', 'brown', 'browned', 'brut', 'builder', 'bulleit', 'butter', 'buttermilk', 'butternut', 'cabot', 'cacao', 'california', 'candied', 'capers', 'caramelized', 'carmenere', 'carrots', 'carte',

In [41]:
# Pair each score with its restaurant id and list the pairs from most to least similar.
similairties_ids = rank_similarities(similairties, restaurant_ids)
print(similairties_ids)

[(1.0, 274038), (0.15164208868595158, 364909), (0.10558481951291888, 273462), (0.07759611350820811, 478620), (0.07681460613676165, 329617), (0.06936200236742299, 365781), (0.06831218445144129, 373386), (0.06821022801005903, 421671), (0.054990909989171284, 305774), (0.04919860151499794, 358697), (0.04076295645892069, 250281), (0.03411334730572686, 388881), (0.024349410621611046, 504923), (0.02287884665296741, 490879), (0.022721611292726103, 478396), (0.021387495883565907, 354824), (0.016663922140976627, 445454), (0.014086548001389518, 250287), (0.013215103234024039, 483640), (0.012037749793234225, 250289), (0.003911919015537427, 358606), (0.0, 478584), (0.0, 386818), (0.0, 379292), (0.0, 250282)]
