In [160]:
import json
import os

import requests
from sklearn import feature_extraction
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics.pairwise import linear_kernel

In [161]:
def get_restaurants_info(lat=40.688072, lon=-73.997385, distance=0.3, page="1"):
    """Search the US Restaurant Menus API for restaurants around a point.

    With no arguments the request is the same one the notebook originally
    issued (fixed Brooklyn coordinates, distance 0.3, page 1).

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        distance: Search radius forwarded as the ``distance`` query
            parameter (units are defined by the API -- TODO confirm).
        page: Result page to fetch.

    Returns:
        The response body parsed from JSON.
    """
    headers = {
        'x-rapidapi-host': "us-restaurant-menus.p.rapidapi.com",
        # Prefer a key supplied through the environment.
        # NOTE(review): the literal fallback is a credential committed to the
        # notebook; it is kept only so existing runs keep working and should
        # be revoked and removed.
        'x-rapidapi-key': os.environ.get(
            "RAPIDAPI_KEY",
            "97fe65f894msh2b7de28891987d4p181282jsnf490e829c02a",
        ),
    }
    # Send every query parameter through `params` rather than baking the
    # location into the URL; key order matches the original query string.
    querystring = {"lat": lat, "lon": lon, "distance": distance, "page": page}
    url2 = "https://us-restaurant-menus.p.rapidapi.com/restaurants/search/geo"
    response_res = requests.request("GET", url2, headers=headers, params=querystring)
    restaurants_info_json = json.loads(response_res.text)

    return restaurants_info_json

In [162]:
def get_ids_from_resonse(response_info_json):
    """Extract the restaurant ids from a restaurant-search response.

    Args:
        response_info_json: Parsed search response; the restaurants are
            expected under ``response_info_json['result']['data']``.

    Returns:
        A list with the ``restaurant_id`` of every entry in the response, in
        response order, or an empty list when there is no ``'result'`` key.
    """
    if 'result' not in response_info_json:
        return []

    # Read from the argument, not from a notebook-level global, so the
    # function works on whatever response it is handed.
    restaurants_data_list = response_info_json['result']['data']
    return [restaurant['restaurant_id'] for restaurant in restaurants_data_list]


In [163]:
def remove_numbers(string):
    """Keep only the purely alphabetic words of ``string``.

    The text is split on whitespace and every token for which
    ``str.isalpha()`` is false is dropped. That removes tokens containing
    digits, and also tokens containing punctuation.

    Returns:
        The surviving words, each preceded by a single space (so a non-empty
        result starts with a space); ``""`` when nothing survives.
    """
    alphabetic_tokens = [token for token in string.split() if token.isalpha()]
    return "".join(" " + token for token in alphabetic_tokens)

In [164]:
def get_restaurant_menu_items_from_ids(ids):
    """Build one cleaned menu string per restaurant id.

    Each id is passed to ``get_menu_items_string`` and the resulting text is
    filtered through ``remove_numbers``.

    Returns:
        A list of cleaned menu strings, in the same order as ``ids``.
    """
    return [
        remove_numbers(get_menu_items_string(restaurant_id))
        for restaurant_id in ids
    ]

In [165]:
def get_menu_items_string(id):
    """Fetch a restaurant's menu items and join their names into one string.

    Args:
        id: Restaurant id; it is converted with ``str()`` and inserted into
            the request URL.

    Returns:
        Every ``menu_item_name`` from ``response['result']['data']``, each
        preceded by a single space, concatenated in response order. Returns
        ``""`` when the response has no ``'result'`` key.
    """
    headers = {
        'x-rapidapi-host': "us-restaurant-menus.p.rapidapi.com",
        # Prefer a key supplied through the environment.
        # NOTE(review): the literal fallback is a credential committed to the
        # notebook; it is kept only so existing runs keep working and should
        # be revoked and removed.
        'x-rapidapi-key': os.environ.get(
            "RAPIDAPI_KEY",
            "97fe65f894msh2b7de28891987d4p181282jsnf490e829c02a",
        ),
    }
    url = "https://us-restaurant-menus.p.rapidapi.com/restaurant/" + str(id) + "/menuitems"
    response_menu = requests.request("GET", url, headers=headers)
    response_menu_json = json.loads(response_menu.text)

    if 'result' not in response_menu_json:
        return ""

    # The joined item names serve as the restaurant's text description.
    return "".join(
        " " + menu_item['menu_item_name']
        for menu_item in response_menu_json['result']['data']
    )

In [166]:
def calc_similarities(restaurant_menus):
    """Score every menu against the first one using TF-IDF similarity.

    Args:
        restaurant_menus: Sequence of menu strings, one per restaurant. The
            first entry is the comparison target.

    Returns:
        The ``linear_kernel`` result for the first menu's TF-IDF vector
        against all menus: a single row with one score per menu, where entry
        ``i`` is the similarity between menu 0 and menu ``i``.
    """
    # Bag-of-words counts per restaurant, ignoring English stop words.
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform(restaurant_menus)

    # `get_feature_names` was removed in scikit-learn 1.2; prefer its
    # replacement and fall back only on releases that predate it.
    feature_names_getter = getattr(vectorizer, "get_feature_names_out", None)
    if feature_names_getter is None:
        feature_names_getter = vectorizer.get_feature_names
    print([str(name) for name in feature_names_getter()])

    # Re-weight the count matrix with TF-IDF.
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(X)
    tfidf_array = tfidf.toarray()

    # TfidfTransformer L2-normalises rows by default, so the plain dot
    # product computed by linear_kernel equals the cosine similarity.
    target = tfidf_array[0]
    similarities = linear_kernel([target], tfidf_array)

    return similarities

In [167]:
def rank_similarities(similarities, restaurant_ids):
    """Pair each restaurant id with its similarity score and rank the pairs.

    Args:
        similarities: Indexable whose first element holds one score per
            restaurant, aligned by position with ``restaurant_ids``.
        restaurant_ids: Restaurant ids in the same order as the scores.

    Returns:
        A list of ``(similarity, restaurant_id)`` tuples sorted in descending
        order; equal scores are ordered by descending id.
    """
    scores = similarities[0]
    # Score comes first in each tuple so tuple ordering sorts by similarity.
    scored_ids = [
        (scores[position], restaurant_id)
        for position, restaurant_id in enumerate(restaurant_ids)
    ]
    return sorted(scored_ids, reverse=True)

In [168]:

# Fetch the restaurant search results and print the parsed response.
restaurants_info_json = get_restaurants_info()
print(restaurants_info_json)

{'result': {'totalResults': 100, 'data': [{'geo': {'lon': -73.997385, 'lat': 40.688072}, 'hours': 'Mon-Thu: 11:30am-12am  Fri: 11:30am-2am  Sat: 10:30am-2am  Sun: 10:30am-12am', 'address': {'city': 'Brooklyn', 'formatted': '391 Henry St Brooklyn, NY 11201', 'street': '391 Henry St', 'state': 'NY', 'postal_code': '11201'}, 'restaurant_phone': '(718) 243-2522', 'restaurant_id': 274038, 'price_range': '', 'menus': [], 'price_range_100': 0, 'cuisines': ['Alcohol', 'Italian', 'Tapas'], 'restaurant_name': 'Bocca Lupo'}, {'geo': {'lon': -73.997466, 'lat': 40.688151}, 'hours': 'Mon-Thu: 11am-11pm  Fri-Sat: 11am-11:30am  Sun: 12pm-11pm', 'address': {'city': 'Brooklyn', 'formatted': '402 Henry St Brooklyn, NY 11201', 'street': '402 Henry St', 'state': 'NY', 'postal_code': '11201'}, 'restaurant_phone': '(718) 625-8583', 'restaurant_id': 354824, 'price_range': '$$', 'menus': [], 'price_range_100': 2, 'cuisines': ['Chinese'], 'restaurant_name': "Chan's Golden City"}, {'geo': {'lon': -73.997757, 'la

In [169]:
# Extract the restaurant ids from the search response and print them.
restaurant_ids = get_ids_from_resonse(restaurants_info_json)
print(restaurant_ids)

[274038, 354824, 379292, 364909, 358697, 273462, 421671, 250281, 478584, 250282, 483640, 445454, 478620, 478396, 386818, 250289, 250287, 490879, 365781, 329617, 388881, 373386, 358606, 305774, 504923]


In [170]:
# Build one cleaned menu string per restaurant id (one menu request is made
# per id) and print the resulting list.
restaurant_menus = get_restaurant_menu_items_from_ids(restaurant_ids)
print(restaurant_menus)

[' Roasted Beets Tarragon Chicken Salad Fresh Mozzarella Birra Steamed Clams Red Wine Poached Pear Chocolate Torte Bourbon Basilico Rosato Di Bastianich Albana Di Fattoria Monticino Rosso Umbria Orvieto Barbi Sardegna Santadi Lombardia Rosso Di Sandro Fay Sardegna Cannonau Sella Mosca Stagione Roasted Vegetables Goat Cheese Marinated Olives Creamy Mushroom Leek Zucchini Eggs In Spicy Tomato Sauce Roasted Vegetables Goat Cheese Panino Italian Rob Roy Hard Pumpkin Oregon Anderson Valley California Emilia Tentua Pederzana', ' Crab Rangoon Egg Drop Soup House Special Chicken Shrimp Vegetable House Special Sweet Sour Chicken Roast Pork Szechuan Style Moo Shu Shrimp Eggplant In Garlic Sauce Fried Chicken Wings Honey Chicken Wings Sesame Chicken Wings General Chicken Wings Triple Crown LC Spare Ribs LC String Bean In Garlic Sauce Pan Fried Wonton Steamed Dumpling Vegetable Chicken Beef Vegetable Steamed Mixed Vegetable', ' Fried Wontons Pan Fried Wontons with Garlic Sauce Hot and Sour Soup Sh

In [171]:
# Score every menu against the first restaurant's menu. calc_similarities
# also prints the vectorizer's feature names before the scores are printed here.
similairties = calc_similarities(restaurant_menus)
print(similairties)

['action', 'adobo', 'agnolotti', 'ajo', 'al', 'albana', 'ale', 'alexander', 'almond', 'almonds', 'amb', 'americano', 'amour', 'amstel', 'anago', 'anderson', 'antigua', 'appetizer', 'apple', 'arroz', 'arugula', 'asado', 'asahi', 'australian', 'avocado', 'awake', 'babaganouj', 'babaghanouj', 'baby', 'bacalao', 'bacon', 'baguette', 'baked', 'balady', 'ball', 'banana', 'bananas', 'barbi', 'basilico', 'bastianich', 'battered', 'bbq', 'bean', 'beef', 'beer', 'beets', 'bellini', 'ben', 'berenjena', 'berkshire', 'berries', 'berry', 'birra', 'biscuit', 'bisque', 'black', 'blanc', 'blast', 'blend', 'bloody', 'blt', 'blue', 'bocata', 'boiled', 'bolognese', 'bonafide', 'bonafier', 'boost', 'botanically', 'bottled', 'boucas', 'bourbon', 'bourgogne', 'box', 'bravas', 'bravo', 'brazo', 'bread', 'breakfast', 'breast', 'breeze', 'brew', 'brewed', 'broccoli', 'broiled', 'broth', 'brown', 'brownstone', 'brunch', 'brussels', 'budweiser', 'bun', 'burger', 'butter', 'cabernet', 'cabra', 'caesar', 'cake', 'c

In [172]:
# Pair each score with its restaurant id and print the pairs sorted from most
# to least similar; the first restaurant is compared with itself, so it is
# expected to rank first.
similairties_ids = rank_similarities(similairties, restaurant_ids)
print(similairties_ids)

[(0.9999999999999997, 274038), (0.09059879574957719, 250282), (0.07531045153488693, 478584), (0.0739844116888645, 273462), (0.06456450745835696, 379292), (0.06197235189790897, 354824), (0.050259502715598994, 365781), (0.04758470931614692, 329617), (0.04380102067123827, 478620), (0.04326367221675731, 250289), (0.041637182759819716, 445454), (0.03957910832875771, 358606), (0.03250987586895601, 388881), (0.02713424831984004, 504923), (0.024377203696405857, 490879), (0.02339837216381761, 250281), (0.019398063437529853, 305774), (0.016369882949216267, 250287), (0.011814389328888028, 373386), (0.010358024970292975, 364909), (0.00637358291054111, 421671), (0.004796730506625033, 483640), (0.0, 478396), (0.0, 386818), (0.0, 358697)]
