In [186]:
from sklearn import feature_extraction
from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics.pairwise import linear_kernel

import requests
import json

In [187]:
def get_restaurants_info():
    # get resturants by location, should have parameters as location
    headers = {
    'x-rapidapi-host': "us-restaurant-menus.p.rapidapi.com",
    'x-rapidapi-key': "97fe65f894msh2b7de28891987d4p181282jsnf490e829c02a",
    }
    querystring = {"page":"2"}
    url2 ="https://us-restaurant-menus.p.rapidapi.com/restaurants/search/geo?lat=40.688072&lon=-73.997385&distance=0.3"
    response_res = requests.request("GET", url2, headers=headers, params=querystring)
    restaurants_info_json = json.loads(response_res.text)

    return restaurants_info_json

In [188]:
def get_ids_from_resonse(response_info_json):
    # get 25 resturant ids from above response
    if 'result' in response_info_json:
        restaurants_data_list = restaurants_info_json['result']['data']
        ids = map(lambda x: x['restaurant_id'], restaurants_data_list)
        return list(ids)
    else:
        return []


In [189]:
def get_tags_from_resonse(response_info_json):
    # get 25 resturant tags from above response
    if 'result' in response_info_json:
        restaurants_data_list = restaurants_info_json['result']['data']
        tags = map(lambda x: " ".join(x['cuisines']), restaurants_data_list)
        return list(tags)
    else:
        return []

In [190]:
def calc_similarities(restaurant_menus): 
    # form vector for each resturant
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform(restaurant_menus)

    print(vectorizer.get_feature_names())
    # tf-idf on resturant menue matrix
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(X)
    tfidf_array = tfidf.toarray()

    # calculate similarity with first resturant using cosine_similarity
    target = tfidf_array[0]
    similarities = linear_kernel([target], tfidf_array)

    return similarities

In [191]:
def rank_similarities(similarities, restaurant_ids):
    # form pairs of similarity-id, and find most similar restaurants
    similairties_ids = []
    similairties_array = similarities[0]

    for i in range(len(restaurant_ids)):
        similarity = similairties_array[i]
        id = restaurant_ids[i]
        similairty_id = (similarity, id) # construct tuple with similarity first
        similairties_ids.append(similairty_id)

    similairties_ids.sort(reverse=True)

    return similairties_ids

In [192]:

restaurants_info_json = get_restaurants_info()
print(restaurants_info_json)

{'result': {'totalResults': 100, 'data': [{'geo': {'lon': -74.001238, 'lat': 40.687549}, 'hours': 'Daily: 11:30am-11pm', 'address': {'city': 'BROOKLYN', 'formatted': '117 Columbia St BROOKLYN, NY 11231', 'street': '117 Columbia St', 'state': 'NY', 'postal_code': '11231'}, 'restaurant_phone': '(718) 237-4300', 'restaurant_id': 309898, 'price_range': '$$$', 'menus': [], 'price_range_100': 3, 'cuisines': ['Italian', 'Pasta', 'Pizza', 'Seafood'], 'restaurant_name': 'Casa Di Campagna'}, {'geo': {'lon': -73.994565, 'lat': 40.690155}, 'hours': ' ', 'address': {'city': 'Brooklyn', 'formatted': '156 Atlantic Ave Brooklyn, NY 11201', 'street': '156 Atlantic Ave', 'state': 'NY', 'postal_code': '11201'}, 'restaurant_phone': '(347) 689-4192', 'restaurant_id': 509610, 'price_range': '', 'menus': [], 'price_range_100': 0, 'cuisines': [], 'restaurant_name': 'Swallow Cafe'}, {'geo': {'lon': -73.994565, 'lat': 40.690155}, 'hours': 'Daily: 12pm-11pm', 'address': {'city': 'Brooklyn', 'formatted': '156 Atl

In [193]:
restaurant_ids = get_ids_from_resonse(restaurants_info_json)
print(restaurant_ids)

[309898, 509610, 250279, 250280, 150753, 489103, 386010, 250283, 367456, 284202, 150842, 490544, 150786, 250266, 379346, 150796, 150799, 356581, 250267, 363672, 300146, 355106, 250286, 250285, 150803]


In [194]:
restaurant_tags = get_tags_from_resonse(restaurants_info_json)
print(restaurant_tags)

['Italian Pasta Pizza Seafood', '', 'Mediterranean Middle Eastern', 'Middle Eastern Pizza', 'Thai', '', 'Cocktails', 'Chinese', 'Coffee &amp; Tea', 'American Barbecue', 'Indian', '', 'Salads Sandwiches Smoothies &amp; Juices', 'Deli Food Sandwiches', 'Italian Pizza', 'Italian Pizza', 'Thai Vegetarian', 'American', 'Seafood', 'Italian', 'Italian Pizza Sandwiches', 'American (New) Italian', 'Coffee &amp; Tea Sandwiches', 'American Deli Food', 'Mediterranean']


In [195]:
similairties = calc_similarities(restaurant_tags)
print(similairties)

['american', 'amp', 'barbecue', 'chinese', 'cocktails', 'coffee', 'deli', 'eastern', 'food', 'indian', 'italian', 'juices', 'mediterranean', 'middle', 'new', 'pasta', 'pizza', 'salads', 'sandwiches', 'seafood', 'smoothies', 'tea', 'thai', 'vegetarian']
[[1.         0.         0.         0.20404026 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.57876014 0.57876014 0.         0.
  0.54089159 0.39583744 0.45558637 0.18279257 0.         0.
  0.        ]]


In [196]:
similairties_ids = rank_similarities(similairties, restaurant_ids)
print(similairties_ids)

[(0.9999999999999999, 309898), (0.57876013791234, 379346), (0.57876013791234, 150796), (0.540891585779221, 250267), (0.45558637404811614, 300146), (0.3958374417599965, 363672), (0.2040402600729245, 250280), (0.1827925704504811, 355106), (0.0, 509610), (0.0, 490544), (0.0, 489103), (0.0, 386010), (0.0, 367456), (0.0, 356581), (0.0, 284202), (0.0, 250286), (0.0, 250285), (0.0, 250283), (0.0, 250279), (0.0, 250266), (0.0, 150842), (0.0, 150803), (0.0, 150799), (0.0, 150786), (0.0, 150753)]
