In [1]:
from ipywidgets import widgets
import recommender

Using TensorFlow backend.


In [4]:
# Extract embeddings
city_weights = recommender.get_embeddings("city_embedding")
country_weights = recommender.get_embeddings("country_embedding")
hotel_weights = recommender.get_embeddings("hotel_embedding")
rating_weights = recommender.get_embeddings("rating_embedding")
popularity_weights = recommender.get_embeddings("popularity_embedding")
locality_weights = recommender.get_embeddings("locality_embedding")
price_weights = recommender.get_embeddings("price_embedding")
landmark_weights = recommender.get_embeddings("landmark_embedding")

hotels_df = recommender.get_df()

# Mapping items to integers with get_int_mapping().
city_index, index_city, unique_cities = recommender.get_int_mapping(hotels_df, "city")
country_index, index_country, unique_countries = recommender.get_int_mapping(hotels_df, "country")
hotel_index, index_hotel, unique_hotels = recommender.get_int_mapping(hotels_df, "hotel_name")
locality_index, index_locality, unique_localities = recommender.get_int_mapping(hotels_df, "locality")
landmark_index, index_landmark, unique_landmarks = recommender.get_int_mapping(hotels_df, "landmark")
price_index, index_price, unique_prices = recommender.get_int_mapping(hotels_df, "price")
rating_index, index_rating, unique_ratings = recommender.get_int_mapping(hotels_df, "rating")
popularity_index, index_popularity, unique_popularities = recommender.get_int_mapping(hotels_df, "popularity_rating")

KeyboardInterrupt: 

In [12]:
def find_similar(name, weights, index_name = "hotel_name", n = 10, plot = True, 
                 filtering = False, filter_name = None):
    """ Return most similar items """
    
    index = hotel_index
    rindex = index_hotel
    
    # Select index and reverse index
    if index_name == "city":
        index = city_index
        rindex = index_city
    if index_name == "country":
        index = country_index
        rindex = index_country
    if index_name == "rating":
        index = rating_index
        rindex = index_rating
    if index_name == "popularity_rating":
        index = popularity_index
        rindex = index_popularity
    if index_name == "locality":
        index = locality_index
        rindex = index_locality
    if index_name == "price":
        index = price_index
        rindex = index_price
    if index_name == "landmark":
        index = landmark_index
        rindex = index_landmark

    # Check name is in index
    try:
        # Calculate dot product between item/property and all others
        distances = np.dot(weights, weights[index[name]])
    except KeyError:
        print(" {} Not Found.".format(name))
        return
    
    # Sort distances from smallest to largest
    sorted_distances = np.argsort(distances)
        
    # Find the most similar
    closest = sorted_distances[-n:]

    # Limit results by filtering
    filter_ = None
    hotel_name = []
    city = []
    country = []
    url = []
    landmark = []
    locality = []
    rating = []

    # Limit results by filtering
    filter_ = None
    filtered_results = []
    if filtering:
        for idxs, rows in hotels_df.iterrows():
            if hotels_df.at[idxs, index_name] == name:
                filter_ = hotels_df.at[idxs, filter_name]
                break
        match_df = hotels_df[hotels_df[filter_name].str.match(filter_)]
        match_df = match_df.reset_index(drop = True)
        match_df["distance"] = None
        for idxs, rows in match_df.iterrows():
            item = match_df.at[idxs, index_name]
            distance = np.dot(weights[index[item]], weights[index[name]])
            match_df.loc[match_df.index[idxs], "distance"] = distance
        match_df = match_df.sort_values(by = ["distance"], axis = 0, ascending = False)
        list_of_filtered_items = match_df[index_name].to_list()
        list_of_filtered_distances = match_df["distance"].to_list()
        list_of_filtered_results = list(zip(list_of_filtered_items, list_of_filtered_distances))
        for item in list_of_filtered_results[1:]:
            if item not in filtered_results:
                filtered_results.append(item)     
        if plot:
            # Find closest and most far away item
            closest = filtered_results[:n // 2]
            far_away = filtered_results[-n-1: len(filtered_results) - 1]
            to_plot = [c[0] for c in closest]
            to_plot.extend(c[0] for c in far_away)

            # Find distances 
            dist = [c[1] for c in closest]
            dist.extend(c[1] for c in far_away)  

            # Colors
            colors = ["darkturquoise" for _ in range(n)]
            colors.extend("hotpink" for _ in range(n // 2))

            # Data in DataFrame
            data = pd.DataFrame({"distance": dist}, index = to_plot)

            # Bar chart
            data["distance"].plot.barh(color = colors, figsize = (10, 8), edgecolor = "k", linewidth = 2)
            plt.xlabel("Cosine Similarity");
            plt.axvline(x = 0, color = "k");

            # Title
            name_str = "Most and Least Similar to {}".format(name)
            plt.title(name_str, x = 0.2, size = 28, y = 1.05)
            return None
        
        return None

    # Plot results
    if plot:
        # Find closest and most far away item
        far_away = sorted_distances[:n // 2]
        closest = sorted_distances[-n-1: len(distances) - 1]
        to_plot = [rindex[c] for c in far_away]
        to_plot.extend(rindex[c] for c in closest)
        
        # Find distances 
        dist = [distances[c] for c in far_away]
        dist.extend(distances[c] for c in closest)
        
        # Colors
        colors = ["hotpink" for _ in range(n // 2)]
        colors.extend("darkturquoise" for _ in range(n))
        
        # Data in DataFrame
        data = pd.DataFrame({"distance": dist}, index = to_plot)
        
        # Bar chart
        data["distance"].plot.barh(color = colors, figsize = (10, 8), edgecolor = "k", linewidth = 2)
        plt.xlabel("Cosine Similarity");
        plt.axvline(x = 0, color = "k");
        
        # Title
        name_str = "Most and Least Similar to {}".format(name)
        plt.title(name_str, x = 0.2, size = 28, y = 1.05)
        
        return None

In [11]:
# Name of item
text_name = widgets.Text(description = "Name: ")

# Weights
menu_weights = widgets.Dropdown(options = ["city_weights", "country_weights", "hotel_weights", "rating_weights",
                                  "popularity_weights", "locality_weights", "price_weights", "landmark_weights"],
                        value = "hotel_weights", description = "Weights: ")

# Type
menu_type = widgets.Dropdown(options = ["city", "country", "hotel_name", "rating",
                                  "popularity_rating", "locality", "price", "landmark"],
                        value = "hotel_name", description = "Type: ")

# Button
button = widgets.Button(description = "take.me.there")
display(button)
button.style.button_color = "mediumorchid"

# Results
label_six = widgets.Label("Recommendations: ")
recommendation = find_similar(text_name.value, menu_weights.value, text_type.value, 
                                          filtering = False, filter_name = None)
label_six.value = recommendation
button.on_click(recommendation)

box = widgets.VBox([text_name, menu_weights, menu_type, button, label_six])
display(box)

Label(value='Name')

Text(value='')

Label(value='Weights')

Text(value='')

Label(value='Type')

Text(value='')

Label(value='Filtering')

Text(value='')

Label(value='Filter Name')

Text(value='')

Button(description='take.me.there', style=ButtonStyle())

Label(value='Recommendations: ')

ValueError: too many values to unpack (expected 2)