In [1]:
import pandas as pd
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from geopy.distance import geodesic
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

In [2]:
# Data Collection
yelp_data_file = 'Restaurants_Yelp_With_Lat.csv'
yelp_df = pd.read_csv(yelp_data_file)

yelp_df.head()

Unnamed: 0,restaurant_name,price_range,rating,total_reviews,category,location,province,address,Latitude,Longitude,Province
0,The Flying Pig,$$,4.0,1300,Canadian (New),vancouver,BC,"1168 Hamilton Street Unit 104 Vancouver, BC V6...",49.275039,-123.122038,BC
1,Kingyo,$$,4.4,1100,Japanese,vancouver,BC,"871 Denman Street Vancouver, BC V6G 2L9 Canada",49.290612,-123.137095,BC
2,Miku,$$$,4.4,2200,Japanese,vancouver,BC,"200 Granville Street Suite 70 Vancouver, BC V6...",49.286826,-123.112583,BC
3,Kosoo,$$,4.3,88,Korean,vancouver,BC,"1128 Robson Street Vancouver, BC V6E 1B2 Canada",49.284988,-123.125959,BC
4,Dinesty Dumpling House,$$,3.9,928,Chinese,vancouver,BC,"1719 Robson Street Vancouver, BC V6G 1C8 Canada",49.29078,-123.134073,BC


In [3]:
print(yelp_df.columns)

Index(['restaurant_name', 'price_range', 'rating', 'total_reviews', 'category',
       'location', 'province', 'address', 'Latitude', 'Longitude', 'Province'],
      dtype='object')


In [4]:
len(yelp_df)


1157

In [5]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder
from geopy.distance import geodesic

In [26]:
def recommendation(yelp_df, chosen_category):
    # Filter the DataFrame based on the chosen category
    filtered_df = yelp_df[yelp_df['category'] == chosen_category].copy()

    # Sort the DataFrame based on rating (higher rating means more similar to the chosen category)
    filtered_df.sort_values(by='rating', ascending=False, inplace=True)

    # print("Filtered and Sorted DataFrame Base on Rating:")


    return filtered_df

chosen_category = 'Chinese'

recommended_items = recommendation(yelp_df, chosen_category)
recommended_items.head()

Unnamed: 0,restaurant_name,price_range,rating,total_reviews,category,location,province,address,Latitude,Longitude,Province
793,S R Kitchen,$$,4.7,3,Chinese,regina,SK,"6843 Rochdale Boulevard Regina, SK S4X 2Z2 Canada",50.494107,-104.679286,SK
1084,New Golden Inn Restaurant,$$,4.6,7,Chinese,winnipeg,MB,"1084 Pembina Hwy Winnipeg, MB R3T 1Z9 Canada",49.849055,-97.152322,MB
729,The Dumpling Hut,$$,4.5,99,Chinese,montreal,QC,"3591 Rue Clark Montreal, QC H2X 2R9 Canada",45.513283,-73.572578,QC
46,Chinatown BBQ,$$,4.5,261,Chinese,vancouver,BC,"130 E Pender Street Vancouver, BC V6A 1T3 Canada",49.280187,-123.101485,BC
80,Micha,$$,4.4,42,Chinese,vancouver,BC,"120-4751 Garden City Road Richmond, BC V6X 3M7...",49.177868,-123.125541,BC


In [24]:
def get_coordinates(location):
    geolocator = Nominatim(user_agent="geoapiExercises")
    location = geolocator.geocode(location)
    if location:
        return location.latitude, location.longitude
    else:
        return None

def geographic_proximity_recommendation(yelp_df, user_location, user_province):
    # Get the user's latitude and longitude from the location string
    user_coordinates = get_coordinates(user_location)
    if user_coordinates is None:
        raise ValueError("Invalid user location.")

    # Calculate distance to the user's location for each restaurant
    yelp_df['Distance'] = yelp_df.apply(lambda row: geodesic(user_coordinates,
                                    (row['Latitude'], row['Longitude'])).kilometers, axis=1)

    # DataFrame sorted based on distance and rating
    yelp_df.sort_values(by=['Distance', 'rating'], ascending=[True, False], inplace=True)

    # Drop the Distance column as it's no longer needed in the final result
    yelp_df.drop(columns=['Distance'], inplace=True)

    return yelp_df

# User's location as a string
user_location = "Montreal"
user_province = "Quebec"

# Call the function to get geographic proximity recommendations sorted by rating
proximity_recommendations = geographic_proximity_recommendation(yelp_df, user_location, user_province)
proximity_recommendations.head()

Unnamed: 0,restaurant_name,price_range,rating,total_reviews,category,location,province,address,Latitude,Longitude,Province
713,Escondite,$$,4.1,245,Bars,montreal,QC,"1206 Avenue Union Montreal, QC H3B 3A7 Canada",45.502992,-73.567894,QC
650,Il Focolaio,$$,4.3,477,Pizza,montreal,QC,"1223 Rue du Square-Phillips Montreal, QC H3B 3...",45.504015,-73.568177,QC
682,Nouilles Zhonghua,$$,4.4,38,Noodles,montreal,QC,"908 Rue Sherbrooke O Montreal, QC H3A 1G3 Canada",45.503298,-73.574944,QC
673,Bar George,$$,4.3,222,British,montreal,QC,"1440 Rue Drummond Montreal, QC H3G 1V9 Canada",45.499069,-73.575749,QC
653,Gyu-Kaku Japanese BBQ,$$$,4.4,195,Japanese,montreal,QC,"1255 Rue Crescent Montreal, QC H3G 2B1 Canada",45.49697,-73.575057,QC


In [27]:
def hybrid_recommendation(yelp_df, chosen_category, user_location, user_province):
    # Get content-based recommendations
    content_based_df = recommendation(yelp_df, chosen_category)

    # Get geographic proximity recommendations
    proximity_df = geographic_proximity_recommendation(yelp_df, user_location, user_province)

    # Merge the two recommendation DataFrames based on the restaurant name
    recommended_df = pd.merge(content_based_df, proximity_df, on='restaurant_name', suffixes=('_content', '_proximity'))

    return recommended_df


In [29]:
chosen_category = 'Italian'
user_location = "toronto"
user_province = "ontario"


recommendations = hybrid_recommendation(yelp_df, chosen_category, user_location, user_province)

recommendations.head()

Unnamed: 0,restaurant_name,price_range_content,rating_content,total_reviews_content,category_content,location_content,province_content,address_content,Latitude_content,Longitude_content,...,price_range_proximity,rating_proximity,total_reviews_proximity,category_proximity,location_proximity,province_proximity,address_proximity,Latitude_proximity,Longitude_proximity,Province_proximity
0,Lasagne Du Plateau,$$,4.9,8,Italian,montreal,QC,"5007, Avenue Du Parc Montreal, QC H2V 4E9 Canada",45.519349,-73.594662,...,$$,4.9,8,Italian,montreal,QC,"5007, Avenue Du Parc Montreal, QC H2V 4E9 Canada",45.519349,-73.594662,QC
1,Pasta da Panza,$$,4.8,47,Italian,montreal,QC,"3825 Boulevard Saint-Martin O Laval, QC H7T 1B...",45.552741,-73.758813,...,$$,4.8,47,Italian,montreal,QC,"3825 Boulevard Saint-Martin O Laval, QC H7T 1B...",45.552741,-73.758813,QC
2,Bar Bricco,$$$,4.6,57,Italian,edmonton,AB,"10347 Jasper Avenue Edmonton, AB T5J 1Y7 Canada",53.540748,-113.498722,...,$$$,4.6,57,Italian,edmonton,AB,"10347 Jasper Avenue Edmonton, AB T5J 1Y7 Canada",53.540748,-113.498722,AB
3,Tre Visi Cafe,$$,4.5,13,Italian,winnipeg,MB,"926 Grosvenor Avenue Winnipeg, MB R3M 2W9 Canada",49.87046,-97.163042,...,$$,4.5,13,Italian,winnipeg,MB,"926 Grosvenor Avenue Winnipeg, MB R3M 2W9 Canada",49.87046,-97.163042,MB
4,Il Terrazzo Ristorante,$$$,4.5,614,Italian,victoria,BC,"555 Johnson Street Victoria, BC V8W 1M2 Canada",48.427406,-123.36944,...,$$$,4.5,614,Italian,victoria,BC,"555 Johnson Street Victoria, BC V8W 1M2 Canada",48.427406,-123.36944,BC
