# Required Imports

In [1]:
import requests,json
import pandas as pd
import numpy as np
import re

# Geodesic function import
from functools import partial
import pyproj
from shapely.ops import transform
from shapely.geometry import Point

## Required parameters.

In [2]:
# Google Direction API
key = '***'
# Source name or coordinate (Vancouver)
source = '49.283031,-123.121216'
# Name of destination (hiking Place)
hiking_place = 'Lower Falls Trail'
# Replace space with +.
hiking_place = hiking_place.replace(' ','+')
# Add British columbia, canada in hiking place name.
des = hiking_place+'+BC+Canada'

print(f'Source: {source}\n Destination: {des}')

Source: 49.283031,-123.121216
 Destination: Lower+Falls+Trail+BC+Canada


## Load Restaurants Data.

In [52]:
tdf = pd.read_csv('/kaggle/input/all-bc-merged-new-data/All_BC_Merged_New_Data.csv')

In [53]:
tdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25301 entries, 0 to 25300
Data columns (total 29 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   Title                                    25279 non-null  object 
 1   Description                              8165 non-null   object 
 2   Tagline                                  0 non-null      float64
 3   Google Address                           21382 non-null  object 
 4   Latitude                                 21319 non-null  object 
 5   Longitude                                21319 non-null  object 
 6   Phone                                    20397 non-null  object 
 7   Email                                    7311 non-null   object 
 8   Website                                  18495 non-null  object 
 9   Twitter                                  0 non-null      float64
 10  Facebook                                 0 non

In [23]:
m_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4339 entries, 0 to 15496
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Title            4339 non-null   object
 1   lat              4339 non-null   object
 2   lng              4339 non-null   object
 3   Tags (Keywords)  4339 non-null   object
 4   Features         4339 non-null   object
 5   Category         4339 non-null   object
 6   id               4339 non-null   int64 
dtypes: int64(1), object(6)
memory usage: 271.2+ KB


In [77]:
# # Get required column
m_df = tdf[['Title','Latitude','Longitude','Tags (Keywords)','Features','Category']]
m_df['id']=np.arange(0,len(m_df),1) # create ids.
m_df['Latitude'] = m_df['Latitude'].replace(' ',np.nan)
m_df= m_df.dropna(subset=['Latitude'])
m_df = m_df.replace(np.nan,'') # drop blank coordinates rows.
m_df = m_df.rename(columns={'Latitude':'lat','Longitude':'lng'}) # Rename columns.
m_df['lat'] = pd.to_numeric(m_df['lat'],downcast="float")
m_df['lng'] = pd.to_numeric(m_df['lng'],downcast="float")

In [76]:
for each in m_df['lng']:
    try:
        float(each)
    except:
        print(each)

In [67]:
m_df[m_df['Longitude']==' ']

Unnamed: 0,Title,Latitude,Longitude,Tags (Keywords),Features,Category,id


In [70]:
m_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21315 entries, 0 to 21388
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Title            21315 non-null  object
 1   Latitude         21315 non-null  object
 2   Longitude        21315 non-null  object
 3   Tags (Keywords)  21315 non-null  object
 4   Features         21315 non-null  object
 5   Category         21315 non-null  object
 6   id               21315 non-null  int64 
dtypes: int64(1), object(6)
memory usage: 1.3+ MB


## Helping Function.

In [78]:
proj_wgs84 = pyproj.Proj('+proj=longlat +datum=WGS84')

# =================== Generate coordinates using points on route in circular form in range 7km.============#

def geodesic_point_buffer(lat, lon, r):
    '''
    Generate coordinates in circular form using lat, lon and range=radius(in km.)

    '''
    data =[]
    # Creating circle from 100 meter to 3.5 km with 200 meter increment in radius.
    for e_r in np.arange(0.1,r+0.1,0.2):
        
        aeqd_proj = '+proj=aeqd +lat_0={lat} +lon_0={lon} +x_0=0 +y_0=0'
        project = partial(
            pyproj.transform,
            pyproj.Proj(aeqd_proj.format(lat=lat, lon=lon)),
            proj_wgs84)
        buf = Point(0, 0).buffer(e_r * 1000)  # distance in metres
        data.append(transform(project, buf).exterior.coords[:])
    return data

## Main Function to filter restaurant.

In [79]:
def route_restaurants(source,des,m_df=m_df,r=3.5,key=key):
    '''
    source = '49.283031,-123.121216' or 'vancouver+bc+canada'.
    des = 'lat,lng' or 'place name with + seperator'.
    m_df = df of restaurants with their coordinates.
    r = radius in km if want circle of 5km r should be 2.5
    key = Direction API key.
    '''
    m="https://maps.googleapis.com/maps/api/directions/json?origin="+source+"&destination="+des+"&key="+key
    res = requests.get(m) # Response object.
    x = res.json() # Convert to json object
    
    # =============================Arrange Data in Structure form ==============================#
    # Assign midpoints in route.
    t = x['routes'][0]['legs'][0]['steps']
    # Structure data
    data = []
    for each in t:
        end_lat=each['end_location']['lat']
        end_lng=each['end_location']['lng']
        start_lat=each['start_location']['lat']
        start_lng=each['start_location']['lng']
    
        data.append([start_lat,start_lng,end_lat,end_lng])
    # create DataFrame
    df = pd.DataFrame(data,columns=['Start_lat','Start_lng','end_lat','end_lng'])
    # ========================= Convert latitude, longitude in list ==============================#
    lat=df['Start_lat'].to_list()
    lng=df['Start_lng'].to_list()
    e_lat=df['end_lat'].to_list()
    e_lng=df['end_lng'].to_list()
    
    # append last values
    lat.append(e_lat[-1])
    lng.append(e_lng[-1])
    
    
    # ================== Call Geodesic_point_buffer function for each coordinate in route(lat,lng) ===============#
    all_cor=[]
    for x,y in zip(lat,lng):
        b = geodesic_point_buffer(x, y, r)
        all_cor.append(b)
    
    # ================ Filter Restaurants basis of coordinates of each points ===========================#
    
    # Blank DataFrame to keep all filtered restaurants.
    main_df = pd.DataFrame()

    # each coordinate(point) in all coordinate(points) of route. ex Delhi to Mumbai have 20 stops(mid point).
    for e_cor in all_cor:
    # Flatten all circle coordinates(in range 7km) for single point on route.
        a_point_cor=[each for cir in e_cor for each in cir]
        # Create DataFrame to store single point all coordinates.
        new = pd.DataFrame(a_point_cor,columns=['lng','lat'])
        # Filter Restaurant DataFrame with range of min,max(lat,lng) of single point.
        new_df=m_df[(m_df['lat']>=min(new.lat)) & (m_df['lat']<=max(new.lat)) & (m_df['lng']>=min(new.lng)) & (m_df['lng']<=max(new.lng))]
        # Check if main_df blank assign filtered restaurants else concat with main_df.
        if len(main_df)==0:
            main_df=new_df
        else:
            main_df=pd.concat([main_df,new_df])
            
    # Drop Duplicate Restaurants data.
    final_res = main_df.drop_duplicates(subset=['id'])
    
    return final_res,lat,lng

## Call route_restaurant function to get filtered restaurant.

In [80]:
filtered_res,lat,lng = route_restaurants(source,des,m_df=m_df)
filtered_res

Unnamed: 0,Title,lat,lng,Tags (Keywords),Features,Category,id
96,Tim Hortons,49.279945,-123.117531,"doughnuts,timbits,breakfastsandwich,greatdonut...","Takeout,Seating,WheelchairAccessible,FreeWifi","Cafe,Fastfood,Canadian,Breakfast,",96
97,Tim Hortons,49.279945,-123.117531,"doughnuts,timbits,breakfastsandwich,greatdonut...","Takeout,Seating,WheelchairAccessible,FreeWifi","Cafe,Fastfood,Canadian,Breakfast,",97
98,Tim Hortons,49.279945,-123.117531,"doughnuts,timbits,breakfastsandwich,greatdonut...","Takeout,Seating,WheelchairAccessible,FreeWifi","Cafe,Fastfood,Canadian,Breakfast,",98
99,Tim Hortons,49.272133,-123.097710,"doughnuts,vancouverairport,timbits,combos,capp...","Takeout,Seating,WheelchairAccessible","Cafe,Fastfood,Canadian,Breakfast,",99
100,Tim Hortons,49.272133,-123.097710,"doughnuts,vancouverairport,timbits,combos,capp...","Takeout,Seating,WheelchairAccessible","Cafe,Fastfood,Canadian,Breakfast,",100
...,...,...,...,...,...,...,...
15145,China Kitchen Restaurant,49.216179,-122.589783,",best chinese food,likely good food,restaurant...","Takeout,Reservations,Seating,ServesAlcohol,Whe...","Chinese,Asian,Lunch,Dinner,",15145
15206,Kanaka Pizza,49.206753,-122.556900,",,",,"American,Indian,Pizza,Canadian,Central-Italian,,",15206
459,Tim Hortons,49.219948,-122.535110,",good coffee,good getting coffee,good cup coff...",,",Breakfast,Lunch,Dinner,",459
460,Tim Hortons,49.219948,-122.535110,",good coffee,best tim hortons,quick service fo...",,",Breakfast,Lunch,Dinner,",460


# Remove Duplicates.

In [83]:
final_res = filtered_res.drop_duplicates(subset=['Title'])
final_res.nunique()

Title              3209
lat                2428
lng                2514
Tags (Keywords)    2388
Features            946
Category           1453
id                 3209
dtype: int64

In [86]:
final_res.to_csv('filtered.csv',encoding='utf8',index=False)

# Recommendation

In [87]:
final_res = final_res.drop(columns=['id'])
final_res.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3209 entries, 96 to 15206
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Title            3209 non-null   object 
 1   lat              3209 non-null   float32
 2   lng              3209 non-null   float32
 3   Tags (Keywords)  3209 non-null   object 
 4   Features         3209 non-null   object 
 5   Category         3209 non-null   object 
dtypes: float32(2), object(4)
memory usage: 150.4+ KB


## Create Id

In [88]:
final_res['id']=np.arange(0,len(final_res),1) # create ids.

In [89]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfv = TfidfVectorizer(min_df=3, max_features=None,
                     strip_accents='unicode', analyzer='word', token_pattern=r'\w(l,)',
                     ngram_range=(1,3),
                     stop_words='english')

In [102]:
tfv_matrix_m_df = tfv.fit_transform(m_df['Tags (Keywords)'])

In [90]:
tfv_matrix = tfv.fit_transform(final_res['Tags (Keywords)'])

In [103]:
tfv_matrix_m_df.shape

(21315, 3)

In [104]:
from sklearn.metrics.pairwise import sigmoid_kernel

sig = sigmoid_kernel(tfv_matrix_m_df,tfv_matrix)

In [140]:
m_df = m_df.reset_index()
m_df = m_df.drop(columns=['index'])
m_df.head(2)

Unnamed: 0,Title,lat,lng,Tags (Keywords),Features,Category,id
0,Chartreuse Moose Cappuccino Bar & Bistro,51.64579,-121.295052,"wraps,breakfastwrap,beans,greatcoffee,coffeesh...","Takeout,Seating,HighchairsAvailable,Wheelchair...","Cafe,Canadian,Breakfast,Lunch,Brunch,Vegetaria...",0
1,Red Rock Grill,51.647846,-121.296371,"steak,wings,prawns,schnitzel,salad,fries,wingn...","Takeout,Reservations,Seating,ParkingAvailable,...","American,Bar,Canadian,Lunch,Dinner,Drinks,Vege...",1


In [141]:
indices = pd.Series(m_df.index, index=m_df['Title']).drop_duplicates()
indices

Title
Chartreuse Moose Cappuccino Bar & Bistro        0
Red Rock Grill                                  1
Firehouse Diner                                 2
GBR - Craft Burgers & Poutines                  3
BJ's Donuts & Eatery                            4
                                            ...  
Trailhead Cafe                              21310
Whitehorn Bistro                            21311
Outpost Pub                                 21312
Timberwolf Pizza & Pasta Cafe               21313
Laggan's Mountain Bakery & Delicatessen     21314
Length: 21315, dtype: int64

In [117]:
def give_rec(res,sig=sig):
    try:
        idx=indices[res]
    except:
        print('Restaurant not found!')
    
    sig_scores = list(enumerate(sig[idx]))
    
    sig_scores = sorted(sig_scores, key=lambda x: x[1], reverse=True)
    
    sig_scores = sig_scores
    
    res_ind = [i[0] for i in sig_scores]
    
    return m_df['Title'].iloc[res_ind]

In [109]:
sig[1][1]

0.7615941559557649

In [142]:
recommended_res_on_route = give_rec('Trailhead Cafe')

In [143]:
n_df = pd.DataFrame()
n_df['Res'] = recommended_res_on_route
n_df.head()

Unnamed: 0,Res
0,Chartreuse Moose Cappuccino Bar & Bistro
1,Red Rock Grill
2,Firehouse Diner
3,GBR - Craft Burgers & Poutines
4,BJ's Donuts & Eatery


In [144]:
n_df = n_df.drop_duplicates()
len(n_df)

122

In [145]:
recommended_res = final_res.loc[final_res['Title'].isin(n_df['Res'])]

In [146]:
recommended_res

Unnamed: 0,Title,lat,lng,Tags (Keywords),Features,Category,id
96,Tim Hortons,49.279945,-123.117531,"doughnuts,timbits,breakfastsandwich,greatdonut...","Takeout,Seating,WheelchairAccessible,FreeWifi","Cafe,Fastfood,Canadian,Breakfast,",0
1159,A&W Restaurant,49.282856,-123.11776,",,","Takeout,Seating","Fastfood,Lunch,Dinner,",1
1341,Subway,49.262814,-123.090919,",best subway,good food,good service,healthy fo...",,"Fastfood,Breakfast,Lunch,Dinner,",2
2552,Dairy Queen Grill & Chill,49.259438,-123.100792,",,",,"American,,",3
2633,White Spot,49.276051,-123.1278,"unlimitedfries,burger,salad,potatoes,pie,granv...","Takeout,Seating,HighchairsAvailable,Wheelchair...","American,Canadian,Breakfast,Lunch,Dinner,Brunc...",4
2786,Moxie's Grill & Bar,49.281181,-123.132172,"whitechocolatebrownie,burger,sirloin,wings,cae...","Takeout,Reservations,Seating,StreetParking,Tel...","American,Bar,International,Grill,Canadian,Lunc...",5
2798,Little Japan Restaurant,49.284679,-123.111893,",lovely place sushi,my favorite roll,",,",,",6
2813,Little Saigon,49.273792,-123.123055,",good dim sum,delicious spring roll,great serv...","Takeout,Seating,Reservations,WheelchairAccessi...","Asian,Vietnamese,Lunch,Dinner,VeganOptions,Veg...",7
2869,Meat & Bread,49.282619,-123.109535,"porchetta,sandwich,bread,pork,coleslaw,lunchru...","ParkingAvailable,ServesAlcohol,Takeout,Seating...","Fastfood,Canadian,Soups,British,Lunch,Drinks,V...",8
3160,Panago Pizza,49.25877,-123.100769,",many choice pizza,great service,hot friendly ...",,"Pizza,,",9


In [147]:
len(recommended_res)

14

# End Recommendation

In [136]:
pip install gmplot

Collecting gmplot
  Downloading gmplot-1.4.1-py3-none-any.whl (164 kB)
[K     |████████████████████████████████| 164 kB 882 kB/s eta 0:00:01
Installing collected packages: gmplot
Successfully installed gmplot-1.4.1
Note: you may need to restart the kernel to use updated packages.


In [137]:
# Step10: Plot Restaurants and Route on Map.
# import gmplot package 
import gmplot 

gmap3 = gmplot.GoogleMapPlotter(lat[1], 
								lng[1], 13) 
# scatter method of map object 
# scatter points on the google map 
gmap3.scatter(recommended_res.lat.to_list(), recommended_res.lng.to_list(), '#FF0000', 
							size = 30, marker = False ) 

# scatter points of route on the google map.
gmap3.scatter(lat,lng, 'purple', size=60, marker=True)
# Plot method Draw a line in 
# between given coordinates 
gmap3.plot(lat, lng,'cornflowerblue', edge_width = 2.5) 
# gmap.plot(latitudes, longitudes, 'cornflowerblue', edge_width=10)

gmap3.draw("All_Recommended_Res_in_route.html") 