In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
import torch
from sklearn.feature_extraction.text import CountVectorizer
from transformers import BertModel, BertTokenizer
from math import radians, cos, sin, asin, sqrt, atan2


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the business listings together with their stored `category_embeddings` column.
df = pd.read_json('All_Category_embeddings.json')

In [3]:
# Preview the first rows to see which columns were loaded.
df.head()

Unnamed: 0,name,address,gmap_id,description,latitude,longitude,category,avg_rating,num_of_reviews,price,hours,MISC,state,relative_results,url,category_embeddings
0,St Peter & Paul Church,"St Peter & Paul Church, 500 Main St, Karlsruhe...",0x52d94fbefa0e6353:0xf709e2d8674fe3a,,48.093248,-100.618664,Catholic church,4.9,7,,,{'Accessibility': ['Wheelchair accessible entr...,,"[0x52d9384b75abac93:0x13526f8266cae6cf, 0x52d9...",https://www.google.com/maps/place//data=!4m2!3...,"[0.2593699098, 0.0440241471, -0.35110336540000..."
1,Northwest Martial Arts Academy,"Northwest Martial Arts Academy, 1430 Main Ave,...",0x52c8cbe775edec7d:0xb46e15ed33643070,,46.875093,-96.802717,Martial arts school,5.0,8,,"[[Thursday, 7:30AM–8PM], [Friday, 7:30AM–8PM],...",{'Accessibility': ['Wheelchair accessible entr...,Closed ⋅ Opens 7:30AM,"[0x52c8ccbcb1785327:0x2d50311eabd7afc, 0x52cf3...",https://www.google.com/maps/place//data=!4m2!3...,"[0.0723896474, -0.1581346691, -0.2116216719, -..."
2,Thad's Amazing Magic - Fargo Birthday Party Magic,Thad's Amazing Magic - Fargo Birthday Party Ma...,0x52c8cd270f50bbbb:0x4ee4629598a8090e,,46.812415,-96.856729,Magician,5.0,58,,"[[Thursday, Open 24 hours], [Friday, Open 24 h...",{'Amenities': ['Good for kids']},Open 24 hours,"[0x52c8c9613725e9ef:0xc628b86d8593e7e6, 0x52c8...",https://www.google.com/maps/place//data=!4m2!3...,"[0.0990275964, 0.12004911900000001, -0.3748326..."
3,Thad's Amazing Magic - Fargo Birthday Party Magic,Thad's Amazing Magic - Fargo Birthday Party Ma...,0x52c8cd270f50bbbb:0x4ee4629598a8090e,,46.812415,-96.856729,Children's party service,5.0,58,,"[[Thursday, Open 24 hours], [Friday, Open 24 h...",{'Amenities': ['Good for kids']},Open 24 hours,"[0x52c8c9613725e9ef:0xc628b86d8593e7e6, 0x52c8...",https://www.google.com/maps/place//data=!4m2!3...,"[-0.1832211912, -0.401704371, -0.1263380498, -..."
4,Threefold,"Threefold, 212 W Main Ave, Bismarck, ND 58501",0x52d7836b7314da5d:0xc3cc63667b8c13a0,,46.805707,-100.79299,Film production company,5.0,5,,"[[Wednesday, 9AM–6PM], [Thursday, 9AM–6PM], [F...",,Closed ⋅ Opens 9AM Thu,"[0x52d7836c2b519b77:0x74c84187e38f42b, 0x52d78...",https://www.google.com/maps/place//data=!4m2!3...,"[0.3351577818, 0.23361778260000002, -0.4228747..."


In [4]:
# (rows, columns) of the frame as loaded, before any rows are dropped.
df.shape

(27057, 16)

### Loading the pretrained BERT model from Hugging Face

In [6]:
# Fetch the pre-trained "bert-base-uncased" tokenizer and encoder.
# Both live at module level; convert_user_input() reads them as globals.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")


- Convert the user input into an embedding and return it.

In [7]:
def convert_user_input(user_input):
    """Embed free text with the module-level BERT ``model`` and ``tokenizer``.

    The text is tokenized (truncated to at most 128 tokens), run through the
    model without gradient tracking, and the final-layer token vectors are
    averaged into one vector per input. Only real tokens contribute to the
    average: positions the tokenizer added as padding (attention mask 0) are
    excluded, so batching strings of different lengths does not skew the
    result. For a single string nothing is padded, so this is the plain mean
    over all of its tokens.

    Parameters
    ----------
    user_input : str or list of str
        Text to embed.

    Returns
    -------
    numpy.ndarray
        The pooled embedding with size-1 dimensions squeezed out: a 1-D
        vector for a single string, one row per string for a longer batch.
    """
    tokens = tokenizer(user_input, truncation=True, padding=True, max_length=128, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**tokens)

    hidden_states = outputs.last_hidden_state
    # Give the (batch, seq) mask a trailing axis so it broadcasts over the
    # hidden dimension, and match dtypes so the product stays floating point.
    mask = tokens["attention_mask"].unsqueeze(-1).to(hidden_states.dtype)
    # clamp guards against a division by zero for an all-masked row.
    token_counts = mask.sum(dim=1).clamp(min=1)
    pooled = (hidden_states * mask).sum(dim=1) / token_counts

    return pooled.squeeze().numpy()

- `find_business()` takes the user's input, converts that text into an embedding, and computes the cosine similarity between it and every business's vector in `category_embeddings`, keeping the businesses whose similarity is at least 0.8.

In [10]:
def find_business(user_input, threshold=0.8):
    """Return the rows of ``df`` whose category embedding resembles the query.

    The query text is embedded with ``convert_user_input`` and compared, by
    cosine similarity, against every vector in ``df['category_embeddings']``
    in a single matrix operation.

    Parameters
    ----------
    user_input : str
        Free-text description of the kind of business to look for.
    threshold : float, optional
        Minimum cosine similarity (inclusive) a row needs to be kept.
        Defaults to 0.8.

    Returns
    -------
    pandas.DataFrame
        The matching rows of the module-level ``df``, in their original
        order. Rows without an embedding are never returned.
    """
    query = np.asarray(convert_user_input(user_input), dtype=float).reshape(1, -1)

    embeddings = df['category_embeddings']
    # Rows with a missing embedding cannot be scored; skip them instead of
    # failing, so the function also works before the null rows are dropped.
    candidate_positions = np.flatnonzero(embeddings.notna().to_numpy())
    if candidate_positions.size == 0:
        return df.iloc[candidate_positions]

    # Stack all vectors once so the similarity is one (1, n) computation
    # rather than one library call per row.
    embedding_matrix = np.array(embeddings.iloc[candidate_positions].tolist(), dtype=float)
    scores = cosine_similarity(query, embedding_matrix).ravel()

    # Map the surviving scores back to positional row indices of df.
    return df.iloc[candidate_positions[scores >= threshold]]

In [11]:
# Count the rows whose category embedding is missing.
df['category_embeddings'].isnull().sum()

57

In [12]:
# Keep only the rows that carry a category embedding. The result is assigned
# back to `df` because find_business() reads that global name.
df = df[df['category_embeddings'].notna()]

In [19]:
# Example query: look up businesses similar to 'Restaurant' and preview the matches.
# NOTE(review): the recorded output for this cell lists categories such as
# Magician, Boutique and Dollar store, so the 0.8 similarity cutoff looks too
# permissive for these embeddings - consider ranking by score or raising it.
df_ans = find_business('Restaurant')
df_ans.head(5)

Unnamed: 0,name,address,gmap_id,description,latitude,longitude,category,avg_rating,num_of_reviews,price,hours,MISC,state,relative_results,url,category_embeddings
2,Thad's Amazing Magic - Fargo Birthday Party Magic,Thad's Amazing Magic - Fargo Birthday Party Ma...,0x52c8cd270f50bbbb:0x4ee4629598a8090e,,46.812415,-96.856729,Magician,5.0,58,,"[[Thursday, Open 24 hours], [Friday, Open 24 h...",{'Amenities': ['Good for kids']},Open 24 hours,"[0x52c8c9613725e9ef:0xc628b86d8593e7e6, 0x52c8...",https://www.google.com/maps/place//data=!4m2!3...,"[0.0990275964, 0.12004911900000001, -0.3748326..."
19,Branded Envy,"Branded Envy, 919 E Main Ave, Bismarck, ND 58501",0x52d7833d945df3a3:0x736faecb113ed740,,46.805034,-100.77753,Boutique,4.3,7,,"[[Wednesday, Closed], [Thursday, Closed], [Fri...","{'Service options': ['In-store pickup', 'In-st...",Closed ⋅ Opens 10AM Sat,"[0x52d7824af95579a5:0x2816ab9499d07645, 0x52d7...",https://www.google.com/maps/place//data=!4m2!3...,"[0.1697109491, -0.2387706041, -0.182652995, 0...."
21,Branded Envy,"Branded Envy, 919 E Main Ave, Bismarck, ND 58501",0x52d7833d945df3a3:0x736faecb113ed740,,46.805034,-100.77753,Jewelry store,4.3,7,,"[[Wednesday, Closed], [Thursday, Closed], [Fri...","{'Service options': ['In-store pickup', 'In-st...",Closed ⋅ Opens 10AM Sat,"[0x52d7824af95579a5:0x2816ab9499d07645, 0x52d7...",https://www.google.com/maps/place//data=!4m2!3...,"[0.4626559615, -0.2025493979, -0.2480148673, -..."
28,Dollar General,"Dollar General, 1108 4th St SE, Crosby, ND 58730",0x53204b41b9163a9b:0x616d7ca74d5592c8,Bargain retail chain selling a range of househ...,48.900314,-103.291145,Dollar store,4.5,14,,"[[Wednesday, 8AM–10PM], [Thursday, 8AM–10PM], ...","{'Service options': ['In-store shopping', 'Del...",Open ⋅ Closes 10PM,,https://www.google.com/maps/place//data=!4m2!3...,"[0.47631388900000005, -0.1689469963, -0.226667..."
30,Dollar General,"Dollar General, 1108 4th St SE, Crosby, ND 58730",0x53204b41b9163a9b:0x616d7ca74d5592c8,Bargain retail chain selling a range of househ...,48.900314,-103.291145,Discount store,4.5,14,,"[[Wednesday, 8AM–10PM], [Thursday, 8AM–10PM], ...","{'Service options': ['In-store shopping', 'Del...",Open ⋅ Closes 10PM,,https://www.google.com/maps/place//data=!4m2!3...,"[0.403291285, -0.3845753968, -0.0526043251, 0...."
