In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel
import torch
import spacy

# 1️⃣ Read the hotel table and the precomputed hotel embedding matrix from disk
# NOTE(review): later code indexes `embeddings` by hotel row number, so row i of
# the matrix is presumably the vector for row i of the CSV — confirm.
hotel_data = pd.read_csv("dataset/hotel_data2.csv")
embeddings = np.load("dataset/hotel_embeddings2.npy")

# Pick the compute device up front: CUDA when torch reports it, otherwise CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# 2️⃣ Tokenizer + encoder used to embed user queries; the encoder is moved to `device`
model_name = "roberta-base"  # or 'microsoft/deberta-v3-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).to(device)

# 3️⃣ spaCy pipeline whose named entities are used to find a location in the query
nlp = spacy.load("en_core_web_trf")

# 4️⃣ Function to extract location from user query using spaCy
def extract_location(text):
    """Return the first geopolitical entity mentioned in ``text``.

    The text is run through the module-level spaCy pipeline ``nlp`` and its
    entities are scanned in order.

    Args:
        text: Free-form user query.

    Returns:
        The ``.text`` of the first entity labelled ``"GPE"``, or ``None`` when
        no such entity is present.
    """
    # Lazy scan: stops at the first GPE hit, exactly like a loop with early return.
    gpe_mentions = (entity.text for entity in nlp(text).ents if entity.label_ == "GPE")
    return next(gpe_mentions, None)

# 5️⃣ Function to generate embeddings for the user query
def get_embedding(text):
    """Embed ``text`` with the module-level ``tokenizer`` and ``model``.

    Args:
        text: Input passed straight to the tokenizer (padding and truncation
            enabled, PyTorch tensors returned).

    Returns:
        A NumPy array taken from ``last_hidden_state`` at sequence position 0,
        with all size-1 axes removed by ``squeeze()``.
    """
    encoded = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    encoded = encoded.to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    # Keep only the first token position of every sequence
    # (presumably the sentence-level <s>/[CLS] slot — confirm for the chosen model).
    first_token_vectors = hidden_states[:, 0, :]
    return first_token_vectors.cpu().numpy().squeeze()

# 6️⃣ Function to recommend hotels based on user query
def recommend_hotels(user_query, top_n=20):
    """Rank hotels by cosine similarity between the query and hotel embeddings.

    If ``extract_location`` finds a location in the query, only hotels whose
    ``cityName`` contains that location (case-insensitive, literal substring)
    are considered; otherwise every hotel is a candidate.

    Args:
        user_query: Free-form text describing what the user wants.
        top_n: Maximum number of hotels to return. Values <= 0 yield an empty
            result.

    Returns:
        A copy of the matching rows of ``hotel_data`` ordered from most to
        least similar, with an added ``similarity_score`` column, or ``None``
        when a location was detected but no hotel matches it.

    Note:
        Assumes row ``i`` of ``embeddings`` is the vector for row ``i``
        (by position) of ``hotel_data`` — TODO confirm against the code that
        produced the ``.npy`` file.
    """
    location = extract_location(user_query)
    if location:
        # regex=False: the location is user-derived text, so characters such as
        # '.', '(' or '+' must be matched literally rather than as a pattern.
        location_mask = hotel_data['cityName'].str.contains(
            location, case=False, na=False, regex=False
        ).to_numpy(dtype=bool)
        if not location_mask.any():
            print(f"No hotels found for location: {location}")
            return None
    else:
        print("No location found in the query. Showing results from all locations.")
        location_mask = np.ones(len(hotel_data), dtype=bool)

    # Work with row *positions* so the embedding lookup stays correct even if
    # hotel_data does not carry a default 0..n-1 index.
    row_positions = np.flatnonzero(location_mask)
    filtered_data = hotel_data.iloc[row_positions]

    # Generate embedding for the user query as a single-row 2-D array
    user_embedding = get_embedding(user_query).reshape(1, -1)

    # One similarity score per candidate hotel, aligned with filtered_data rows
    similarities = cosine_similarity(user_embedding, embeddings[row_positions])[0]

    # Sort descending, then cut. Slicing with [-top_n:] would return *all*
    # rows for top_n == 0, so clamp and slice from the front instead.
    top_n = max(top_n, 0)
    top_indices = similarities.argsort()[::-1][:top_n]

    recommendations = filtered_data.iloc[top_indices].copy()
    recommendations['similarity_score'] = similarities[top_indices]
    return recommendations




  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# 7️⃣ Example Usage: run one free-text query through the recommender
user_query = "looking for hotel with swimming pool"
recommended_hotels = recommend_hotels(user_query)

# recommend_hotels returns None when a detected location matches no hotel,
# so only print when there is a result; show name, city and score columns.
if recommended_hotels is not None:
    print(recommended_hotels.loc[:, ['HotelName', 'cityName', 'similarity_score']])

No location found in the query. Showing results from all locations.
                                        HotelName  \
32105                               Kunwar Haveli   
8136                  Fabhotel Evlewt Omr Chennai   
33095                           Hotel Gokul Grand   
23368        FabHotel Golden Park Jogeshwari West   
5598        V Resorts Bear Mountain Jungle Resort   
7709                            Hotel Darbar Farm   
9918                            Hotel Kailash Inn   
27909                      Hotel Indrayani Motels   
27199                      Hotel Indrayani Motels   
29059                      Amans Annabella Resort   
1467                       Amans Annabella Resort   
14855              FabHotel Vibrant Jubilee Hills   
24691                      Casa Clarks INN Mysore   
3776                          Hotel Nandana Vista   
31753  Treebo Al Saj Convention Centre Trivandrum   
7949            FabHotel Stay Eeasy Thiruvanmiyur   
7989                        Koh

In [7]:
# Display the hotel table loaded from dataset/hotel_data2.csv; as the cell's
# last expression it is rendered by the notebook.
hotel_data

Unnamed: 0,cityName,HotelCode,HotelName,HotelRating,Address,Description,FaxNumber,HotelFacilities,Map,PhoneNumber,PinCode,HotelWebsiteUrl,clean_text
0,Abu Road,1416145,Hotel Bhagwati,ThreeStar,Mount Road Talhati307026 Abu RoadRajasthan,Ideally located in the prime touristic area of...,,available in all rooms getting around for the ...,24.46923673216729|72.76929729614255,+912974228274,307026,http://www.hotelbhagwati.org,available in all rooms getting around for the ...
1,Abu Road,1505601,Hotel Royal Heritage,TwoStar,Near Arbud School Abu Road 307026Rajasthan,"Set in a prime location of Mount Abu, Hotel Ro...",+912974221199,available in all rooms languages spoken access...,24.4786369881006|72.7805074454227,+919166889966|+919928370538,307026,http://www.royalheritageabu.com,available in all rooms languages spoken access...
2,Abu Road,1512652,Hotel Bhumi,TwoStar,Mount Road Opposite Sai Baba Mandir Abu Road O...,<p>HeadLine : In Abu Road</p><p>Location : Wit...,,Daily Smoke-free property Ballroom Free newspa...,24.49442|72.793,91-93-13931393,307026,http://www.hotelbhumi.com,daily smokefree property ballroom free newspap...
3,Abu Road,1524275,Hotel Chandrawati Palace,ThreeStar,Sp 2H Ambaji Industrial Areapalanpur Highway A...,"Hotel Chandrawati Palace has a garden, shared ...",,Parking Restaurant Room service meeting/banque...,24.4596|72.76549,912974226037,307026,https://www.booking.com/hotel/in/chandrawati-p...,parking restaurant room service meetingbanquet...
4,Abu Road,1529822,OYO 3784 Hotel Durga,ThreeStar,Saini Market Outside Gurunanak Colony Abu Road...,<p>HeadLine : In Abu Road</p><p>Location : Wit...,,Designated smoking areas,24.481725|72.780791,91-9313931393,307026,http://www.hoteldurgaabu.com,designated smoking areas
...,...,...,...,...,...,...,...,...,...,...,...,...,...
33726,"Yercaud, Tamil Nadu",6188176,Kolaahalam Mainland Resorts & Spa,ThreeStar,GH Road Opposite To Taluk Office,"Set in Yercaud, Kolaahalam Mainland Resorts & ...",,Parking Pets allowed Room service meeting/banq...,11.77861|78.21598,,636601,https://www.booking.com/hotel/in/mainland-reso...,parking pets allowed room service meetingbanqu...
33727,"Yercaud, Tamil Nadu",6188604,Sushmika Resorts Inn,All,Pakoda Point Road near sengadu,"Situated in Yercaud, Sushmika Resorts Inn feat...",,Parking Restaurant Pets allowed Room service m...,11.77012|78.2348,,636601,https://www.booking.com/hotel/in/sushmika-reso...,parking restaurant pets allowed room service m...
33728,"Yuksom, Sikkim",1414698,Hotel Red Palace Hotel & Resort,TwoStar,Yuksom Kothi. P.O Yuksom 737113West Sikkim,Ideally located in the prime touristic area of...,,things to do ways to relax dining drinking ...,27.3666477203369|88.2258148193359,,737113,http://ww.purikarimunhotel.com,things to do ways to relax dining drinking and...
33729,"Zirakpur, Punjab",1016043,Aloft Chandigarh Zirakpur,FourStar,Ambala Chandigarh National Highway 22 Zirakpur...,<p>HeadLine : In Zirakpur</p><p>Location : Wit...,+911762660444,Wheelchair-accessible on-site restaurant No ac...,30.638073|76.822716,91-1762-416666,140603,http://www.starwoodhotels.com/alofthotels/prop...,wheelchairaccessible onsite restaurant no acce...
