In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# STEP 1: Load dataset from GitHub
url = "https://raw.githubusercontent.com/dhruvj27/HotelRecommendationSystemNPN/main/data/processed/preprocessed_hotel_data_final_new.csv"
df = pd.read_csv(url)

print("✅ Dataset loaded:", df.shape)

# STEP 2: Create a combined feature column
df['combined_features'] = (
    df['location'].astype(str) + " " +
    df['rating_description'].astype(str) + " " +
    df['star_rating'].astype(str)
)

# STEP 3: Convert text into TF-IDF vectors
vectorizer = TfidfVectorizer()
feature_matrix = vectorizer.fit_transform(df['combined_features'])

# STEP 4: Compute similarity between hotels
similarity = cosine_similarity(feature_matrix)

# STEP 5: Define recommendation function
def recommend(hotel_name, top_n=5):
    if hotel_name not in df['hotel_name'].values:
        return f"❌ Hotel '{hotel_name}' not found in dataset."

    idx = df[df['hotel_name'] == hotel_name].index[0]
    scores = list(enumerate(similarity[idx]))
    scores = sorted(scores, key=lambda x: x[1], reverse=True)

    # Collect recommended hotels with details
    recommendations = df.iloc[[i[0] for i in scores[1:top_n+1]]][['hotel_name', 'city', 'price_per_night']]
    return recommendations

# STEP 6: Take user input
hotel_name = input("Enter a hotel name: ")

print("\nRecommended Hotels:\n")
print(recommend(hotel_name))


✅ Dataset loaded: (4632, 39)
Enter a hotel name: The Park Bangalore

Recommended Hotels:

                          hotel_name       city  price_per_night
14   Hyatt Centric MG Road Bangalore  Bangalore           6250.0
46    Royal Orchid Central Bangalore  Bangalore           5823.0
32             The Oberoi, Bengaluru  Bangalore          11500.0
42    Radisson Bengaluru City Center  Bangalore           7500.0
121                    The Residency    Chennai           5336.0
