# **1. Import necessary libraries**

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# **2. Load the dataset and display basic information**

In [2]:
# Read the TripAdvisor restaurant CSV into a DataFrame and preview the
# first five rows as plain text.
data = pd.read_csv("TripAdvisor_RestaurantRecommendation.csv")
print(data.head(n=5))

                            Name       Street Address  \
0  Betty Lou's Seafood and Grill     318 Columbus Ave   
1              Coach House Diner        55 State Rt 4   
2               Table Talk Diner  2521 South Rd Ste C   
3                    Sixty Vines     3701 Dallas Pkwy   
4                   The Clam Bar    3914 Brewerton Rd   

                       Location                                          Type  \
0  San Francisco, CA 94133-3908   Seafood, Vegetarian Friendly, Vegan Options   
1     Hackensack, NJ 07601-6337          Diner, American, Vegetarian Friendly   
2   Poughkeepsie, NY 12601-5476          American, Diner, Vegetarian Friendly   
3          Plano, TX 75093-7777       American, Wine Bar, Vegetarian Friendly   
4            Syracuse, NY 13212                        American, Bar, Seafood   

            Reviews No of Reviews  \
0  4.5 of 5 bubbles   243 reviews   
1    4 of 5 bubbles    84 reviews   
2    4 of 5 bubbles   256 reviews   
3  4.5 of 5 bubbles   

In [3]:
# Report the frame's dimensions, column names, and per-column dtypes.
# A single print with a newline separator emits the same lines as
# one print call per item.
print(
    "Dataset Information:\n",
    f"Number of rows: {data.shape[0]}",
    f"Number of columns: {data.shape[1]}",
    f"\nColumns:\n{', '.join(data.columns)}",
    f"\nData Types:\n{data.dtypes}\n",
    sep="\n",
)

# Descriptive statistics, rendered in full (no column truncation).
print("Summary Statistics:\n", data.describe().to_string(), sep="\n")

Dataset Information:

Number of rows: 3062
Number of columns: 11

Columns:
Name, Street Address, Location, Type, Reviews, No of Reviews, Comments, Contact Number, Trip_advisor Url, Menu, Price_Range

Data Types:
Name                object
Street Address      object
Location            object
Type                object
Reviews             object
No of Reviews       object
Comments            object
Contact Number      object
Trip_advisor Url    object
Menu                object
Price_Range         object
dtype: object

Summary Statistics:

                   Name   Street Address                   Location                                          Type           Reviews No of Reviews                                                                                                                                                                                               Comments Contact Number                                                                                                

# **3. Preprocess the data**

In [4]:
# Keep only the restaurant name and its cuisine/type tags; the
# recommender below uses nothing else.
data = data.loc[:, ["Name", "Type"]]
print(data.head(n=5))

                            Name                                          Type
0  Betty Lou's Seafood and Grill   Seafood, Vegetarian Friendly, Vegan Options
1              Coach House Diner          Diner, American, Vegetarian Friendly
2               Table Talk Diner          American, Diner, Vegetarian Friendly
3                    Sixty Vines       American, Wine Bar, Vegetarian Friendly
4                   The Clam Bar                        American, Bar, Seafood


In [5]:
# Count the missing entries in each column.
print(data.isna().sum())

Name     0
Type    13
dtype: int64


In [6]:
# Discard every row that has at least one missing value.
# Note: the surviving rows keep their original index labels (the index is
# not reset), so labels are no longer contiguous after this step.
data = data.dropna(axis="index", how="any")

# **4. Compute the similarity matrix and create a mapping from restaurant names to DataFrame indices**

In [7]:
# Turn each restaurant's 'Type' text into a TF-IDF vector, ignoring
# English stop words.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(raw_documents=data["Type"])

# Pairwise cosine similarity between all rows of the TF-IDF matrix;
# entry (i, j) compares the i-th and j-th rows of `data` by position.
similarity = cosine_similarity(X=tfidf_matrix)

In [8]:
# Map each restaurant name to its ROW POSITION in `data` (and therefore in
# the similarity matrix, which is built row-by-row from `data`).
# Positions are used instead of `data.index` labels because rows were
# dropped earlier without resetting the index, so labels no longer match
# the 0..n-1 positions that the similarity matrix is addressed by.
indices = pd.Series(np.arange(len(data)), index=data["Name"])

# Keep only the first occurrence of each name so that a lookup always
# yields a single position. (Series.drop_duplicates() would deduplicate the
# values, which are already unique, and leave repeated names in place.)
indices = indices[~indices.index.duplicated(keep="first")]

# **5. Define and test the restaurant recommendation function**

In [9]:
# Define the restaurant recommendation function
def restaurant_recommendation(name, similarity=similarity, indices=indices, top_n=10):
    """Return the names of the restaurants most similar to `name`.

    Parameters
    ----------
    name : str
        Restaurant name to look up in `indices`.
    similarity : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the scores of the
        ``i``-th row of the module-level `data` frame against every row.
    indices : pandas.Series
        Mapping from restaurant name (index) to the row number of that
        restaurant in `similarity`. The values must be row positions of
        `similarity`, since they are used to index it directly.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series or None
        Names of up to `top_n` restaurants ordered by decreasing similarity
        (ties keep their original row order), never including the queried
        restaurant itself. ``None`` is returned, after printing a message,
        when `name` is not present in `indices`.

    Raises
    ------
    ValueError
        If `top_n` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    if name not in indices:
        print(f"The restaurant '{name}' is not found in the dataset. Please try again with a valid restaurant name.")
        return None

    index = indices[name]
    # A name that occurs more than once in `indices` yields a Series rather
    # than a scalar; fall back to its first occurrence.
    if isinstance(index, pd.Series):
        index = index.iloc[0]
    index = int(index)

    scores = np.asarray(similarity[index]).ravel()
    # Stable descending sort: equal scores keep ascending row order.
    ranked = np.argsort(-scores, kind="stable")
    # Remove the queried restaurant by position instead of assuming it is
    # ranked first: other rows can tie with it at the maximum score, in
    # which case slicing off element 0 would drop a genuine match and keep
    # the query itself in the results.
    ranked = ranked[ranked != index][:top_n]
    return data['Name'].iloc[ranked]

In [10]:
# Try the recommender on a restaurant known to be in the dataset.
print(
    "Recommended restaurants similar to 'Sixty Vines':\n",
    restaurant_recommendation(name="Sixty Vines"),
)

Recommended restaurants similar to 'Sixty Vines':
 85                             Seasons 52
553                        Brix Wine Cafe
636          Red House Beer & Wine Shoppe
825                       Wink Restaurant
2616      Thirsty Owl Wine Bar and Bistro
2633                       The River Cafe
2994                       Swing Wine Bar
529                         CUCINA urbana
737     Bellanico Restaurant and Wine Bar
1341    Spuntino Wine Bar & Italian Tapas
Name: Name, dtype: object
