In [4]:
import numpy as np
import pandas as pd
import string
import pickle
import warnings
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.tokenize import WordPunctTokenizer

warnings.filterwarnings('ignore')
nltk.download('stopwords')

# Load the dataset
df = pd.read_csv('zomato.csv', encoding='latin-1')

# Select only relevant columns
zomato_data = df[['Restaurant Name', 'City', 'Address', 'Locality', 'Cuisines', 'Has Table booking', 'Has Online delivery', 'Aggregate rating', 'Rating text']]

# Filter out rows with missing values in important columns
zomato_data.dropna(subset=['City', 'Locality', 'Cuisines', 'Aggregate rating'], inplace=True)

# Define stopwords without punctuation
stop = []
for word in stopwords.words('english'):
    s = [char for char in word if char not in string.punctuation]
    stop.append(''.join(s))

# Text processing function
def text_process(mess):
    """Remove punctuation and stopwords, then return cleaned text."""
    nopunc = [char for char in mess if char not in string.punctuation]
    nopunc = ''.join(nopunc)
    return " ".join([word for word in nopunc.split() if word.lower() not in stop])

# Apply text processing to the 'Cuisines' column
zomato_data['Cuisines'] = zomato_data['Cuisines'].apply(text_process)

# Combine fields to create a textual profile for each restaurant, with weighted emphasis on rating and availability of services
zomato_data['profile'] = (
    zomato_data['City'] + " " +
    zomato_data['Locality'] + " " +
    zomato_data['Cuisines'] + " " +
    zomato_data['Rating text'] * 2 + " " +  # Increase weight of rating text in profile
    (zomato_data['Has Table booking'] + " " if zomato_data['Has Table booking'].str.lower().str.contains("yes").any() else "") +
    (zomato_data['Has Online delivery'] + " " if zomato_data['Has Online delivery'].str.lower().str.contains("yes").any() else "")
)

# Vectorization
vectorizer = TfidfVectorizer(tokenizer=WordPunctTokenizer().tokenize, max_features=5000)
profile_vectors = vectorizer.fit_transform(zomato_data['profile'])

# Matrix factorization setup (using aggregate rating)
P = pd.DataFrame(profile_vectors.toarray(), index=zomato_data.index, columns=vectorizer.get_feature_names_out())
zomato_data['Normalized Rating'] = zomato_data['Aggregate rating'] / zomato_data['Aggregate rating'].max()
Q = pd.DataFrame(P.values * zomato_data['Normalized Rating'].values[:, None], index=P.index, columns=P.columns)

# Matrix factorization function
def matrix_factorization(P, Q, steps=25, gamma=0.001, lamda=0.02):
    for step in range(steps):
        for i in P.index:
            for j in Q.columns:
                eij = P.loc[i].dot(Q[j]) - Q.loc[i, j]
                Q.loc[i] += gamma * (eij - lamda * Q.loc[i])
                Q[j] += gamma * (eij - lamda * P.loc[i])
        if np.linalg.norm(eij) < 0.001:
            break
    return P, Q

# Store model in pickle file
with open('zomato_recommendation_model.pkl', 'wb') as output:
    pickle.dump(P, output)
    pickle.dump(Q, output)
    pickle.dump(vectorizer, output)

# Dynamic input for recommendations
user_input = input("Enter your preferences (e.g., 'New Delhi Italian restaurant with online delivery or table booking'): ")

# Filter based on specific feature requests
filter_online_delivery = "online delivery" in user_input.lower()
filter_table_booking = "table booking" in user_input.lower()

if filter_online_delivery:
    zomato_data = zomato_data[zomato_data['Has Online delivery'].str.lower() == "yes"]
if filter_table_booking:
    zomato_data = zomato_data[zomato_data['Has Table booking'].str.lower() == "yes"]

# Process user input
if user_input:
    test_df = pd.DataFrame([user_input], columns=['text'])
    test_df['text'] = test_df['text'].apply(text_process)
    test_vectors = vectorizer.transform(test_df['text'])
    test_v_df = pd.DataFrame(test_vectors.toarray(), columns=vectorizer.get_feature_names_out())

    # Predict item ratings
    predict_item_rating = pd.DataFrame(np.dot(test_v_df.loc[0], Q.T), index=Q.index, columns=['Rating'])
    top_recommendations = predict_item_rating.sort_values(by='Rating', ascending=False).head(5)

    # Display recommendations with priority for restaurants that meet specific criteria
    print("Top restaurant recommendations based on your input:")
    for i in top_recommendations.index:
        print(f"Restaurant Name: {zomato_data.loc[i, 'Restaurant Name']}")
        print(f"Address: {zomato_data.loc[i, 'Address']}")
        print(f"City: {zomato_data.loc[i, 'City']}, Locality: {zomato_data.loc[i, 'Locality']}")
        print(f"Cuisines: {zomato_data.loc[i, 'Cuisines']}")
        print(f"Aggregate Rating: {zomato_data.loc[i, 'Aggregate rating']} - {zomato_data.loc[i, 'Rating text']}")
        print(f"Has Table Booking: {zomato_data.loc[i, 'Has Table booking']}")
        print(f"Has Online Delivery: {zomato_data.loc[i, 'Has Online delivery']}")
        print("----")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Enter your preferences (e.g., 'New Delhi Italian restaurant with online delivery or table booking'): north indian food in noida with good rating
Top restaurant recommendations based on your input:
Restaurant Name: Night Food Service
Address: Green Market, Phase II, Greater Noida, Noida
City: Noida, Locality: Greater Noida
Cuisines: Fast Food North Indian Chinese
Aggregate Rating: 3.8 - Good
Has Table Booking: No
Has Online Delivery: No
----
Restaurant Name: Wakhra Swaad
Address: Shop 129, Jaipuria Plaza, Sector 26, Noida
City: Noida, Locality: Sector 26
Cuisines: North Indian Fast Food
Aggregate Rating: 4.1 - Very Good
Has Table Booking: No
Has Online Delivery: Yes
----
Restaurant Name: Desi Swag
Address: Shop 22, Godavari Shopping Complex, Sector 37, Noida
City: Noida, Locality: Sector 37
Cuisines: North Indian Mughlai
Aggregate Rating: 4.3 - Very Good
Has Table Booking: No
Has Online Delivery: Yes
----
Restaurant Name: Kalpak Restaurant & Cafe
Address: Shop 34-35, Central Market, Sec