In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Model
from keras.layers import Input, Dense
from sklearn.metrics.pairwise import cosine_similarity

# Load the Zomato export and reduce it to complete, unique rows in one pass.
# The four listed columns are removed *before* de-duplication, so rows that
# differ only in those columns collapse into a single row; any row that still
# has a missing value in a remaining column is then discarded.
df = (
    pd.read_csv('/content/drive/MyDrive/zomato.csv')
    .drop(columns=['url', 'phone', 'dish_liked', 'menu_item'])
    .drop_duplicates()
    .dropna()
)

# Convert 'rate' column to numeric, handle non-numeric values
def convert_rate_to_numeric(rate):
    """Parse a rating string such as ``'4.1/5'`` into its numeric score.

    Args:
        rate: Raw cell value taken from the 'rate' column.

    Returns:
        The text before the first ``'/'`` converted to ``float``, or
        ``np.nan`` when the value is not a string, contains no ``'/'``, or
        its numerator is not a number (for example ``'-/5'``).
    """
    if not isinstance(rate, str) or '/' not in rate:
        return np.nan
    try:
        return float(rate.split('/')[0])
    except ValueError:
        # A placeholder numerator ('-/5', ' /5') means "no rating"; report it
        # as missing instead of aborting the whole column conversion.
        return np.nan

cost_col = 'approx_cost(for two people)'

# Turn 'x.y/5' rating strings into floats; anything unparsable becomes NaN.
df['rate'] = df['rate'].map(convert_rate_to_numeric)

# Costs are stored as text with thousands separators (e.g. '1,200'): strip the
# commas, then coerce to numbers so that leftover junk also becomes NaN.
cost_without_commas = df[cost_col].str.replace(',', '')
df[cost_col] = pd.to_numeric(cost_without_commas, errors='coerce')

# Discard every row whose rating or cost could not be converted.
df.dropna(subset=['rate', cost_col], inplace=True)

# Rescale the numeric features to [0, 1] in place on df.
# NOTE(review): the scaler is fitted on the full dataset before the split, so
# the test rows influence the min/max used for the training rows.
numeric_features = ['rate', 'approx_cost(for two people)', 'votes']
scaler = MinMaxScaler()
df[numeric_features] = scaler.fit_transform(df[numeric_features])

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
# NOTE(review): y ('rate') is also one of the columns of X.
X = df[numeric_features]
y = df['rate']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build a single-hidden-layer autoencoder.
# NOTE(review): no noise is added to the inputs anywhere in this cell, so this
# is a plain autoencoder rather than a denoising one. The hidden layer (64
# units) is also wider than the input, so it does not compress the features.
encoding_dim = 64
input_dim = X_train.shape[1]

input_layer = Input(shape=(input_dim,))
encoded = Dense(units=encoding_dim, activation='relu')(input_layer)
# Sigmoid keeps reconstructions inside (0, 1).
decoded = Dense(units=input_dim, activation='sigmoid')(encoded)

# Two models sharing the same layers: the full autoencoder is the one that is
# trained, the encoder exposes the hidden representation afterwards.
autoencoder = Model(inputs=input_layer, outputs=decoded)
encoder = Model(inputs=input_layer, outputs=encoded)

# Train the network to reproduce its own input, monitoring reconstruction
# error on the held-out rows.
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
autoencoder.fit(
    x=X_train,
    y=X_train,
    epochs=10,
    batch_size=64,
    validation_data=(X_test, X_test),
)

# Project both splits into the learned encoding space.
encoded_X_train = encoder.predict(X_train)
encoded_X_test = encoder.predict(X_test)

# Pairwise cosine similarity between the encoded test-set restaurants.
# Row/column i refers to the i-th row of X_test, NOT to the i-th row of df.
similarities = cosine_similarity(encoded_X_test)

# Example: recommend the top 5 restaurants most similar to a given one.
# restaurant_idx is a position within X_test; change it to query another
# restaurant.
restaurant_idx = 5

# Rank every test-set restaurant from most to least similar, then remove the
# query explicitly: when several restaurants tie at the maximum similarity the
# query itself is not guaranteed to sort first, so slicing off element 0 could
# drop a genuine match and keep the query in the results.
ranked_positions = np.argsort(similarities[restaurant_idx])[::-1]
top_similar_restaurants_idx = ranked_positions[ranked_positions != restaurant_idx][:5]

# The positions above index into X_test. Translate them to df index labels
# before looking up names; using them with df.iloc would return unrelated
# restaurants from the full dataset.
top_similar_labels = X_test.index[top_similar_restaurants_idx]
top_similar_restaurants = df.loc[top_similar_labels, 'name'].values

print("Top 5 recommended restaurants similar to the selected restaurant:")
for idx, restaurant in enumerate(top_similar_restaurants, start=1):
    print(f"{idx}. {restaurant}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Top 5 recommended restaurants similar to the selected restaurant:
1. Hokey Pokey Ice Creams
2. Third Wave Coffee Roasters
3. Anand Sweets and Savouries
4. The Cake Ville
5. Pop Hop
