In [1]:
%pip install pandas numpy scikit-learn

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Read the chicago.txt file
df = pd.read_csv('chicago.txt', sep='\t', header=None,
                 names=['Restaurant_ID', 'Restaurant_Name', 'Features'])

In [4]:
# Display the first 5 rows
print("\nFirst 5 Restaurants in Chicago Dataset:")
print(df.head())


First 5 Restaurants in Chicago Dataset:
   Restaurant_ID           Restaurant_Name  \
0              0                Moti Mahal   
1              1                   Village   
2              2  Millrose Brewing Company   
3              3             Dover Straits   
4              4       Eat Your Hearts Out   

                                            Features  
0                214 035 149 021 117 075 204 051 163  
1  026 249 174 004 132 249 198 191 192 125 075 20...  
2  137 249 194 215 213 174 249 191 192 008 075 20...  
3                137 190 174 249 212 075 205 053 165  
4    214 249 249 197 111 025 025 112 075 205 053 164  


In [5]:
# Convert features to binary vectors
def create_feature_matrix(features_str):
    # Split features and create a set of unique features
    all_features = set()
    for features in features_str:
        all_features.update(features.split())

    # Create a mapping of feature to index
    feature_to_idx = {feature: idx for idx, feature in enumerate(all_features)}

    # Create binary matrix
    n_restaurants = len(features_str)
    n_features = len(all_features)
    feature_matrix = np.zeros((n_restaurants, n_features), dtype=int)

    # Fill the matrix
    for i, features in enumerate(features_str):
        for feature in features.split():
            feature_matrix[i, feature_to_idx[feature]] = 1

    return feature_matrix, feature_to_idx

In [6]:
# Create feature matrix
feature_matrix, feature_to_idx = create_feature_matrix(df['Features'])

In [7]:
# Compute cosine similarity
cosine_sim = cosine_similarity(feature_matrix)

In [8]:
# Compute Jaccard similarity
def jaccard_similarity(matrix):
    intersection = np.dot(matrix, matrix.T)
    row_sums = matrix.sum(axis=1)
    outer_sum = np.outer(row_sums, row_sums)
    union = outer_sum - intersection
    return intersection / (union + 1e-10)  # Add small epsilon to avoid division by zero

jaccard_sim = jaccard_similarity(feature_matrix)

In [9]:
# Function to get top recommendations
def get_top_recommendations(similarity_matrix, df, restaurant_idx, n=10):
    # Get similarity scores for the restaurant
    sim_scores = similarity_matrix[restaurant_idx]

    # Get indices of top similar restaurants (excluding self)
    top_indices = np.argsort(sim_scores)[::-1][1:n+1]

    # Get restaurant names and similarity scores
    recommendations = []
    for idx in top_indices:
        recommendations.append({
            'Restaurant': df.iloc[idx]['Restaurant_Name'],
            'Similarity': sim_scores[idx]
        })

    return recommendations

In [10]:
def get_recommendations_by_id(restaurant_id):
    # Find the index of the restaurant in the dataframe
    restaurant_idx = df[df['Restaurant_ID'] == restaurant_id].index[0]
    restaurant_name = df.iloc[restaurant_idx]['Restaurant_Name']

    print(f"\nTop 10 Recommendations for {restaurant_name} (ID: {restaurant_id}) using Cosine Similarity:")
    cosine_recs = get_top_recommendations(cosine_sim, df, restaurant_idx)
    for i, rec in enumerate(cosine_recs, 1):
        print(f"{i}. {rec['Restaurant']} (Similarity: {rec['Similarity']:.4f})")

    print(f"\nTop 10 Recommendations for {restaurant_name} (ID: {restaurant_id}) using Jaccard Similarity:")
    jaccard_recs = get_top_recommendations(jaccard_sim, df, restaurant_idx)
    for i, rec in enumerate(jaccard_recs, 1):
        print(f"{i}. {rec['Restaurant']} (Similarity: {rec['Similarity']:.4f})")

In [11]:
restaurant_id = 10  # Example restaurant ID
get_recommendations_by_id(restaurant_id)


Top 10 Recommendations for Morton's of Chicago (ID: 10) using Cosine Similarity:
1. Palm (Similarity: 0.7591)
2. Lawry's the Prime Rib (Similarity: 0.7161)
3. Rupert's for Steaks (Similarity: 0.7006)
4. Rosebud on Rush (Similarity: 0.6888)
5. Sage's (Similarity: 0.6708)
6. MAGGIANO'S LITTLE ITALY (Similarity: 0.6455)
7. GIBSONS BAR & STEAKHOUSE (Similarity: 0.6351)
8. Nick's Fishmarket (Similarity: 0.6262)
9. Ruth's Chris Steak House (Similarity: 0.6228)
10. GORDON (Similarity: 0.6055)

Top 10 Recommendations for Morton's of Chicago (ID: 10) using Jaccard Similarity:
1. Rupert's for Steaks (Similarity: 0.0577)
2. Palm (Similarity: 0.0553)
3. Lawry's the Prime Rib (Similarity: 0.0541)
4. Sage's (Similarity: 0.0526)
5. Ruth's Chris Steak House (Similarity: 0.0510)
6. Outback Steakhouse (Similarity: 0.0490)
7. Joe-n-Giuseppe (Similarity: 0.0490)
8. Shaw's Deerfield (Similarity: 0.0465)
9. Kampai (Similarity: 0.0465)
10. Rosebud on Rush (Similarity: 0.0451)
