In [31]:
import warnings
warnings.filterwarnings('ignore')


from engines import RecommenderInterface
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pickle
import os
import numpy as np 
import pandas as pd

In [32]:
data_dir = '/Users/dare_devil/Documents/MLDS_2024/Quarter2/DataMining/Project/data'

test_fname = 'test_df_filtered.json'
train_fname = 'train_data_red.json'

# Collaborative filters Data Fnames
cf_ub_fname = 'collaborative_filters/user_based.pkl'
cf_ib_fname = 'collaborative_filters/item_based.pkl'

# Content Based Data Fnames
cb_user_profiles_fname = 'user_profiles.pkl'
cb_item_profiles_fname = 'item_profiles.pkl'

In [33]:
# Loading Training Data 
train_df = pd.read_json(os.path.join(data_dir, train_fname))
train_df.head(2)

Unnamed: 0,review_id,user_id,business_id,user_rating,useful,funny,cool,text,date,name,...,Wine & Spirits,Beer,Arts & Entertainment,Cafes,Bakeries,Burgers,Asian Fusion,Vegetarian,Japanese,Salad
0,LEGT4hPIyEVMzy4HUROtoQ,-1MF2tosrw2WcCxeVNk81Q,9c7MUiE6VI8NesjPdj5FkA,3,4,0,1,A coworker and I stopped by here for an aftern...,2017-12-13 22:58:16,Bubblefish,...,0,0,0,0,0,0,0,0,1,0
1,QvoAHbml7mkqFrx9K18h2Q,-1MF2tosrw2WcCxeVNk81Q,1OfhM-ZKvcpxyxptCCzEwA,4,4,0,0,Stopped by here for happy hour and was fairly ...,2017-08-01 22:47:11,Independence Beer Garden,...,1,1,0,0,0,0,0,0,0,0


In [34]:
# Content Based Data 

## Loading user and item profiles
cb_user_profiles = pickle.load(open(os.path.join(data_dir, cb_user_profiles_fname), 'rb'))
cb_item_profiles = pickle.load(open(os.path.join(data_dir, cb_item_profiles_fname), 'rb'))

## Defining User, Item Matrices and Mapping Functions
cb_user_matrix = np.array([profile  for user_id, profile in cb_user_profiles.items()])
cb_item_matrix = np.array([profile  for item_id, profile in cb_item_profiles.items()])

cb_idx2user = { idx:user_id for idx,user_id in enumerate(cb_user_profiles.keys())}
cb_idx2item = { idx:item_id for idx,item_id in enumerate(cb_item_profiles.keys())}

# Defining Similarity Matrices
cb_user_sim_matrix = cosine_similarity(cb_user_matrix)
np.fill_diagonal(cb_user_sim_matrix, 0)
cb_item_sim_matrix = cosine_similarity(cb_item_matrix)
np.fill_diagonal(cb_item_sim_matrix, 0)

In [35]:
# Defining Weights

# Content Based User-Based Weight
CB_USER_WT = 0.6

# Collaborative Filtering User-Based Weight
CF_USER_WT = 0.1

# Content Based Engine Weight 
CB_WT = 0.5

SEARCH_RANGE_IN_MILES=2

In [36]:
# Intializing The Recommendation Engine
recommender = RecommenderInterface(
    cb_user_sim_matrix=cb_user_sim_matrix, 
    cb_item_sim_matrix=cb_item_sim_matrix,
    cb_idx2user=cb_idx2user, 
    cb_idx2item=cb_idx2item,
    search_range_in_miles=SEARCH_RANGE_IN_MILES,
    cb_user_wt=CB_USER_WT,
    cf_user_wt=CF_USER_WT,
    cf_ub_engine_path=os.path.join(data_dir,cf_ub_fname),
    cf_ib_engine_path=os.path.join(data_dir,cf_ib_fname),
    cb_wt=CB_WT,
    train_df=train_df
)

Initializing Engine
CB User Wt : 0.6
CF user Wt : 0.1


In [37]:
# Prediction using Hybrid Engine
preds = recommender.recommend(user_id = 'TVN3qoXU0-sateboutdCZg',
                         lat = 39.958211,
                         long = -75.173137,
                         user_preference  = 'Italian',
                         topk=20
                        )
preds.head()

Unnamed: 0,item_id,rating
171,wUnLSg_GKfEIQ5CQQ770_g,4.759277
73,oZzN706lKoL4faaTK739xA,4.636019
76,qjIN4UbE96Cq6JKwLIQ9VQ,4.591428
240,ZsSsyknkpARZUrD6rRH27A,4.508464
79,d5fAUl4lKaNxGfiXj4Kygg,4.481208


In [None]:
# Reading Test Data
test_df = pd.read_json(os.path.join(data_dir, test_fname))

In [None]:
y_true = test_df['user_rating'].tolist()
y_pred = []
topk = 20

done=0
total=test_df.shape[0]
for idx,row in test_df.iterrows():
    user_id = row['user_id']
    item_id = row['business_id']
    pred = recommender.predict(user_id, item_id, topk)
    y_pred.append(pred)
    done+=1

    if done % 500 == 0:
        print(f"{done}/{total} Done")

In [30]:
mse = round(mean_squared_error(y_true, y_pred),4)
mae = round(mean_absolute_error(y_true, y_pred),4)

print(f"MSE : {mse} MAE : {mae}")

MSE : 0.3491 MAE : 0.4312


In [39]:
user_id = "TVN3qoXU0-sateboutdCZg"
user_review_summary = '\n\n'.join(train_df[train_df['user_id'] == '-1MF2tosrw2WcCxeVNk81Q'].text.tolist())
#print(user_review_summary)

**Likes:**
- Quality food
- Instagram-worthy
- Creamy salmon
- Balanced sauces
- Reasonable prices
- Crunchy wings
- All-you-can-eat
- Delicious desserts
- Fast service
- Flavorful chicken

**Dislikes:**
- Long wait
- Uninformed delays
- Slow service
- Not spicy
- Bland guacamole
- Dry chicken
- Gift wrapping
- Walking distance
- Limited spice
- Simple wrapping

In [55]:
business_reviews = ''.join(train_df[train_df.business_id.isin([preds.item_id.tolist()[0]])].text.tolist()[:20])
#print(business_reviews)

**Top 10 Good Things:**
- Delicious Food: Repeatedly praised across reviews for its quality and taste.
- Amazing Service: Many users highlighted the attentive and professional service.
- Unique Pasta: The pasta dishes, especially the sweet onion crepe and chestnut fettuccine, were often mentioned.
- Great Ambiance: The cozy and warm atmosphere was appreciated.
- Memorable Desserts: Lavender gelato and petit fours were specifically mentioned as highlights.
- Olive Oil: Praised for its freshness and quality.
- Wine Selection: Positive mentions of the wine pairing and selection.
- Special Occasions: Many users found it perfect for celebrating special events.
- Expert Preparation: High marks for the culinary skill in preparation and presentation.
- Generous Portions: Some users were satisfied with the amount of food served.

  
**Top 10 Bad Things:**
- Pricey: Several mentions of the high cost, making it a consideration for repeat visits.
- Small Portions: Contradictory to some opinions, others found the portions too small.
- Service Inconsistencies: A few reports of off-service or feeling rushed.
- Reservation Wait: Difficulty in getting reservations mentioned by some.
- Limited Menu: Some diners found the choice restrictive, especially with the tasting menu.
- Unimpressed by Classics: A few dishes, like almond tortellini, didn't meet expectations for some.
- Late Seating: Mention of being seated later than the reservation time.
- Interior Decor: One review implied the decor was unexpected or underwhelming.
- Oily Dishes: Specific mentions of dishes being too oily.
- Noise Level: Although not explicitly mentioned, fine dining experiences often include critiques of ambiance, such as noise.