In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split as skl_train_test_split
from surprise import Reader, Dataset, SVD, KNNBasic, NormalPredictor, BaselineOnly, NMF
from surprise.model_selection import cross_validate, KFold
from collections import defaultdict

plt.rcParams['font.family'] = 'Times New Roman'

# Preparacion de datos

In [3]:
df_ratings = pd.read_csv('./datos/rating_final.csv')
df_places = pd.read_csv('./datos/geoplaces2.csv')

datasets_locales = ['./datos/chefmozaccepts.csv', './datos/chefmozcuisine.csv', './datos/chefmozhours4.csv', './datos/chefmozparking.csv']

for dataset in datasets_locales:
    df_temp = pd.read_csv(dataset)
    merged_places = pd.merge(df_places, df_temp, on='placeID', how='left')

print(f"Shape del DataFrame original de locales: {merged_places.shape}")

merged_places = df_places.drop_duplicates(subset=['placeID'])

merged_places.to_csv('./datos/dataLocal.csv', sep=';', index=False)

for dataset in datasets_locales:
    df_temp = pd.read_csv(dataset)
    df_places = pd.merge(df_places, df_temp, on='placeID', how='left')

avg_ratings = df_ratings.groupby('placeID')[['rating', 'food_rating', 'service_rating']].mean().reset_index()

df_places = pd.merge(df_places, avg_ratings, on='placeID', how='left')
df_places = df_places.drop_duplicates(subset=['placeID'])
df_places.to_csv('./datos/dataLocal_with_averages.csv', sep=';', index=False)

print(f"Shape del DataFrame final de locales con promedios: {df_places.shape}")

Shape del DataFrame original de locales: (130, 22)
Shape del DataFrame final de locales con promedios: (130, 29)


In [4]:
userprofile_file = './datos/userprofile.csv'
usercuisine_file = './datos/usercuisine.csv'
userpayment_file = './datos/userpayment.csv'

df_userprofile = pd.read_csv(userprofile_file, sep=',')
df_usercuisine = pd.read_csv(usercuisine_file, sep=',')
df_userpayment = pd.read_csv(userpayment_file, sep=',')

df_users = pd.merge(df_userprofile, df_usercuisine, on='userID', how='left')
df_users = pd.merge(df_users, df_userpayment, on='userID', how='left')

df_users = df_users.drop_duplicates(subset=['userID'])

avg_user_ratings = df_ratings.groupby('userID')[['rating', 'food_rating', 'service_rating']].mean().reset_index()

df_users = pd.merge(df_users, avg_user_ratings, on='userID', how='left')

df_users.to_csv('./datos/dataUser.csv', sep=';', index=False)

print(f"Shape del DataFrame original de usuarios: {df_users.shape}")

Shape del DataFrame original de usuarios: (138, 24)


In [5]:
ratings = df_users
locales = df_places

In [6]:
print(ratings.columns)
print(locales.columns)

Index(['userID', 'latitude', 'longitude', 'smoker', 'drink_level',
       'dress_preference', 'ambience', 'transport', 'marital_status', 'hijos',
       'birth_year', 'interest', 'personality', 'religion', 'activity',
       'color', 'weight', 'budget', 'height', 'Rcuisine', 'Upayment', 'rating',
       'food_rating', 'service_rating'],
      dtype='object')
Index(['placeID', 'latitude', 'longitude', 'the_geom_meter', 'name', 'address',
       'city', 'state', 'country', 'fax', 'zip', 'alcohol', 'smoking_area',
       'dress_code', 'accessibility', 'price', 'url', 'Rambience', 'franchise',
       'area', 'other_services', 'Rpayment', 'Rcuisine', 'hours', 'days',
       'parking_lot', 'rating', 'food_rating', 'service_rating'],
      dtype='object')


# Similitud del coseno

In [7]:
df_ratings = pd.read_csv('./datos/dataRatings.csv', delimiter=';')

df_ratings = df_ratings.drop_duplicates(subset=['userID', 'placeID'])

df_ratings.to_csv('./datos/dataRatings.csv', index=False, sep=';')

In [8]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics.pairwise import cosine_similarity

categorical_features = ['alcohol', 'smoking_area', 'dress_code', 'accessibility', 'price', 'Rambience', 'franchise', 'area', 'other_services', 'Rpayment', 'Rcuisine']
numerical_features = ['latitude', 'longitude', 'rating', 'food_rating', 'service_rating']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(), categorical_features)])

pipeline = Pipeline(steps=[('preprocessor', preprocessor)])
locales_preprocessed = pipeline.fit_transform(locales)

similarity_matrix = cosine_similarity(locales_preprocessed, locales_preprocessed)

print(similarity_matrix)

[[1.         0.29052311 0.26067155 ... 0.43389833 0.7050241  0.17912732]
 [0.29052311 1.         0.62682767 ... 0.13611863 0.49574583 0.57941364]
 [0.26067155 0.62682767 1.         ... 0.22696956 0.15812585 0.33536408]
 ...
 [0.43389833 0.13611863 0.22696956 ... 1.         0.51621685 0.15246621]
 [0.7050241  0.49574583 0.15812585 ... 0.51621685 1.         0.43144178]
 [0.17912732 0.57941364 0.33536408 ... 0.15246621 0.43144178 1.        ]]


In [9]:
def recommend_locales_based_on_similarity(local_input, n=15):
    try:
        local_index = locales.loc[locales.name == local_input].index[0]
    except IndexError:
        return "Local no encontrado. Asegúrate de que el nombre sea correcto."

    similarity_score = list(enumerate(similarity_matrix[local_index]))
    similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)
    similarity_score = similarity_score[1:n+1]
    local_indices = [i[0] for i in similarity_score]

    return locales[['name', 'address', 'city', 'rating', 'food_rating', 'service_rating']].iloc[local_indices]

recomendaciones = recommend_locales_based_on_similarity('carnitas_mata', n=15)
print(recomendaciones)

                                    name  \
522            Hamburguesas Valle Dorado   
639                    Hamburguesas saul   
57             Abondance Restaurante Bar   
645                      Rincon Huasteco   
669                         Cafe Chaires   
705                 Restaurant Orizatlan   
898                     sirloin stockade   
330                            Luna Cafe   
441                           Potzocalli   
354                          el pueblito   
261      Restaurante la Estrella de Dima   
465  Restaurante El Chivero S.A. de C.V.   
531                          dairy queen   
54                        Restaurante 75   
3                        puesto de tacos   

                                      address             city    rating  \
522                                 Av. Coral  San Luis Potosi  0.800000   
639  Av. Saan Luis enttre moctezuma y salinas  San Luis Potosi  0.600000   
57                Industrias 908 Valle Dorado  San Luis Potosi  0.5

# SR usando Surprise

In [10]:
print(df_ratings)

     userID  placeID  rating  food_rating  service_rating   latitude  \
0     U1077   135085       2            2               2  22.150802   
1     U1077   135038       2            2               1  22.155651   
2     U1077   132825       2            2               2  22.147392   
3     U1077   135060       1            2               2  22.156883   
4     U1068   135104       1            1               2  23.752982   
...     ...      ...     ...          ...             ...        ...   
1156  U1043   132630       1            1               1  23.752931   
1157  U1011   132715       1            1               0  23.732423   
1158  U1068   132733       1            1               0  23.752707   
1159  U1068   132594       1            1               1  23.752168   
1160  U1068   132660       0            0               0  23.752943   

       longitude                                     the_geom_meter  \
0    -100.982680  0101000020957F00009F823DA6094858C18A2D4D37F9A4

In [11]:
from surprise import Dataset
from surprise import Reader

reader = Reader(rating_scale=(0, 2))
data = Dataset.load_from_df(df_ratings[['userID', 'placeID', 'rating']], reader)

In [12]:
from surprise import SVD
from surprise.model_selection import cross_validate

svd = SVD(verbose=True, n_epochs=10)
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.7152  0.7342  0.6985  0.7160  0.0146  
MAE (testset)     0.6167  0.6339  0.5947  0.6151  0.0160  
Fit time          0.01    0.01    0.00    0.01    0.00    
Test time         0.01    0.01    0.01    0.01    0.00    


{'test_rmse': array([0.71520697, 0.73415813, 0.69849138]),
 'test_mae': array([0.6166756 , 0.63385178, 0.59471754]),
 'fit_time': (0.00797581672668457, 0.006070137023925781, 0.004473447799682617),
 'test_time': (0.011011362075805664,
  0.006634950637817383,
  0.00956416130065918)}

In [13]:
trainset = data.build_full_trainset()
svd.fit(trainset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x29c81755790>

In [14]:
svd.predict(uid=10, iid=100)

Prediction(uid=10, iid=100, r_ui=None, est=1.1998277347114557, details={'was_impossible': False})

In [15]:
print(df_ratings)

     userID  placeID  rating  food_rating  service_rating   latitude  \
0     U1077   135085       2            2               2  22.150802   
1     U1077   135038       2            2               1  22.155651   
2     U1077   132825       2            2               2  22.147392   
3     U1077   135060       1            2               2  22.156883   
4     U1068   135104       1            1               2  23.752982   
...     ...      ...     ...          ...             ...        ...   
1156  U1043   132630       1            1               1  23.752931   
1157  U1011   132715       1            1               0  23.732423   
1158  U1068   132733       1            1               0  23.752707   
1159  U1068   132594       1            1               1  23.752168   
1160  U1068   132660       0            0               0  23.752943   

       longitude                                     the_geom_meter  \
0    -100.982680  0101000020957F00009F823DA6094858C18A2D4D37F9A4

In [16]:
df_ratings.loc[df_ratings.name.str.contains('Tortas Locas Hipocampo'), :]

Unnamed: 0,userID,placeID,rating,food_rating,service_rating,latitude,longitude,the_geom_meter,name,address,...,url,Rambience,franchise,area,other_services,Rpayment,Rcuisine,hours,days,parking_lot
0,U1077,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
68,U1108,135085,1,2,1,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
211,U1081,135085,1,2,1,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
246,U1056,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
257,U1134,135085,2,1,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
347,U1116,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
403,U1132,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
420,U1120,135085,0,0,0,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
450,U1036,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
485,U1084,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public


# Generando recomendaciones

In [17]:
import difflib
import random

def get_local_id(nombre_local, metadata):
    existing_names = list(metadata['name'].values)
    closest_names = difflib.get_close_matches(nombre_local, existing_names)
    place_id = metadata[metadata['name'] == closest_names[0]]['placeID'].values[0]
    return place_id

def get_local_info(local_id, metadata):
    local_info = metadata[metadata['placeID'] == local_id][['placeID', 'latitude', 'longitude', 'address']]
    return local_info.to_dict(orient='records')

def predict_review(userID, local_name, model, metadata):
    placeID = get_local_id(local_name, metadata)
    review_prediction = model.predict(uid=userID, iid=placeID)
    return review_prediction.est

def generate_recommendation(user_id, model, data, num_recommendations):
    if 'userId' not in data.columns or 'placeID' not in data.columns:
        print("El DataFrame no contiene las columnas 'userId' o 'placeID'.")
        return None

    try:
        user_idx = data.index[data['userId'] == user_id].tolist()[0]
        print(f"User index: {user_idx}")
    except IndexError:
        print(f"User {user_id} not found.")
        return None

    user_ratings = data[data['userId'] == user_id]
    all_locales = data['placeID'].unique()
    rated_locales = user_ratings['placeID'].values
    unrated_locales = [locale for locale in all_locales if locale not in rated_locales]
    print(f"Unrated locales: {unrated_locales}")

    predictions = []
    for locale in unrated_locales:
        prediction = model.predict(user_idx, locale)
        print(f"Prediction for locale {locale}: {prediction.est}")
        predictions.append((locale, prediction.est))

    predictions.sort(key=lambda x: x[1], reverse=True)
    top_recommendations = predictions[:num_recommendations]

    recommended_locales = [place for place, rating in top_recommendations]
    return recommended_locales




# TSNE

In [18]:
from sklearn.manifold import TSNE

tsne = TSNE(n_components=2, n_iter=500, verbose=3, random_state=1)
locales_embedding = tsne.fit_transform(svd.qi)
projection = pd.DataFrame(columns=['x', 'y'], data=locales_embedding)
projection['nombreGenerado'] = locales['name']

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 130 samples in 0.000s...
[t-SNE] Computed neighbors for 130 samples in 0.484s...
[t-SNE] Computed conditional probabilities for sample 130 / 130
[t-SNE] Mean sigma: 0.332756
[t-SNE] Computed conditional probabilities in 0.008s
[t-SNE] Iteration 50: error = 61.9534111, gradient norm = 0.5679761 (50 iterations in 0.032s)
[t-SNE] Iteration 100: error = 61.3031044, gradient norm = 0.7477433 (50 iterations in 0.031s)
[t-SNE] Iteration 150: error = 72.5574112, gradient norm = 0.3381661 (50 iterations in 0.029s)
[t-SNE] Iteration 200: error = 65.6973190, gradient norm = 0.6834382 (50 iterations in 0.030s)
[t-SNE] Iteration 250: error = 68.6984100, gradient norm = 0.4008325 (50 iterations in 0.040s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 68.698410
[t-SNE] Iteration 300: error = 1.2796247, gradient norm = 0.0120367 (50 iterations in 0.028s)
[t-SNE] Iteration 350: error = 1.0648321, gradient norm = 0.0137937 (

In [19]:
import plotly.express as px
import datapane as dp

fig = px.scatter(
    projection, x='x', y='y'
)
fig.show()

report = dp.Report(dp.Plot(fig) )

In [20]:
from collections import defaultdict
from surprise import Dataset, SVD

def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [21]:
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x29c8182c4f0>

In [22]:
locales_visitados_usuario=df_ratings.loc[(df_ratings.userID=='U1077'),'placeID']
locales_no_visitados_usuario=df_ratings.loc[~df_ratings.placeID.isin(locales_visitados_usuario),'placeID'].unique()

pred=[]
for local in locales_no_visitados_usuario:
    pred.append(svd.predict(uid='U1077', iid=local))

In [23]:
pred

[Prediction(uid='U1077', iid=135104, r_ui=None, est=1.2143072568008864, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132740, r_ui=None, est=1.0812679729270631, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132663, r_ui=None, est=1.1055093306308295, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132732, r_ui=None, est=1.0927208057374418, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132630, r_ui=None, est=1.3081038473554412, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132584, r_ui=None, est=1.2914671950409704, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132733, r_ui=None, est=1.3416418243120074, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132560, r_ui=None, est=1.1533381404013792, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132613, r_ui=None, est=1.2569714471847833, details={'was_impossible': False}),
 Prediction(uid='U1077', iid

In [24]:
top_n = get_top_n(pred, n=10)
top_n

defaultdict(list,
            {'U1077': [(135075, 1.6141779702705705),
              (132922, 1.5711398091622801),
              (132768, 1.5376544453747105),
              (132858, 1.4456067060458866),
              (135030, 1.420600064867265),
              (134986, 1.4159102494019544),
              (135074, 1.4037510693652941),
              (134975, 1.4022076587106571),
              (132866, 1.40065945335016),
              (135028, 1.3936205505996113)]})

In [25]:
for uid, user_ratings in top_n.items():
    print([locales.loc[locales.placeID==iid,'name'].values[0] for (iid, _) in user_ratings])

['Mariscos El Pescador', 'cafe punta del cielo', 'Mariscos Tia Licha', 'Hamburguesas Valle Dorado', 'Preambulo Wifi Zone Cafe', 'Restaurant Las Mananitas', 'Restaurante la Parroquia Potosina', 'Rincon del Bife', 'Chaires', 'La Virreina']


# Comparacion desempeño algoritmos

In [27]:
import pandas as pd
from surprise import Dataset, Reader
from surprise.model_selection import GridSearchCV, cross_validate, KFold
from surprise.prediction_algorithms import NormalPredictor, BaselineOnly, SVD, KNNBasic
from collections import defaultdict
from sklearn.model_selection import train_test_split as sklearn_train_test_split

ratings_data_f = df_ratings.sample(n=100000, replace=True)
ratings_data_f = ratings_data_f[['userID', 'placeID', 'rating', 'food_rating', 'service_rating']]
reader = Reader(rating_scale=(0, 2))

def evaluate(data_train, algo):
    cv_results = cross_validate(algo, data_train, measures=['RMSE'], cv=4, verbose=False, n_jobs=-1)
    return round(cv_results['test_rmse'].mean(), 3)


def precision_recall_at_k(predictions, k=10, threshold=2.0):
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold)) for (est, true_r) in user_ratings[:k])
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0
    return precisions, recalls

# Evaluate precision and recall
def evaluate_precision_recall(data_train, algo):
    kf = KFold(n_splits=4)
    precision = []
    recall = []
    for trainset, testset in kf.split(data_train):
        algo.fit(trainset)
        predictions = algo.test(testset)
        precisions, recalls = precision_recall_at_k(predictions, k=10, threshold=2.0)
        precision.append(sum(prec for prec in precisions.values()) / len(precisions))
        recall.append(sum(rec for rec in recalls.values()) / len(recalls))
    return sum(precision) / len(precision), sum(recall) / len(recall)


algorithms = {
    'Random': (NormalPredictor, {}),
    'Baseline': (BaselineOnly, {}),
    'SVD': (SVD, {'n_factors': [50, 100], 'reg_all': [0.02, 0.05]}),
    'KNN_cosine': (KNNBasic, {'k': [20, 30], 'sim_options': {'name': ['cosine'], 'user_based': [False]}}),
    'KNN_msd': (KNNBasic, {'k': [20, 30], 'sim_options': {'name': ['msd'], 'user_based': [False]}})
}

results_list = []

# Custom train-test split function
def train_test_split_custom(df, column, reader):
    train, test = sklearn_train_test_split(df[['userID', 'placeID', column]], test_size=0.20, random_state=100)
    data_train = Dataset.load_from_df(train, reader)
    return data_train, test

for column in ['rating', 'food_rating', 'service_rating']:
    print(f'Evaluating {column}...')
    data_train, test = train_test_split_custom(ratings_data_f, column, reader)

    for algo_name, (algo_class, param_grid) in algorithms.items():
        print(f'Tuning {algo_name}...')
        try:
            gs = GridSearchCV(algo_class, param_grid, measures=['rmse'], cv=4)
            gs.fit(data_train)

            best_algo = gs.best_estimator['rmse']

            rmse = evaluate(data_train, best_algo)

            precision, recall = evaluate_precision_recall(data_train, best_algo)
            
            # Debug
            print(f"Sample predictions for {algo_name} on {column}:")
            for i, (uid, iid, true_r, est, _) in enumerate(best_algo.test(data_train.build_full_trainset().build_testset())):
                print(f"User: {uid}, Item: {iid}, True Rating: {true_r}, Estimated Rating: {est}")
                if i >= 5:  #Imprimir los primeros 5
                    break

            results_list.append({
                'Algoritmo': algo_name,
                'RMSE CV': rmse,
                'Precision': precision,
                'Recall': recall,
                'Rating Type': column
            })
        except ZeroDivisionError as e:
            print(f'Error tuning {algo_name}: {e}')

# Results dataframe
results_df = pd.DataFrame(results_list)
print(results_df)


Evaluating rating...
Tuning Random...
Sample predictions for Random on rating:
User: U1045, Item: 135052, True Rating: 1.0, Estimated Rating: 2
User: U1045, Item: 132825, True Rating: 2.0, Estimated Rating: 1.9834116555962908
User: U1045, Item: 135060, True Rating: 2.0, Estimated Rating: 1.78809527262852
User: U1045, Item: 135060, True Rating: 2.0, Estimated Rating: 2
User: U1045, Item: 132825, True Rating: 2.0, Estimated Rating: 0.2592583993890508
User: U1045, Item: 135032, True Rating: 0.0, Estimated Rating: 1.3728407818927375
Tuning Baseline...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Sample predictions for Baseline on rating:
User: U1045, Item: 135052, True Rating: 1.0, Estimated Rating: 1.4162043316845006
User: U1045, Item: 132825, True Rating: 2.0, Estimated Rating: 1.30743668