In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

plt.rcParams['font.family'] = 'Times New Roman'

# Preparacion de datos

In [34]:
df_ratings = pd.read_csv('./datos/rating_final.csv')
df_places = pd.read_csv('./datos/geoplaces2.csv')

datasets_locales = ['./datos/chefmozaccepts.csv', './datos/chefmozcuisine.csv', './datos/chefmozhours4.csv', './datos/chefmozparking.csv']

for dataset in datasets_locales:
    df_temp = pd.read_csv(dataset)
    merged_places = pd.merge(df_places, df_temp, on='placeID', how='left')

print(f"Shape del DataFrame original de locales: {merged_places.shape}")

merged_places = df_places.drop_duplicates(subset=['placeID'])

merged_places.to_csv('./datos/dataLocal.csv', sep=';', index=False)

for dataset in datasets_locales:
    df_temp = pd.read_csv(dataset)
    df_places = pd.merge(df_places, df_temp, on='placeID', how='left')

avg_ratings = df_ratings.groupby('placeID')[['rating', 'food_rating', 'service_rating']].mean().reset_index()

df_places = pd.merge(df_places, avg_ratings, on='placeID', how='left')
df_places = df_places.drop_duplicates(subset=['placeID'])
df_places.to_csv('./datos/dataLocal_with_averages.csv', sep=';', index=False)

print(f"Shape del DataFrame final de locales con promedios: {df_places.shape}")

Shape del DataFrame original de locales: (130, 22)
Shape del DataFrame final de locales con promedios: (130, 29)


In [35]:
userprofile_file = './datos/userprofile.csv'
usercuisine_file = './datos/usercuisine.csv'
userpayment_file = './datos/userpayment.csv'

df_userprofile = pd.read_csv(userprofile_file, sep=',')
df_usercuisine = pd.read_csv(usercuisine_file, sep=',')
df_userpayment = pd.read_csv(userpayment_file, sep=',')

df_users = pd.merge(df_userprofile, df_usercuisine, on='userID', how='left')
df_users = pd.merge(df_users, df_userpayment, on='userID', how='left')

df_users = df_users.drop_duplicates(subset=['userID'])

avg_user_ratings = df_ratings.groupby('userID')[['rating', 'food_rating', 'service_rating']].mean().reset_index()

df_users = pd.merge(df_users, avg_user_ratings, on='userID', how='left')

df_users.to_csv('./datos/dataUser.csv', sep=';', index=False)

print(f"Shape del DataFrame original de usuarios: {df_users.shape}")

Shape del DataFrame original de usuarios: (138, 24)


In [36]:
ratings = df_users
locales = df_places

In [37]:
print(ratings.columns)
print(locales.columns)

Index(['userID', 'latitude', 'longitude', 'smoker', 'drink_level',
       'dress_preference', 'ambience', 'transport', 'marital_status', 'hijos',
       'birth_year', 'interest', 'personality', 'religion', 'activity',
       'color', 'weight', 'budget', 'height', 'Rcuisine', 'Upayment', 'rating',
       'food_rating', 'service_rating'],
      dtype='object')
Index(['placeID', 'latitude', 'longitude', 'the_geom_meter', 'name', 'address',
       'city', 'state', 'country', 'fax', 'zip', 'alcohol', 'smoking_area',
       'dress_code', 'accessibility', 'price', 'url', 'Rambience', 'franchise',
       'area', 'other_services', 'Rpayment', 'Rcuisine', 'hours', 'days',
       'parking_lot', 'rating', 'food_rating', 'service_rating'],
      dtype='object')


# Similitud del coseno

In [38]:
df_ratings = pd.read_csv('./dataRatings.csv', delimiter=';')

df_ratings = df_ratings.drop_duplicates(subset=['userID', 'placeID'])

df_ratings.to_csv('./dataRatings.csv', index=False, sep=';')

FileNotFoundError: [Errno 2] No such file or directory: './dataRatings.csv'

In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics.pairwise import cosine_similarity

categorical_features = ['alcohol', 'smoking_area', 'dress_code', 'accessibility', 'price', 'Rambience', 'franchise', 'area', 'other_services', 'Rpayment', 'Rcuisine']
numerical_features = ['latitude', 'longitude', 'rating', 'food_rating', 'service_rating']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(), categorical_features)])

pipeline = Pipeline(steps=[('preprocessor', preprocessor)])
locales_preprocessed = pipeline.fit_transform(locales)

similarity_matrix = cosine_similarity(locales_preprocessed, locales_preprocessed)

print(similarity_matrix)

[[1.         0.29052311 0.26067155 ... 0.43389833 0.7050241  0.17912732]
 [0.29052311 1.         0.62682767 ... 0.13611863 0.49574583 0.57941364]
 [0.26067155 0.62682767 1.         ... 0.22696956 0.15812585 0.33536408]
 ...
 [0.43389833 0.13611863 0.22696956 ... 1.         0.51621685 0.15246621]
 [0.7050241  0.49574583 0.15812585 ... 0.51621685 1.         0.43144178]
 [0.17912732 0.57941364 0.33536408 ... 0.15246621 0.43144178 1.        ]]


In [None]:
def recommend_locales_based_on_similarity(local_input, n=15):
    try:
        local_index = locales.loc[locales.name == local_input].index[0]
    except IndexError:
        return "Local no encontrado. Asegúrate de que el nombre sea correcto."

    similarity_score = list(enumerate(similarity_matrix[local_index]))
    similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)
    similarity_score = similarity_score[1:n+1]
    local_indices = [i[0] for i in similarity_score]

    return locales[['name', 'address', 'city', 'rating', 'food_rating', 'service_rating']].iloc[local_indices]

recomendaciones = recommend_locales_based_on_similarity('carnitas_mata', n=15)
print(recomendaciones)

                                    name  \
522            Hamburguesas Valle Dorado   
639                    Hamburguesas saul   
57             Abondance Restaurante Bar   
645                      Rincon Huasteco   
669                         Cafe Chaires   
705                 Restaurant Orizatlan   
898                     sirloin stockade   
330                            Luna Cafe   
441                           Potzocalli   
354                          el pueblito   
261      Restaurante la Estrella de Dima   
465  Restaurante El Chivero S.A. de C.V.   
531                          dairy queen   
54                        Restaurante 75   
3                        puesto de tacos   

                                      address             city    rating  \
522                                 Av. Coral  San Luis Potosi  0.800000   
639  Av. Saan Luis enttre moctezuma y salinas  San Luis Potosi  0.600000   
57                Industrias 908 Valle Dorado  San Luis Potosi  0.5

# SR usando Surprise

In [None]:
print(df_ratings)

     userID  placeID  rating  food_rating  service_rating   latitude  \
0     U1077   135085       2            2               2  22.150802   
1     U1077   135038       2            2               1  22.155651   
2     U1077   132825       2            2               2  22.147392   
3     U1077   135060       1            2               2  22.156883   
4     U1068   135104       1            1               2  23.752982   
...     ...      ...     ...          ...             ...        ...   
1156  U1043   132630       1            1               1  23.752931   
1157  U1011   132715       1            1               0  23.732423   
1158  U1068   132733       1            1               0  23.752707   
1159  U1068   132594       1            1               1  23.752168   
1160  U1068   132660       0            0               0  23.752943   

       longitude                                     the_geom_meter  \
0    -100.982680  0101000020957F00009F823DA6094858C18A2D4D37F9A4

In [None]:
from surprise import Dataset
from surprise import Reader

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df_ratings[['userID', 'placeID', 'rating']], reader)

In [None]:
from surprise import SVD
from surprise.model_selection import cross_validate

svd = SVD(verbose=True, n_epochs=10)
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.7149  0.7607  0.7151  0.7303  0.0215  
MAE (testset)     0.6107  0.6472  0.6149  0.6243  0.0163  
Fit time          0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    


{'test_rmse': array([0.71493218, 0.76072994, 0.71513276]),
 'test_mae': array([0.61070342, 0.64719216, 0.6149175 ]),
 'fit_time': (0.0029358863830566406,
  0.0021986961364746094,
  0.002148866653442383),
 'test_time': (0.001399993896484375,
  0.0007622241973876953,
  0.0007810592651367188)}

In [None]:
trainset = data.build_full_trainset()
svd.fit(trainset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1468bad00>

In [None]:
svd.predict(uid=10, iid=100)

Prediction(uid=10, iid=100, r_ui=None, est=1.1998277347114557, details={'was_impossible': False})

In [None]:
print(df_ratings)

     userID  placeID  rating  food_rating  service_rating   latitude  \
0     U1077   135085       2            2               2  22.150802   
1     U1077   135038       2            2               1  22.155651   
2     U1077   132825       2            2               2  22.147392   
3     U1077   135060       1            2               2  22.156883   
4     U1068   135104       1            1               2  23.752982   
...     ...      ...     ...          ...             ...        ...   
1156  U1043   132630       1            1               1  23.752931   
1157  U1011   132715       1            1               0  23.732423   
1158  U1068   132733       1            1               0  23.752707   
1159  U1068   132594       1            1               1  23.752168   
1160  U1068   132660       0            0               0  23.752943   

       longitude                                     the_geom_meter  \
0    -100.982680  0101000020957F00009F823DA6094858C18A2D4D37F9A4

In [None]:
df_ratings.loc[df_ratings.name.str.contains('Tortas Locas Hipocampo'), :]

Unnamed: 0,userID,placeID,rating,food_rating,service_rating,latitude,longitude,the_geom_meter,name,address,...,url,Rambience,franchise,area,other_services,Rpayment,Rcuisine,hours,days,parking_lot
0,U1077,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
68,U1108,135085,1,2,1,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
211,U1081,135085,1,2,1,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
246,U1056,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
257,U1134,135085,2,1,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
347,U1116,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
403,U1132,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
420,U1120,135085,0,0,0,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
450,U1036,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public
485,U1084,135085,2,2,2,22.150802,-100.98268,0101000020957F00009F823DA6094858C18A2D4D37F9A4...,Tortas Locas Hipocampo,Venustiano Carranza 719 Centro,...,?,familiar,f,closed,none,cash,Fast_Food,00:00-00:00;,Mon;Tue;Wed;Thu;Fri;,public


# Generando recomendaciones

In [None]:
import difflib
import random

def get_local_id(nombre_local, metadata):
    
    existing_names = list(metadata['name'].values)
    closest_names = difflib.get_close_matches(nombre_local, existing_names)
    place_id = metadata[metadata['name'] == closest_names[0]]['placeID'].values[0]
    return place_id

def get_local_info(local_id, metadata):
    
    local_info = metadata[metadata['placeID'] == local_id][['id', 'latitude', 
                                                    'longitude', 'address']]
    return local_info.to_dict(orient='records')

def predict_review(userID, local_name, model, metadata):
    
    placeID = get_local_id(local_name, metadata)
    review_prediction = model.predict(uid=userID, iid=placeID)
    return review_prediction.est

def generate_recommendation(userID, model, metadata, thresh=4):
    
    local_names = list(metadata['name'].values)
    random.shuffle(local_names)
    
    for local_name in local_names:
        rating = predict_review(userID, local_name, model, metadata)
        if rating >= thresh:
            placeID = get_local_id(local_name, metadata)
            return get_local_info(placeID, metadata)

In [None]:
generate_recommendation('U1103', svd, locales, 4.8)

# TSNE

In [None]:
from sklearn.manifold import TSNE

tsne = TSNE(n_components=2, n_iter=500, verbose=3, random_state=1)
locales_embedding = tsne.fit_transform(svd.qi)
projection = pd.DataFrame(columns=['x', 'y'], data=locales_embedding)
projection['nombreGenerado'] = locales['name']

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 130 samples in 0.000s...
[t-SNE] Computed neighbors for 130 samples in 0.110s...
[t-SNE] Computed conditional probabilities for sample 130 / 130
[t-SNE] Mean sigma: 0.339318
[t-SNE] Computed conditional probabilities in 0.002s
[t-SNE] Iteration 50: error = 60.3092232, gradient norm = 0.4570706 (50 iterations in 0.239s)
[t-SNE] Iteration 100: error = 63.6116791, gradient norm = 0.4830212 (50 iterations in 0.046s)
[t-SNE] Iteration 150: error = 66.6615143, gradient norm = 0.3698289 (50 iterations in 0.019s)
[t-SNE] Iteration 200: error = 62.6667862, gradient norm = 0.9120907 (50 iterations in 0.021s)
[t-SNE] Iteration 250: error = 64.7644424, gradient norm = 0.6266498 (50 iterations in 0.021s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.764442
[t-SNE] Iteration 300: error = 1.1716886, gradient norm = 0.0664756 (50 iterations in 0.023s)
[t-SNE] Iteration 350: error = 1.0557528, gradient norm = 0.0082118 (

In [None]:
import plotly.express as px
import datapane as dp

fig = px.scatter(
    projection, x='x', y='y'
)
fig.show()

report = dp.Report(dp.Plot(fig) )

In [None]:
from collections import defaultdict
from surprise import Dataset, SVD

def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [None]:
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x146ae9790>

In [None]:
locales_visitados_usuario=df_ratings.loc[(df_ratings.userID=='U1077'),'placeID']
locales_no_visitados_usuario=df_ratings.loc[~df_ratings.placeID.isin(locales_visitados_usuario),'placeID'].unique()

pred=[]
for local in locales_no_visitados_usuario:
    pred.append(svd.predict(uid='U1077', iid=local))

In [None]:
pred

[Prediction(uid='U1077', iid=135104, r_ui=None, est=1.0403427940819578, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132740, r_ui=None, est=1.0009882997919972, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132663, r_ui=None, est=1.1595628826753517, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132732, r_ui=None, est=1.157455486225634, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132630, r_ui=None, est=1.2769883504308048, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132584, r_ui=None, est=1.1099401657295729, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132733, r_ui=None, est=1.4483497585107947, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132560, r_ui=None, est=1, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132613, r_ui=None, est=1.3513165183983191, details={'was_impossible': False}),
 Prediction(uid='U1077', iid=132667, r_ui=None

In [None]:
top_n = get_top_n(pred, n=10)
top_n

defaultdict(list,
            {'U1077': [(135028, 1.6085031919996686),
              (135030, 1.6042528038330732),
              (132937, 1.5018636975562754),
              (135034, 1.4722856724086713),
              (132955, 1.4687350947530933),
              (132768, 1.459720246244411),
              (134986, 1.4554823407097695),
              (132733, 1.4483497585107947),
              (135075, 1.438949323339212),
              (132846, 1.4039828990004322)]})

In [None]:
for uid, user_ratings in top_n.items():
    print([locales.loc[locales.placeID==iid,'name'].values[0] for (iid, _) in user_ratings])

['La Virreina', 'Preambulo Wifi Zone Cafe', 'rockabilly ', 'Michiko Restaurant Japones', 'emilianos', 'Mariscos Tia Licha', 'Restaurant Las Mananitas', 'Little Cesarz', 'Mariscos El Pescador', 'el lechon potosino ']


# Comparacion desempano algoritmos

In [None]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import SVD, KNNBasic, NormalPredictor, BaselineOnly, NMF
from sklearn.model_selection import train_test_split

train, test = train_test_split(df_ratings, test_size=0.20, random_state=100)

reader = Reader(rating_scale=(1, 5))

data_train = Dataset.load_from_df(train[['userID', 'placeID', 'rating']], reader)

algo_svd = SVD()
algo_knn = KNNBasic()
algo_random = NormalPredictor()
algo_baseline = BaselineOnly()
algo_nmf = NMF()

trainset = data_train.build_full_trainset()
algo_svd.fit(trainset)
algo_knn.fit(trainset)
algo_random.fit(trainset)
algo_baseline.fit(trainset)
algo_nmf.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...


<surprise.prediction_algorithms.matrix_factorization.NMF at 0x290a06e20>

In [None]:
cv_random=cross_validate(algo_random, data_train, measures=['RMSE'],cv=4, verbose=False, n_jobs=-1)

In [None]:
cv_baseline=cross_validate(algo_baseline, data_train, measures=['RMSE'],cv=4, verbose=False, n_jobs=-1)
cv_svd=cross_validate(algo_svd, data_train, measures=['RMSE'],cv=4, verbose=False, n_jobs=-1)
cv_knn=cross_validate(algo_knn, data_train, measures=['RMSE'],cv=4, verbose=False, n_jobs=-1)
cv_nmf=cross_validate(algo_nmf, data_train, measures=['RMSE'],cv=4, verbose=False, n_jobs=-1)

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


In [None]:
res=[]
res.append(round(cv_random['test_rmse'].mean(),3))
res.append(round(cv_baseline['test_rmse'].mean(),3))
res.append(round(cv_svd['test_rmse'].mean(),3))
res.append(round(cv_knn['test_rmse'].mean(),3))
res.append(round(cv_nmf['test_rmse'].mean(),3))

res_df=pd.DataFrame(pd.concat([pd.Series(['Random','Baseline','SVD','KNN','NMF']),pd.Series(res)],axis=1))
res_df.columns=['Algoritmo','RMSE CV']
res_df

Unnamed: 0,Algoritmo,RMSE CV
0,Random,0.949
1,Baseline,0.723
2,SVD,0.716
3,KNN,0.813
4,NMF,0.73


Los mejores algoritmos parecen ser Random y Baseline

In [None]:
from collections import defaultdict

from surprise import Dataset, SVD
from surprise.model_selection import KFold


def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Return precision and recall at k metrics for each user"""

    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():

        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)

        # Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(
            ((true_r >= threshold) and (est >= threshold))
            for (est, true_r) in user_ratings[:k]
        )

        # Precision@K: Proportion of recommended items that are relevant
        # When n_rec_k is 0, Precision is undefined. We here set it to 0.

        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0

        # Recall@K: Proportion of relevant items that are recommended
        # When n_rel is 0, Recall is undefined. We here set it to 0.

        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0

    return precisions, recalls

In [None]:
kf = KFold(n_splits=4)
# algo = SVD()
precision=[]
recall=[]
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    precisions, recalls = precision_recall_at_k(predictions, k=10, threshold=4)

    # Precision and recall can then be averaged over all users
    precision.append(sum(prec for prec in precisions.values()) / len(precisions))
    recall.append(sum(rec for rec in recalls.values()) / len(recalls))

In [None]:
print(f'Precision media: {np.array(precision).mean()}')
print(f'Recall medio: {np.array(recall).mean()}')

Precision media: 0.0
Recall medio: 0.0
