# RecTools with features - PROJECTS

### import libraries

In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from implicit.nearest_neighbours import TFIDFRecommender, BM25Recommender
from implicit.als import AlternatingLeastSquares
from lightfm import LightFM

from rectools import Columns
from rectools.dataset import Dataset
from rectools.models import ImplicitALSWrapperModel
from rectools.models import LightFMWrapperModel, ImplicitItemKNNWrapperModel, RandomModel, PopularModel


from numpy import genfromtxt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from rectools.metrics import Precision, Recall, MeanInvUserFreq, Serendipity, calc_metrics, MAP, MCC, MRR, IntraListDiversity, F1Beta, Accuracy, NDCG, IntraListDiversity
from rectools.model_selection import TimeRangeSplitter, cross_validate

In [3]:
from rectools.metrics.distances import PairwiseHammingDistanceCalculator

### load data

In [4]:
projects = pd.read_csv(
    "проекты_и_компании_2024-09-13T14_31_07.415586Z.csv",
    engine="c",
    usecols=['ID Проекта', 'Название']
    )
projects.columns = ['ID', 'Title']

In [5]:
oldViews = pd.read_csv(
    "просмотры_проектов_2024-09-20T10_22_58.285588Z.csv",
    engine="python",  # Because of 2-chars separators
    header=0,
    names=[Columns.Datetime, Columns.User, Columns.Item]
    )
oldViews['weight'] = np.full(oldViews.shape[0], 1)

In [6]:
views = pd.read_csv(
    "просмотры_проектов__clickhouse__2024-09-13T14_24_44.471163Z.csv",
    engine="python",  # Because of 2-chars separators
    header=0,
    names=[Columns.Datetime, Columns.User, Columns.Item]
    )
views['weight'] = np.full(views.shape[0], 1)

In [7]:
responses = pd.read_csv(
    "участие_в_проектах_2024-09-13T14_49_23.189163Z.csv",
    engine="python",
    header=0,
    usecols=['Дата', 'ID Проекта', 'ID Пользователя'],
    )
responses['weight'] = np.full(responses.shape[0], 2)
responses = responses.rename(columns={'Дата': Columns.Datetime, 'ID Проекта': Columns.Item, 'ID Пользователя': Columns.User, 'weight': Columns.Weight})
responses = responses[['datetime', 'user_id', 'item_id', 'weight']]

In [8]:
oldViews = oldViews.loc[(oldViews["datetime"] < min(views['datetime']))]

In [9]:
data = pd.concat([oldViews, views, responses], axis=0)
data = data.sort_values('datetime').reset_index(drop=True)
data.shape

(12698, 4)

### train & test

In [10]:
data['datetime'] = pd.to_datetime(data['datetime'])
split_date = '2024-06-20'

# разделяем выборку на две части
df_train = data[data['datetime'] < split_date]
df_test = data[data['datetime'] >= split_date]

In [11]:
print(len(df_train), len(df_test))

10014 2684


### load features

In [12]:
users = pd.read_csv(
    "все_студенты__без_пд__2024-09-06T14_56_40.946035Z.csv",
    engine="python",  # Because of 2-chars separators
    header=0,
    usecols=["ID ИОТ", "Группа", "Специальность", "Уровень образования"]
    )
users = users.rename(columns={"ID ИОТ": "user_id", "Группа": "course", "Специальность": "spec", "Уровень образования": "graduation" })

In [13]:
users = users.loc[users["user_id"].isin(df_train["user_id"])].copy()
len(users)

324

In [14]:
user_features_frames = []
for feature in ["course", "spec", "graduation"]:
    feature_frame = users.reindex(columns=["user_id", feature])
    feature_frame.columns = ["id", "value"]
    feature_frame["feature"] = feature
    user_features_frames.append(feature_frame)
user_features = pd.concat(user_features_frames)
user_features.sort_values("id")

Unnamed: 0,id,value,feature
0,1305,2935,course
14702,1305,6304,course
17992,1305,Бакалавриат,graduation
17992,1305,6304,course
0,1305,Аспирантура,graduation
...,...,...,...
10486,22815,Информатика и вычислительная техника,spec
10486,22815,Магистратура,graduation
10555,22884,Магистратура,graduation
10555,22884,9310,course


In [15]:
item_features = pd.read_csv(
    "Cosine_Similarity_new.csv",
    engine="python",  # Because of 2-chars separators
    )
print(item_features.shape)

(134, 134)


In [16]:
my_data = genfromtxt("Cosine_Similarity_new.csv", delimiter=',')

In [17]:
projectsM = pd.read_csv(
    "проекты_и_компании_2024-09-13T14_31_07.415586Z.csv",
    engine="c",
    usecols=['ID Проекта']
    )
projectsM.columns = ['ID']

my_data_f = my_data[1:]
projectsM['CosSimularity'] = ''

i = 0
for md in my_data_f:
    projectsM['CosSimularity'][i] = my_data_f[i]
    i+=1

cossimularity_feature = projectsM[["ID", "CosSimularity"]].explode("CosSimularity")
cossimularity_feature.columns = ["id", "value"]
cossimularity_feature['feature'] = ''

md = projectsM['ID'].to_numpy()

for i in range(len(md)):
    cossimularity_feature.at[i, "feature"] = md

md_list = projectsM['ID'].to_numpy()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  projectsM['CosSimularity'][i] = my_data_f[i]


In [20]:
cossimularity_feature_train = cossimularity_feature.loc[cossimularity_feature["id"].isin(df_train["item_id"])].copy()
cossimularity_feature_train

Unnamed: 0,id,value,feature
0,1,1.0,1
0,1,0.36081,2
0,1,0.372803,3
0,1,0.887113,4
0,1,0.857938,5
...,...,...,...
121,123,0.175262,131
121,123,0.281886,132
121,123,0.402911,133
121,123,0.362141,134


### create dataset

In [12]:
dataset = Dataset.construct(df_train)

In [38]:
sparse_features_dataset = Dataset.construct(
    df_train,
    user_features_df=user_features,  # our flatten dataframe
    cat_user_features=["graduation", "spec"], # these will be one-hot-encoded. All other features must be numerical already
    make_dense_user_features=False  # for `sparse` format
)

ИЛИ

In [22]:
dataset_full_features = Dataset.construct(
    interactions_df=df_train,
    user_features_df=user_features,
    cat_user_features=["graduation", "spec"],
    make_dense_user_features=False,
    item_features_df=cossimularity_feature_train,
    cat_item_features=md_list
)

### model create

In [39]:
model = ImplicitALSWrapperModel(AlternatingLeastSquares(10, num_threads=32))
model.fit(sparse_features_dataset)

  check_blas_config()
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]


<rectools.models.implicit_als.ImplicitALSWrapperModel at 0x26f372abb20>

ИЛИ

In [23]:
model = ImplicitALSWrapperModel(AlternatingLeastSquares(10, num_threads=32))
model.fit(dataset_full_features)

  check_blas_config()
100%|██████████| 1/1 [00:00<00:00, 153.73it/s]
100%|██████████| 1/1 [00:28<00:00, 28.17s/it]


<rectools.models.implicit_als.ImplicitALSWrapperModel at 0x13a63c07c10>

### get recomendations

In [25]:
recos = model.recommend(
    users=df_train[Columns.User].unique(),
    dataset=dataset_full_features, #sparse_features_dataset
    k=15,
    filter_viewed=True,
)

In [26]:
closedProjects = pd.read_csv(
    "время_жизни_проектов_2024-09-15T14_24_25.319296Z.csv"
)
closedProjects['createdAt'] = pd.to_datetime(closedProjects['createdAt'], format='%Y-%m-%d, %H:%M')
closedProjects = closedProjects.loc[closedProjects.groupby('id')['createdAt'].idxmax()]
closedProjects = closedProjects.loc[(closedProjects["status"] == 'active')]
clearRecos = recos.loc[recos['item_id'].isin(closedProjects['id'])]
clearRecos

Unnamed: 0,user_id,item_id,score,rank
2,802,99,1.027564,3
4,802,92,0.680830,5
5,802,98,0.649672,6
6,802,103,0.611007,7
11,802,96,0.458624,12
...,...,...,...,...
4608,23847,102,0.556189,4
4609,23847,115,0.412364,5
4610,23847,98,0.342980,6
4611,23847,123,0.296729,7


### My Metrics

In [35]:
merged = pd.merge(clearRecos, df_test, on=['user_id', 'item_id'], how='left', indicator=True)
merged

Unnamed: 0,user_id,item_id,score,rank,datetime,weight,_merge
0,802,99,0.932804,2,NaT,,left_only
1,802,54,0.909573,3,NaT,,left_only
2,802,92,0.908329,4,NaT,,left_only
3,802,94,0.844778,5,NaT,,left_only
4,802,97,0.696693,7,NaT,,left_only
...,...,...,...,...,...,...,...
2573,23847,102,0.508048,4,NaT,,left_only
2574,23847,123,0.394315,6,NaT,,left_only
2575,23847,98,0.309956,8,NaT,,left_only
2576,23847,100,0.284277,10,NaT,,left_only


In [36]:
# MAP
def average_precision(recs, test_items):
    hits = 0
    sum_precisions = 0
    for i, item in enumerate(recs['item_id'], start=1):
        if item in test_items['item_id'].values:
            hits += 1
            precision_at_i = hits / i
            sum_precisions += precision_at_i
    if hits == 0:
        return 0
    return sum_precisions / hits

In [37]:
def serendipity_for_user(user_recs, user_relevant, user_history):
    serendipity_count = 0
    for rec_item in user_recs['item_id']:
        is_relevant = rec_item in user_relevant['item_id'].values
        is_unexpected = rec_item not in user_history['item_id'].values
        if is_relevant and is_unexpected:
            serendipity_count += 1
    return serendipity_count / len(user_recs) if len(user_recs) > 0 else 0

In [38]:
def calculate_popularity_bias(recommendations, all_data, k):
    from collections import Counter
    
    all_items = [item for sublist in all_data for item in sublist]
    item_counts = Counter(all_items)
    total_items = len(all_items)
    
    recommended_items = [item for sublist in recommendations for item in sublist[:k]]
    recommended_item_counts = Counter(recommended_items)
    
    recommended_item_popularity = [
        item_counts[item] / total_items if item in item_counts else 0
        for item in recommended_items
    ]
    
    avg_popularity = sum(recommended_item_popularity) / len(recommended_item_popularity)
    
    return avg_popularity


In [39]:
def calculate_metrics(recommendations, all_data, test_data, beta=1, k=10):
    merged = pd.merge(recommendations, test_data, on=['user_id', 'item_id'], how='left', indicator=True)

    TP = merged['_merge'].value_counts().get('both', 0)
    FP = merged['_merge'].value_counts().get('left_only', 0)
    FN = test_data.shape[0] - TP
    
    # precision
    if TP + FP == 0:
        precision = 0
    else:
        precision = TP / (TP + FP)

    print("precision:", precision)
    
    # recall
    if TP + FN == 0:
        recall = 0
    else:
        recall = TP / (TP + FN)

    print("recall:", recall)
    
    # F1-score
    if precision + recall == 0:
        f1_score = 0
    else:
        f1_score = 2 * (precision * recall) / (precision + recall)
    
    # F1-beta
    if precision + recall == 0:
        f1_beta = 0
    else:
        f1_beta = (1 + beta**2) * (precision * recall) / (beta**2 * precision + recall)
    
    print("f1_beta:", f1_beta)

    users = recommendations['user_id'].unique()
    ap_sum = 0
    for user in users:
        user_recs = recommendations[recommendations['user_id'] == user]
        user_test = test_data[test_data['user_id'] == user]
        ap_sum += average_precision(user_recs, user_test)
    
    map_score = ap_sum / len(users)
    print("MAP:", map_score)

    # DCG@k
    def dcg_at_k(recs, test_items, k):
        dcg = 0.0
        for i, item in enumerate(recs['item_id'][:k], start=1):
            if item in test_items['item_id'].values:
                dcg += 1 / np.log2(i + 1) 
        return dcg
    
    # IDCG@k
    def idcg_at_k(test_items, k):
        ideal_rel_count = min(k, len(test_items)) 
        idcg = sum([1 / np.log2(i + 1) for i in range(1, ideal_rel_count + 1)])
        return idcg

    # NDCG@k
    ndcg_sum = 0
    for user in users:
        user_recs = recommendations[recommendations['user_id'] == user]
        user_test = test_data[test_data['user_id'] == user]
        dcg = dcg_at_k(user_recs, user_test, k)
        idcg = idcg_at_k(user_test, k)
        if idcg > 0:
            ndcg = dcg / idcg
        else:
            ndcg = 0
        ndcg_sum += ndcg

    ndcg_at_k = ndcg_sum / len(users)
    print(f"NDCG@{k}:", ndcg_at_k)

    # MRR
    def reciprocal_rank(recs, test_items):
        for i, item in enumerate(recs['item_id'], start=1):
            if item in test_items['item_id'].values:
                return 1 / i
        return 0

    mrr_sum = 0
    for user in users:
        user_recs = recommendations[recommendations['user_id'] == user]
        user_test = test_data[test_data['user_id'] == user]
        mrr_sum += reciprocal_rank(user_recs, user_test)

    mrr_score = mrr_sum / len(users)
    print("MRR:", mrr_score)

    # Novelty
    item_popularity = all_data['item_id'].value_counts()
    max_popularity = item_popularity.max()

    def novelty(item, max_popularity, item_popularity):
        popularity = item_popularity.get(item, 0)
        return 1 - (popularity / max_popularity)

    novelty_sum = 0
    for user in users:
        user_recs = recommendations[recommendations['user_id'] == user]
        novelty_user_sum = user_recs['item_id'].apply(lambda item: novelty(item, max_popularity, item_popularity)).sum()
        novelty_sum += novelty_user_sum / len(user_recs)

    novelty_score = novelty_sum / len(users)
    print("Novelty:", novelty_score)

    serendipity_sum = 0
    temporal_diversity_sum = 0
    users = recommendations['user_id'].unique()
    
    for user in users:
        user_recs = recommendations[recommendations['user_id'] == user].head(k)
        user_relevant = test_data[test_data['user_id'] == user]
        user_history = all_data[(all_data['user_id'] == user) & (all_data['item_id'].isin(test_data['item_id']) == False)]
        serendipity_sum += serendipity_for_user(user_recs, user_relevant, user_history)
        
    
    serendipity_score = serendipity_sum / len(users) if len(users) > 0 else 0
    print("Serendipity:", serendipity_score)

    # item coverage
    all_items = all_data['item_id'].unique()
    recommended_items = recommendations['item_id'].unique()
    item_coverage = len(recommended_items) / len(all_items) if len(all_items) > 0 else 0
    print("Item Coverage:", item_coverage)

    # User coverage
    # total_users = all_data['user_id'].unique()
    # recommended_users = recommendations['user_id'].unique()
    # user_coverage = len(recommended_users) / len(total_users) if len(total_users) > 0 else 0
    # print("User Coverage:", user_coverage)
    
    popularity_bias = calculate_popularity_bias(recommendations, all_data, k)
    print("popularity_bias:", popularity_bias)
    
    pass

In [40]:
calculate_metrics(recos, data, df_test)

precision: 0.0008652390222799049
recall: 0.0014903129657228018
f1_beta: 0.0010948405638428905
MAP: 0.0008116883116883117
NDCG@10: 0.00030775442977126874
MRR: 0.0008116883116883117
Novelty: 0.6996055273229184
Serendipity: 0.0003246753246753247
Item Coverage: 0.7927927927927928
popularity_bias: 0.08229813664596275


### RecTools Metrics

In [27]:
precision = Precision(k=10)
recall = Recall(k=10)
f1 = F1Beta(k=10)

precision_value = precision.calc(reco=recos, interactions=df_test)
print(f"precision: {precision_value}")

recall_value = recall.calc(reco=recos, interactions=df_test)
print(f"recall: {recall_value}")


precision: 0.010126582278481013
recall: 0.0019856043683296105


In [27]:
n_splits = 3

splitter = TimeRangeSplitter(
    test_size="30D",
    n_splits=n_splits,
    filter_already_seen=True,
    filter_cold_items=True,
    filter_cold_users=True,
)

In [28]:
splitter.get_test_fold_borders(dataset_full_features.interactions)

[(Timestamp('2024-03-22 00:00:00', freq='30D'),
  Timestamp('2024-04-21 00:00:00', freq='30D')),
 (Timestamp('2024-04-21 00:00:00', freq='30D'),
  Timestamp('2024-05-21 00:00:00', freq='30D')),
 (Timestamp('2024-05-21 00:00:00', freq='30D'),
  Timestamp('2024-06-20 00:00:00', freq='30D'))]

In [29]:
# Take few simple models to compare
models = {
    "random": RandomModel(random_state=42),
    "popular": PopularModel(),
    "most_raited": PopularModel(popularity="sum_weight"),
    "tfidf_k=5": ImplicitItemKNNWrapperModel(model=TFIDFRecommender(K=5)),
    "tfidf_k=10": ImplicitItemKNNWrapperModel(model=TFIDFRecommender(K=10)),
    "bm25_k=5_k1=0.05_b=0.1": ImplicitItemKNNWrapperModel(model=BM25Recommender(K=5, K1=0.05, B=0.1)),
    "Implicit ALS": ImplicitALSWrapperModel(AlternatingLeastSquares(10, num_threads=32))
}

In [30]:
metrics = {
    "prec@1": Precision(k=1),
    "prec@10": Precision(k=10),
    'Serendipity@1': Serendipity(k=1),
    'Serendipity@5': Serendipity(k=5),
    'Serendipity@10': Serendipity(k=10),
    'MIUF@1': MeanInvUserFreq(k=1),
    'MIUF@5': MeanInvUserFreq(k=5),
    'MIUF@10': MeanInvUserFreq(k=10),
    'MAP@1': MAP(k=1),
    'MAP@5': MAP(k=5),
    'MAP@10': MAP(k=10),
    "recall": Recall(k=10),
    "MCC": MCC(k=10),
    "MRR": MRR(k=10),
    "F1Beta": F1Beta(k=10)
}

K_RECS = 10

In [32]:
%%time

# For each fold generate train and test part of dataset
# Then fit every model, generate recommendations and calculate metrics

cv_results = cross_validate(
    dataset=dataset_full_features,
    splitter=splitter,
    models=models,
    metrics=metrics,
    k=K_RECS,
    filter_viewed=True,
)

100%|██████████| 1/1 [00:00<00:00, 109.32it/s]
100%|██████████| 1/1 [00:35<00:00, 35.47s/it]
100%|██████████| 1/1 [00:00<00:00, 31.59it/s]
100%|██████████| 1/1 [00:38<00:00, 38.42s/it]
100%|██████████| 1/1 [00:00<00:00, 94.98it/s]
100%|██████████| 1/1 [00:39<00:00, 39.60s/it]

CPU times: total: 12min 20s
Wall time: 1min 56s





In [33]:
pivot_results = (
    pd.DataFrame(cv_results["metrics"])
    .drop(columns="i_split")
    .groupby(["model"], sort=False)
    .agg(["mean", "std"])
)
mean_metric_subset = [(metric, "mean") for metric in pivot_results.columns.levels[0]]
(
    pivot_results.style
    .highlight_min(subset=mean_metric_subset, color='lightcoral', axis=0)
    .highlight_max(subset=mean_metric_subset, color='lightgreen', axis=0)
)

Unnamed: 0_level_0,prec@1,prec@1,prec@10,prec@10,recall,recall,MCC,MCC,F1Beta,F1Beta,MRR,MRR,MAP@1,MAP@1,MAP@5,MAP@5,MAP@10,MAP@10,MIUF@1,MIUF@1,MIUF@5,MIUF@5,MIUF@10,MIUF@10,Serendipity@1,Serendipity@1,Serendipity@5,Serendipity@5,Serendipity@10,Serendipity@10
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2
random,0.363636,0.629837,0.163636,0.112021,0.204894,0.122498,0.028639,0.090059,0.142907,0.068761,0.106902,0.063414,0.030303,0.052486,0.043124,0.047364,0.062232,0.045724,5.216897,0.292492,5.346545,0.051009,5.209761,0.030083,0.013034,0.022575,0.017697,0.015861,0.025364,0.012744
popular,0.927273,0.220443,0.350303,0.170961,0.298184,0.121521,0.197531,0.031709,0.256082,0.069804,0.301804,0.128465,0.090646,0.095082,0.143337,0.068411,0.164661,0.076973,2.095299,0.047705,2.425977,0.067415,2.59022,0.09408,0.006256,0.000339,0.012649,0.011537,0.009891,0.00476
most_raited,0.121212,0.209946,0.38303,0.299561,0.286834,0.086472,0.21091,0.148879,0.259765,0.099564,0.21271,0.112557,0.001955,0.003386,0.059733,0.008822,0.088667,0.02661,2.516142,0.045108,2.461061,0.084989,2.675248,0.100988,0.000326,0.000564,0.001574,0.001379,0.004538,0.002402
tfidf_k=5,0.121212,0.209946,0.454545,0.230135,0.38749,0.128655,0.293111,0.04829,0.297883,0.006233,0.205159,0.081999,0.031281,0.054179,0.09996,0.065848,0.135228,0.074419,3.931074,0.197204,3.725686,0.140715,3.852167,0.028074,0.01173,0.020317,0.026199,0.021869,0.044131,0.036819
tfidf_k=10,0.060606,0.104973,0.486667,0.456216,0.30567,0.108695,0.276776,0.289083,0.274746,0.207849,0.172559,0.10421,0.000978,0.001693,0.083715,0.02974,0.112735,0.047993,3.854695,0.196202,3.740356,0.079405,3.795861,0.128445,0.002281,0.003951,0.04253,0.063421,0.04469,0.062447
bm25_k=5_k1=0.05_b=0.1,3.060606,3.238423,0.478182,0.394797,0.325497,0.126722,0.285287,0.247006,0.299598,0.153437,0.413925,0.19615,0.165646,0.136293,0.218316,0.118834,0.236063,0.12005,2.668513,0.087364,2.99164,0.12053,3.052196,0.173959,0.050701,0.046622,0.045236,0.056141,0.04382,0.050803
Implicit ALS,2.460606,3.759608,0.559394,0.422978,0.350254,0.146382,0.341199,0.238107,0.319485,0.178211,0.310895,0.138882,0.093603,0.079234,0.143637,0.053114,0.178224,0.054427,3.124817,0.195649,3.200408,0.069771,3.334841,0.173319,0.053307,0.057616,0.062733,0.047517,0.04988,0.042432


In [34]:
pd.DataFrame(cv_results["splits"])

Unnamed: 0,i_split,start,end,train,train_users,train_items,test,test_users,test_items
0,0,2024-03-22,2024-04-21,8856,289,93,273,11,29
1,1,2024-04-21,2024-05-21,9357,292,93,98,5,13
2,2,2024-05-21,2024-06-20,9693,301,93,17,5,6


### ❗❗❗ TESTING ZONE ❗❗❗

In [28]:
def calculate_precision(recos, df_test):
    test_data_grouped = df_test.groupby('user_id')['item_id'].apply(set).to_dict()
    
    precisions = []
    
    for user_id, group in recos.groupby('user_id'):
        recommended_items = set(group['item_id'].head(15))
        true_items = test_data_grouped.get(user_id, set())
        correct_recommendations = len(recommended_items & true_items)
        precision = correct_recommendations / len(recommended_items) if recommended_items else 0
        precisions.append(precision)
    
    return sum(precisions) / len(precisions) if precisions else 0

In [29]:
calculate_precision(df_test, clearRecos)

0.0