In [1]:
import pandas as pd
import numpy as np
from some_functions import get_db

In [2]:
readers = pd.read_csv(r"C:\Users\a814811\OneDrive - Atos\RecommenderSystem\readers.csv")
readers = readers.rename(columns={"id":"user_id", "art_id":"nzz_id"})
art_db = get_db(r'C:\Users\a814811\OneDrive - Atos\RecommenderSystem\art_clean_wt_all_popularity.csv')
art_db = art_db.loc[:,['nzz_id','author','department','popularity']] #skrócenie do potrzebnych rzeczy

In [3]:
read_counts = readers["user_id"].value_counts(sort=True)
read_counts = read_counts.rename_axis("user_id").reset_index(name="read_count")

# Biorę pod uwagę tylko użytkowników, którzy przeczytali minimum 5 artykułów
min_read_count = 3
read_counts = read_counts[read_counts["read_count"] > min_read_count]

readers = readers[readers["user_id"].isin(read_counts["user_id"])]

In [4]:
# Train/Test split
from sklearn.model_selection import train_test_split

random_state = None
readers_train, readers_test = train_test_split(readers,
                                   stratify=readers["user_id"], 
                                   test_size=0.20,
                                   random_state=random_state)
                                   
print(f"Train set size {len(readers_train)}")
print(f"test set size {len(readers_test)}")
unique_train = readers_train["nzz_id"].unique()
unique_test = readers_test["nzz_id"].unique()
print(f"n users in test {len(unique_test)}")
print(f"n users in train {len(unique_train)}")

Train set size 22284
test set size 5571
n users in test 4055
n users in train 10001


In [5]:
import sys
sys.path.append('../code')
from popularity_model import *
from model_evaluator import ModelEvaluator
model_evaluator = ModelEvaluator(k_list = [5, 10])

### sprawdzam submodele i główny
#### dla authora są mocno zaniżone wyniki, bo często nie rekomenduje nic a test tego nie uwzględnia, podobnie dla department (w mniejszym stopniu)
#### merged i popularity działają normalnie (zawsze coś jest rekomednowane)

In [6]:
# popularity model
p_model = Popularity_model(art_db,readers)
cf_global_metrics, cf_detailed_results_df = model_evaluator.evaluate_model(p_model, readers, readers_train, readers_test)
print('\nGlobal metrics:\n%s' % cf_global_metrics)

999 users processed

Global metrics:
{'modelName': 'p_model', 'recall@5': 0.2432063492063492, 'precision@5': 0.04864126984126984, 'f1_score@5': 0.08106878306878307, 'ndcg@5': 0.16576039603223658, 'recall@10': 0.36596388888888887, 'precision@10': 0.03659638888888889, 'f1_score@10': 0.0665388888888889, 'ndcg@10': 0.20434010334582028}


In [12]:
# author popularity model
p_model = Popularity_model_author(art_db,readers)
cf_global_metrics, cf_detailed_results_df = model_evaluator.evaluate_model(p_model, readers, readers_train, readers_test)
print(f'\nGlobal metrics:\n{cf_global_metrics}')

999 users processed

Global metrics:
{'modelName': 'p_model', 'recall@5': 0.11133888888888889, 'precision@5': 0.02226777777777778, 'f1_score@5': 0.03711296296296296, 'ndcg@5': 0.09470721160985855, 'recall@10': 0.1117611111111111, 'precision@10': 0.011176111111111112, 'f1_score@10': 0.020320202020202023, 'ndcg@10': 0.09471658799059719}


In [10]:
# department popularity model
p_model = Popularity_model_department(art_db,readers)
cf_global_metrics, cf_detailed_results_df = model_evaluator.evaluate_model(p_model, readers, readers_train, readers_test)
print(f'\nGlobal metrics:\n{cf_global_metrics}')

999 users processed

Global metrics:
{'modelName': 'p_model', 'recall@5': 0.3183547619047619, 'precision@5': 0.06367095238095238, 'f1_score@5': 0.10611825396825397, 'ndcg@5': 0.2223098158596075, 'recall@10': 0.44578769841269844, 'precision@10': 0.044578769841269836, 'f1_score@10': 0.0810523088023088, 'ndcg@10': 0.25908419412702155}


In [11]:
# merged model
p_model = Popularity_model_merge(art_db,readers)
cf_global_metrics, cf_detailed_results_df = model_evaluator.evaluate_model(p_model, readers, readers_train, readers_test)
print(f'\nGlobal metrics:\n{cf_global_metrics}')

999 users processed

Global metrics:
{'modelName': 'p_model', 'recall@5': 0.23530436507936509, 'precision@5': 0.047060873015873014, 'f1_score@5': 0.07843478835978837, 'ndcg@5': 0.16137849778207283, 'recall@10': 0.36453492063492066, 'precision@10': 0.03645349206349206, 'f1_score@10': 0.06627907647907648, 'ndcg@10': 0.20549662349385597}


In [12]:
cf_detailed_results_df

Unnamed: 0,hits@5_count,interacted_count,recall@5,precision@5,f1_score@5,ndcg@5,hits@10_count,recall@10,precision@10,f1_score@10,ndcg@10,_person_id
452,1,10,0.1,0.02,0.033333,0.050000,2,0.2,0.02,0.036364,0.068954,481
603,2,10,0.2,0.04,0.066667,0.143068,3,0.3,0.03,0.054545,0.161649,907
1,2,10,0.2,0.04,0.066667,0.200000,2,0.2,0.02,0.036364,0.200000,392
185,7,10,0.7,0.14,0.233333,0.613093,9,0.9,0.09,0.163636,0.634977,494
184,2,10,0.2,0.04,0.066667,0.093068,3,0.3,0.03,0.054545,0.121974,323
...,...,...,...,...,...,...,...,...,...,...,...,...
909,1,1,1.0,0.20,0.333333,0.500000,1,1.0,0.10,0.181818,0.630930,721
893,0,1,0.0,0.00,0.000000,0.000000,1,1.0,0.10,0.181818,0.289065,165
403,0,1,0.0,0.00,0.000000,0.000000,0,0.0,0.00,0.000000,0.000000,512
875,0,1,0.0,0.00,0.000000,0.000000,1,1.0,0.10,0.181818,0.289065,592
