In [1]:
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Load the raw reading log and give its key columns project-wide names:
# "id" -> "user_id" (the reader) and "art_id" -> "nzz_id" (the article).
readers = (
    pd.read_csv("../data/readers.csv")
    .rename(columns={"id": "user_id", "art_id": "nzz_id"})
)
# Preview the first rows to sanity-check the renamed columns.
readers.head()

Unnamed: 0,user_id,nzz_id
0,1,ld.154103
1,1,ld.142559
2,1,1.18331199
3,1,ld.144819
4,1,ld.1293110


In [3]:
# Exclusive lower bound on the number of reads a user must have to be kept:
# users with `read_count > min_read_count` survive, i.e. at least 4 reads.
# NOTE(review): the original comment said "minimum 5 articles", which does not
# match this threshold; the code's behaviour is preserved here -- confirm the
# intended cutoff before changing it (it affects every metric downstream).
min_read_count = 3

# One row per user with the number of interactions recorded for that user.
read_counts = (
    readers["user_id"]
    .value_counts(sort=True)
    .rename_axis("user_id")
    .reset_index(name="read_count")
)
read_counts = read_counts[read_counts["read_count"] > min_read_count]

# Drop every interaction that belongs to a user below the activity threshold.
readers = readers[readers["user_id"].isin(read_counts["user_id"])]

In [4]:
# Train/Test split
from sklearn.model_selection import train_test_split

# Seed for the split. It is passed to train_test_split below so that the
# variable and the value actually used can no longer drift apart (previously
# this was set to None while the call hard-coded 12).
random_state = 12
readers_train, readers_test = train_test_split(
    readers,
    stratify=readers["user_id"],  # stratify on the user so the split is done per user
    test_size=0.20,
    random_state=random_state,
)

print(f"Train set size {len(readers_train)}")
print(f"test set size {len(readers_test)}")
# `nzz_id` is the article identifier (renamed from "art_id" when loading), so
# these are counts of distinct articles, not users -- the labels say so now.
unique_train = readers_train["nzz_id"].unique()
unique_test = readers_test["nzz_id"].unique()
print(f"n articles in test {len(unique_test)}")
print(f"n articles in train {len(unique_train)}")


Train set size 22284
test set size 5571
n users in test 3995
n users in train 10046


In [5]:
import sys

# Make the project-local modules in ../code importable. The membership check
# keeps the cell idempotent: re-running it does not pile up duplicate entries
# in sys.path.
code_dir = '../code'
if code_dir not in sys.path:
    sys.path.append(code_dir)

from cf_model import CFModel
from model_evaluator import ModelEvaluator
from random_model import RandomModel
from implicit_model import ImplicitModel

# Shared evaluator used for every model below; k_list=[10] matches the "@10"
# metrics reported in the evaluation outputs.
model_evaluator = ModelEvaluator(k_list = [10])

In [10]:
# Fit the implicit model on the training interactions and evaluate it with the
# shared evaluator.
# NOTE(review): the metrics recorded for this model differ between the saved
# outputs of this cell and the comparison table further down, so results vary
# from run to run -- consider fixing a seed if ImplicitModel exposes one
# (TODO confirm).
implicit_recommender_model = ImplicitModel(n_latent_factors=500, regularization=150, alpha=50, iterations=10)
implicit_recommender_model.fit(readers_train)
# The keyword is `interactions` (same call as for the CF model); the previous
# spelling `interactio0s` was a typo.
implicit_global_metrics, implicit_detailed_results_df = model_evaluator.evaluate_model(
    implicit_recommender_model, readers, readers_train, readers_test, interactions=0
)
print('\nGlobal metrics:\n%s' % implicit_global_metrics)

999 users processed

Global metrics:
{'modelName': 'implicit_model', 'recall@10': 0.426, 'precision@10': 0.0426, 'f1_score@10': 0.07745454545454547, 'ndcg@10': 0.2711274840559555, 'personalization@10': 0.9978624624624625}


In [7]:
# Fit the CF model on the training interactions, then score it with the shared
# evaluator using the same arguments as for the other model.
cf_recommender_model = CFModel(n_latent_factors=500)
cf_recommender_model.fit(readers_train)

cf_global_metrics, cf_detailed_results_df = model_evaluator.evaluate_model(
    cf_recommender_model,
    readers,
    readers_train,
    readers_test,
    interactions=0,
)
print(f"\nGlobal metrics:\n{cf_global_metrics}")

999 users processed

Global metrics:
{'modelName': 'CF_model', 'recall@10': 0.27898055555555556, 'precision@10': 0.027898055555555558, 'f1_score@10': 0.05072373737373737, 'ndcg@10': 0.1714511320219907, 'personalization@10': 0.9986016016016016}


In [8]:
# Gather the global metrics of both models into one table, one row per model.
global_m = [cf_global_metrics, implicit_global_metrics]
# A list of dicts goes straight to the DataFrame constructor; from_dict is a
# classmethod intended for dict input and does not need a throw-away instance.
global_df = pd.DataFrame(global_m)
global_df.head()

Unnamed: 0,modelName,recall@10,precision@10,f1_score@10,ndcg@10,personalization@10
0,CF_model,0.278981,0.027898,0.050724,0.171451,0.998602
1,implicit_model,0.417608,0.041761,0.075929,0.271204,0.997812


In [9]:
def metric_bar_chart(data, metric):
    """Build a bar chart of one metric column, with one bar per model.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a ``modelName`` column and the metric column to plot.
    metric : str
        Name of the metric column, e.g. ``"ndcg@10"``.

    Returns
    -------
    alt.Chart
        Bar chart with ``modelName`` on the x axis (horizontal labels) and
        the metric as a quantitative y axis.
    """
    return alt.Chart(data, width=400).mark_bar(width=50).encode(
        y=f"{metric}:Q",
        x=alt.X("modelName:O", axis=alt.Axis(labelAngle=0)))


# Same chart for each metric; only the plotted column differs.
f1_score_chart = metric_bar_chart(global_df, "f1_score@10")
ndcg_chart = metric_bar_chart(global_df, "ndcg@10")
personalization_chart = metric_bar_chart(global_df, "personalization@10")

# Show the three charts side by side.
f1_score_chart | ndcg_chart | personalization_chart