In [4]:
import sys
import os

# Add the project root directory to Python's path
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import from metrics module
from metrics import evaluate_recommendations_df, evaluate_all_models, format_evaluation_results

# 3. Popularity/Recency on training set

In [16]:
import pandas as pd
import numpy as np
from itertools import product

# ── CONFIG ────────────────────────────────────────────────────
TRAIN_PATH        = "../datasets/train_clicks.parquet"
VAL_PATH          = "../datasets/valid_clicks.parquet"
TEST_PATH         = "../datasets/test_clicks.parquet"
META_PATH         = "../datasets/articles_metadata.csv"   # or your parquet copy
HALF_LIFE_DAYS    = [1, 3, 7, 14]
BETA_VALUES       = [0.0, 0.05, 0.1, 0.2]
FRESH_WINDOW_DAYS = 1
TOP_M             = 500
RECALL_K          = 10


In [6]:
# 1) Load train / val / test
train_df = pd.read_parquet(TRAIN_PATH)
val_df   = pd.read_parquet(VAL_PATH)
test_df  = pd.read_parquet(TEST_PATH)

# ensure timestamps are datetime
for df in (train_df, val_df, test_df):
    df["click_timestamp"] = pd.to_datetime(df["click_timestamp"], unit="ms")

print(f"Train: {train_df.shape}, Val: {val_df.shape}, Test: {test_df.shape}")

Train: (2857109, 12), Val: (65536, 12), Test: (65536, 12)


In [7]:
# 2) Load metadata
dtypes = {
        "article_id": "uint32",
        "category_id": "uint16",
        "publisher_id": "uint8",
        "words_count": "uint16"
    }

articles = pd.read_csv(META_PATH,dtype=dtypes)

articles["created_at_ts"] = pd.to_datetime(articles["created_at_ts"], unit="ms")
print(articles.shape)
articles.head()


(364047, 5)


Unnamed: 0,article_id,category_id,created_at_ts,publisher_id,words_count
0,0,0,2017-12-13 05:53:39,0,168
1,1,1,2014-07-14 12:45:36,0,189
2,2,1,2014-08-22 00:35:06,0,250
3,3,1,2014-08-19 17:11:53,0,230
4,4,1,2014-08-03 13:06:11,0,162


In [8]:
# import numpy as np

def compute_scores(train_df, articles, half_life_days, beta, fresh_window_days, top_m=500):
    # 1) global reference time
    now = train_df["click_timestamp"].max()
    # 2) decay constant (per second)
    λ = np.log(2) / pd.Timedelta(days=half_life_days).total_seconds()
    # 3) per-click weights
    ages    = (now - train_df["click_timestamp"]).dt.total_seconds()
    weights = np.exp(-λ * ages)
    # 4) aggregate per article
    pop = (
        train_df.assign(weight=weights)
                .groupby("click_article_id")["weight"]
                .sum()
                .rename("pop_score")
                .to_frame()
    )
    # 5) join publication times
    pop = pop.join(
        articles.set_index("article_id")[["created_at_ts"]],
        how="left"
    )
    # 6) apply freshness boost
    is_fresh = (now - pop["created_at_ts"]) <= pd.Timedelta(days=fresh_window_days)
    pop["final_score"] = pop["pop_score"] * (1 + beta * is_fresh.astype(float))
    # 7) build cleaned candidates DF
    top_series = pop["final_score"].nlargest(top_m)
    cands = pd.DataFrame({
        "article_id": top_series.index.astype(int),
        "final_score": top_series.values
    })
    return cands

In [9]:
from itertools import product

results = []
for counter, (h, beta) in enumerate(product(HALF_LIFE_DAYS, BETA_VALUES)):
    print(f"Entry {counter+1}/{len(HALF_LIFE_DAYS)*len(BETA_VALUES)}: h={h}, β={beta}")
    
    # Generate recommendations using your existing function
    cands = compute_scores(train_df, articles, h, beta, FRESH_WINDOW_DAYS, top_m=TOP_M)
    
    # Convert to DataFrame format if not already
    if not isinstance(cands, pd.DataFrame):
        cands_df = pd.DataFrame({"article_id": cands})
    else:
        cands_df = cands
    
    # Evaluate with the new metrics function
    metrics = evaluate_recommendations_df(
        recommendations_df=cands_df,
        test_df=val_df,
        articles_df=articles,
        k_values=[RECALL_K]  # Use your existing RECALL_K
    )
    
    # Get the recall value for your existing K
    r = metrics.get(f'recall@{RECALL_K}', 0)
    
    # Store results in the same format as before
    result_entry = {
        "half_life_days": h,
        "beta": beta,
        f"recall@{RECALL_K}": r
    }
    
    # Add other metrics if you want them
    for metric in ['precision', 'f1', 'ndcg']:
        metric_key = f'{metric}@{RECALL_K}'
        if metric_key in metrics:
            result_entry[metric_key] = metrics[metric_key]
    
    results.append(result_entry)
    
    # Print results
    print(f" → Recall@{RECALL_K}: {r:.4f}")
    
    # Optionally print other metrics
    for metric in ['precision', 'f1', 'ndcg']:
        metric_key = f'{metric}@{RECALL_K}'
        if metric_key in metrics:
            print(f" → {metric_key}: {metrics[metric_key]:.4f}")
    print()

# Create DataFrame and find best parameters as before
df_res = pd.DataFrame(results)
best = df_res.sort_values(f"recall@{RECALL_K}", ascending=False).iloc[0]
print("Best hyperparameters:\n", best)

Entry 1/16: h=1, β=0.0
 → Recall@10: 0.0714
 → precision@10: 0.0071
 → f1@10: 0.0130
 → ndcg@10: 0.0210

Entry 2/16: h=1, β=0.05
 → Recall@10: 0.0714
 → precision@10: 0.0071
 → f1@10: 0.0130
 → ndcg@10: 0.0210

Entry 3/16: h=1, β=0.1
 → Recall@10: 0.0714
 → precision@10: 0.0071
 → f1@10: 0.0130
 → ndcg@10: 0.0210

Entry 4/16: h=1, β=0.2
 → Recall@10: 0.0714
 → precision@10: 0.0071
 → f1@10: 0.0130
 → ndcg@10: 0.0210

Entry 5/16: h=3, β=0.0
 → Recall@10: 0.1470
 → precision@10: 0.0147
 → f1@10: 0.0267
 → ndcg@10: 0.0641

Entry 6/16: h=3, β=0.05
 → Recall@10: 0.1470
 → precision@10: 0.0147
 → f1@10: 0.0267
 → ndcg@10: 0.0641

Entry 7/16: h=3, β=0.1
 → Recall@10: 0.1470
 → precision@10: 0.0147
 → f1@10: 0.0267
 → ndcg@10: 0.0641

Entry 8/16: h=3, β=0.2
 → Recall@10: 0.1470
 → precision@10: 0.0147
 → f1@10: 0.0267
 → ndcg@10: 0.0641

Entry 9/16: h=7, β=0.0
 → Recall@10: 0.0582
 → precision@10: 0.0058
 → f1@10: 0.0106
 → ndcg@10: 0.0297

Entry 10/16: h=7, β=0.05
 → Recall@10: 0.0582
 → prec

## 3.2 Combine training and validation sets

In [10]:
import pandas as pd

# Cell: Final Evaluation on Test Set

# 1) Load splits (if not already in memory)
train_df = pd.read_parquet("../datasets/train_clicks.parquet")
val_df   = pd.read_parquet("../datasets/valid_clicks.parquet")
test_df  = pd.read_parquet("../datasets/test_clicks.parquet")

# Ensure timestamps are datetime
for df in (train_df, val_df, test_df):
    df["click_timestamp"] = pd.to_datetime(df["click_timestamp"], unit="ms")

In [11]:
# 2) Load metadata (if not already in memory)
articles = pd.read_csv(
    "../datasets/articles_metadata.csv",
    dtype={
        "article_id": "uint32",
        "category_id": "uint16",
        "publisher_id": "uint8",
        "words_count": "uint16"
    }
)
articles["created_at_ts"] = pd.to_datetime(articles["created_at_ts"], unit="ms")

In [14]:
# 3) Combine train + validation
train_plus_val = pd.concat([train_df, val_df], ignore_index=True)

In [None]:
compute_scores(train_df, articles, half_life_days, beta, fresh_window_days, top_m=500

In [20]:
# 5) Evaluate on test set
print("Evaluating final model on test set...")

# Generate recommendations with your best parameters
cands = compute_scores(train_plus_val, articles, half_life_days=3, beta=0, fresh_window_days=FRESH_WINDOW_DAYS, top_m=TOP_M)

# Convert to DataFrame format if not already
if not isinstance(cands, pd.DataFrame):
    cands_df = pd.DataFrame({"article_id": cands})
else:
    cands_df = cands

# Calculate item popularity for novelty metric
item_popularity = train_df['click_article_id'].value_counts().to_dict()
total_interactions = len(train_df)

# Evaluate with all metrics
metrics = evaluate_recommendations_df(
    recommendations_df=cands_df,
    test_df=test_df,
    articles_df=articles,
    k_values=[5, 10, 20, 50],  # Multiple K values
    item_popularity=item_popularity,
    total_interactions=total_interactions
)

# Print all metrics in a formatted way
print("\n===== Final Test Results =====")
print(f"Model: Popularity-Recency (h=3, β=0)")
print("-----------------------------")

# Group metrics by type and k value
metric_types = ['recall', 'precision', 'f1', 'ndcg', 'novelty']
k_values = [5, 10, 20, 50]

for k in k_values:
    print(f"\nAt K = {k}:")
    for metric in metric_types:
        metric_key = f'{metric}@{k}'
        if metric_key in metrics:
            print(f"  {metric.capitalize():10}: {metrics[metric_key]:.4f}")

# Create a summary DataFrame for easy visualization
import pandas as pd
summary = pd.DataFrame({
    'K': [],
    'Recall': [],
    'Precision': [],
    'F1': [],
    'NDCG': [],
    'Novelty': []
})

for k in k_values:
    row = {'K': k}
    for metric in metric_types:
        metric_key = f'{metric}@{k}'
        if metric_key in metrics:
            row[metric.capitalize()] = metrics[metric_key]
    summary = pd.concat([summary, pd.DataFrame([row])], ignore_index=True)

# Display the summary table
print("\n===== Summary Table =====")
print(summary.set_index('K').round(4))

# Highlight the original metric for comparison
print(f"\nOriginal Test Recall@{RECALL_K}: {metrics[f'recall@{RECALL_K}']:.4f}")

Evaluating final model on test set...

===== Final Test Results =====
Model: Popularity-Recency (h=3, β=0)
-----------------------------

At K = 5:
  Recall    : 0.0915
  Precision : 0.0183
  F1        : 0.0305
  Ndcg      : 0.0500
  Novelty   : 8.0724

At K = 10:
  Recall    : 0.1211
  Precision : 0.0121
  F1        : 0.0220
  Ndcg      : 0.0597
  Novelty   : 7.9813

At K = 20:
  Recall    : 0.1819
  Precision : 0.0091
  F1        : 0.0173
  Ndcg      : 0.0746
  Novelty   : 8.4886

At K = 50:
  Recall    : 0.2868
  Precision : 0.0057
  F1        : 0.0112
  Ndcg      : 0.0953
  Novelty   : 9.0322

===== Summary Table =====
      Recall  Precision      F1  NDCG  Novelty    Ndcg
K                                                     
5.0   0.0915     0.0183  0.0305   NaN   8.0724  0.0500
10.0  0.1211     0.0121  0.0220   NaN   7.9813  0.0597
20.0  0.1819     0.0091  0.0173   NaN   8.4886  0.0746
50.0  0.2868     0.0057  0.0112   NaN   9.0322  0.0953

Original Test Recall@10: 0.1211
