# Day 7 - Candidate Generation: Popularity-Based Recall
In this notebook, we generate item candidates based on popularity: for each action type (clicks, carts, orders), the most frequent items are paired with every session as recall candidates.

In [None]:
import pandas as pd
import numpy as np
# Configure display options: let pandas render up to 100 rows.
pd.options.display.max_rows = 100

In [None]:
# Load the events data from the Kaggle input directory.
df = pd.read_parquet(
    path='/kaggle/input/otto-recommender-system/train.parquet',
)
# Preview the first five rows (shown as the cell output in a notebook).
df.head(n=5)

In [None]:
# Map the numeric action-type codes to text labels:
# 0 -> 'clicks', 1 -> 'carts', 2 -> 'orders'.
type_map = dict(enumerate(['clicks', 'carts', 'orders']))
df['type_name'] = df['type'].map(type_map)

In [None]:
# For each action type, count how often every item (aid) occurs and
# order the items from most to least frequent.
popular_items = {
    action: (
        df.loc[df['type_name'] == action]
        .groupby('aid')
        .size()
        .sort_values(ascending=False)
    )
    for action in ['clicks', 'carts', 'orders']
}

In [None]:
# Recall function: pair the popular items with every session to form training samples.
def create_popularity_candidates(sessions, popular_aids, top_n=50):
    """Pair every unique session with the leading entries of ``popular_aids``.

    Args:
        sessions: DataFrame with a 'session' column. Each distinct value
            receives the same list of candidate items.
        popular_aids: pandas object whose index holds the item ids (aid).
            The first ``top_n`` index entries are used as given, so the
            caller is expected to have sorted it most-popular first.
        top_n: Maximum number of items paired with each session. When
            ``popular_aids`` has fewer entries, all of them are used.

    Returns:
        DataFrame with columns 'session' and 'aid'. Rows are grouped by
        session (in order of first appearance), and within each session
        the items keep the order of ``popular_aids``.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    session_ids = sessions['session'].unique()
    top_aids = np.asarray(popular_aids.index[:top_n])
    # Repeat by the number of items actually selected, not by top_n:
    # with fewer than top_n popular items the two columns would otherwise
    # differ in length and the DataFrame constructor would raise.
    items_per_session = len(top_aids)
    return pd.DataFrame({
        'session': np.repeat(session_ids, items_per_session),
        'aid': np.tile(top_aids, len(session_ids)),
    })

In [None]:
# Build the candidate set for each action type.
# NOTE(review): every unique session is paired with up to 50 items per
# action type, so the result has up to (#sessions * 50 * 3) rows — confirm
# this fits in memory when run on the full events table.
# The set of sessions does not depend on the action type, so deduplicate
# once instead of rescanning the whole events table on every iteration.
unique_sessions = df[['session']].drop_duplicates()
candidates_all = []
for action in ['clicks', 'carts', 'orders']:
    candidates = create_popularity_candidates(unique_sessions, popular_items[action], top_n=50)
    # Tag each row with the action type whose popularity ranking produced it.
    candidates['type'] = action
    candidates_all.append(candidates)
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without
# it each per-action frame keeps its own 0-based labels, so index values
# repeat three times and get written to parquet as an extra column.
candidates_df = pd.concat(candidates_all, ignore_index=True)
candidates_df.head()

In [None]:
# Save the result for use in later steps.
candidates_df.to_parquet(path='popularity_candidates.parquet')