# HRFlow Challenge


## Imports & config


In [1]:
from modules.metrics import mrr_at_k, accuracy, challenge_score

import ast
import hashlib
import importlib
import json
import os
import pickle
import warnings
from typing import Dict, List

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import optuna

# Candidate ranking models: short name -> (module path, class name).
# Every model module lives under ``modules.models`` and is named after its
# short name, so only the class name has to be spelled out.
MODEL_SPECS = {
    short_name: (f"modules.models.{short_name}", class_name)
    for short_name, class_name in (
        ("cbf", "CBFModel"),
        ("mf", "MFModel"),
        ("popularity", "PopularityModel"),
        ("session_gnn", "SessionGNNModel"),
        ("user_user", "UserUserModel"),
        ("item_item", "ItemItemModel"),
    )
}


def load_available_models(model_specs: Dict[str, tuple]) -> Dict[str, type]:
    """Import the model classes described by ``model_specs`` and keep the usable ones.

    Args:
        model_specs: Mapping of model name to a ``(module_path, class_name)`` pair.

    Returns:
        Mapping of model name to the imported class, in the iteration order of
        ``model_specs``.  A model is left out (with a ``RuntimeWarning``) when
        importing the module or looking up the class raises, or when the class
        does not expose a truthy ``IS_IMPLEMENTED`` attribute.
    """
    available = {}
    for name, (module_path, class_name) in model_specs.items():
        try:
            candidate = getattr(importlib.import_module(module_path), class_name)
        except Exception as exc:
            # Best effort: one broken model must not prevent the others from loading.
            warnings.warn(
                f"Skipping model '{name}' because import failed: {exc}",
                RuntimeWarning,
            )
        else:
            if getattr(candidate, "IS_IMPLEMENTED", False):
                available[name] = candidate
            else:
                warnings.warn(
                    f"Skipping model '{name}' because it is not implemented yet.",
                    RuntimeWarning,
                )
    return available


# Root folder holding the data, the submission file and the model cache.
# It can be overridden with the HRFLOW_BASE_DIR environment variable so the
# notebook runs on another machine; the default keeps the original location.
BASE_DIR = os.environ.get(
    "HRFLOW_BASE_DIR", "/Users/Administrateur/School/X/ENS Data Competition"
)

DATA_DIR = os.path.join(BASE_DIR, "data")
print(f"DATA: {DATA_DIR}")

SUBMISSION_PATH = os.path.join(BASE_DIR, "submission.csv")
print(f"SUBMISSION: {SUBMISSION_PATH}")

# Pickled fitted models are stored here (see train_or_load_model).
CACHE_DIR = os.path.join(BASE_DIR, "model_cache")
os.makedirs(CACHE_DIR, exist_ok=True)
print(f"CACHE: {CACHE_DIR}")

# SQLite-backed Optuna storage used to resume the full tuning study.
OPTUNA_DIR = os.path.join(CACHE_DIR, "optuna")
os.makedirs(OPTUNA_DIR, exist_ok=True)
OPTUNA_STORAGE_PATH = os.path.join(OPTUNA_DIR, "studies.sqlite3")
OPTUNA_FULL_STUDY_NAME = "hrflow_full_hyperparameter_tuning"
print(f"OPTUNA STORAGE: {OPTUNA_STORAGE_PATH}")

CORES = os.cpu_count() or 1
print(f"CORES: {CORES}")

MODE = "full_hyperparameter_tuning"  # "weights_tuning_only" or "full_hyperparameter_tuning"
# Fail fast: later cells compare MODE with ``==`` and would silently fall into
# their ``else`` branches on a typo.
if MODE not in {"weights_tuning_only", "full_hyperparameter_tuning"}:
    raise ValueError(
        f"Unknown MODE={MODE!r}. Expected one of "
        "['weights_tuning_only', 'full_hyperparameter_tuning']."
    )
print(f"TUNING MODE: {MODE}")

SCORE_NORMALIZATION = "minmax"  # "none", "minmax" or "rank"
# The scoring helpers lower-case this value before checking it, so do the same here.
if str(SCORE_NORMALIZATION).lower() not in {"none", "minmax", "rank"}:
    raise ValueError(
        f"Unknown SCORE_NORMALIZATION={SCORE_NORMALIZATION!r}. "
        "Expected one of ['none', 'minmax', 'rank']."
    )
print(f"NORMALIZATION MODE: {SCORE_NORMALIZATION}")

RANDOM_STATE = 42  # seed for the train/validation split and the Optuna sampler
VALID_SIZE = 0.2  # fraction of sessions held out for validation
N_TRIALS = 300  # target number of Optuna trials
TOP_K = 10  # length of the recommendation list / MRR cut-off

DATA: /Users/Administrateur/School/X/ENS Data Competition/data
SUBMISSION: /Users/Administrateur/School/X/ENS Data Competition/submission.csv
CACHE: /Users/Administrateur/School/X/ENS Data Competition/model_cache
CORES: 10
TUNING MODE: full_hyperparameter_tuning
NORMALIZATION MODE: minmax


## Data loading


In [2]:
# Raw competition tables, read in the order train inputs, train targets, test inputs.
X_train_raw, y_train_raw, X_test_raw = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name))
    for csv_name in ("x_train.csv", "y_train.csv", "x_test.csv")
)

# Job listing metadata; handed to every model's ``fit`` as ``job_listings``.
with open(os.path.join(DATA_DIR, "job_listings.json"), "r", encoding="utf-8") as f:
    job_listings = json.loads(f.read())


def parse_list_cell(value):
    """Turn one CSV cell into a Python list, never raising on bad input.

    Args:
        value: Either an already-parsed list, or the string form of a Python
            list literal (e.g. ``"[1, 2]"``), or a missing/other value.

    Returns:
        ``value`` itself when it is a list; the parsed list when ``value`` is a
        string holding a list literal; ``[]`` for everything else (missing
        values, non-string objects, unparsable text, literals that are not
        lists such as tuples or dicts).
    """
    if isinstance(value, list):
        return value
    # Only strings can hold a literal.  This also covers NaN/None/NA and
    # array-like cells, for which a pd.isna() truth test would be ambiguous.
    if not isinstance(value, str):
        return []
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # literal_eval documents all of these for malformed input; e.g.
        # "{[1]: 2}" raises TypeError (unhashable key), not ValueError.
        return []
    return parsed if isinstance(parsed, list) else []


def explode_sessions(df: pd.DataFrame) -> pd.DataFrame:
    """Flatten per-session lists into one ``(session_id, job_id, action)`` row per event.

    Args:
        df: Frame with columns ``session_id``, ``job_ids`` and ``actions``.
            The two list columns may hold real lists or their string form;
            they are parsed with ``parse_list_cell``.

    Returns:
        A frame with string columns ``session_id``, ``job_id`` and ``action``.
        When a session's two lists differ in length, both are truncated to the
        shorter one.  Sessions with no usable pair produce no row.  An empty
        input yields an empty frame with the three output columns.
    """
    output_columns = ["session_id", "job_id", "action"]
    if df.empty:
        # Nothing to explode; skip the list handling entirely.
        return pd.DataFrame(columns=output_columns)

    # Single pass over the rows instead of three row-wise DataFrame.apply calls.
    job_lists = []
    action_lists = []
    for raw_jobs, raw_actions in zip(df["job_ids"], df["actions"]):
        jobs = parse_list_cell(raw_jobs)
        actions = parse_list_cell(raw_actions)
        pair_len = min(len(jobs), len(actions))
        job_lists.append([str(x) for x in jobs[:pair_len]])
        action_lists.append([str(x) for x in actions[:pair_len]])

    paired = pd.DataFrame(
        {
            "session_id": df["session_id"].astype(str).tolist(),
            "job_id": job_lists,
            "action": action_lists,
        }
    )
    # Multi-column explode needs equal list lengths per row, which the
    # truncation above guarantees.  Empty lists explode to NaN and are dropped.
    exploded = paired.explode(["job_id", "action"], ignore_index=True).dropna(
        subset=["job_id", "action"]
    )
    return exploded[output_columns]


# One (session_id, job_id, action) row per observed event.
X_train, X_test = explode_sessions(X_train_raw), explode_sessions(X_test_raw)

# Targets get the same string typing as the exploded interaction frames.
y_train = y_train_raw.astype({"session_id": str, "job_id": str, "action": str})

# Test sessions are re-labelled so their ids cannot collide with train ids once
# everything is concatenated into X_full.
X_test_for_fit = X_test.copy()
train_sid_num = pd.to_numeric(X_train_raw["session_id"], errors="coerce")
test_sid_num = pd.to_numeric(X_test_for_fit["session_id"], errors="coerce")
if train_sid_num.isna().any() or test_sid_num.isna().any():
    # At least one id is not numeric: fall back to a textual prefix.
    X_test_for_fit["session_id"] = "test_" + X_test_for_fit["session_id"].astype(str)
    print("Applied 'test_' session_id prefix for final retraining.")
else:
    # All ids are numeric: shift test ids past the largest train id.
    test_session_offset = int(train_sid_num.max()) + 1
    X_test_for_fit["session_id"] = (
        (test_sid_num + test_session_offset).astype(np.int64).astype(str)
    )
    print(
        f"Applied test session_id offset for final retraining: +{test_session_offset}"
    )

X_full = pd.concat(
    [
        frame[["session_id", "job_id", "action"]]
        for frame in (X_train, y_train, X_test_for_fit)
    ],
    ignore_index=True,
)  # for final retraining before inference

print(
    f"X_train shape: {X_train.shape}\n"
    f"y_train shape: {y_train.shape}\n"
    f"X_test shape: {X_test.shape}\n"
    f"X_full shape: {X_full.shape}\n"
    f"job_listings size: {len(job_listings)}"
)

Applied test session_id offset for final retraining: +15882
X_train shape: (119747, 3)
y_train shape: (15882, 3)
X_test shape: (13846, 3)
X_full shape: (149475, 3)
job_listings size: 21917


## Ranking


In [3]:
MODEL_REGISTRY = load_available_models(MODEL_SPECS)
if len(MODEL_REGISTRY) == 0:
    raise RuntimeError(
        "No implemented models are available. Implement at least one model before ranking."
    )

print("Active ranking models:", sorted(MODEL_REGISTRY))

# Models that always run with their default parameters during tuning.
NON_TUNABLE_MODELS = {"session_gnn"}.intersection(MODEL_REGISTRY)
all_job_ids = sorted(set(X_full["job_id"].astype(str)))

# Hold out a fraction of the labelled sessions for validation.
session_ids = y_train["session_id"].drop_duplicates().values
rank_train_sessions, rank_valid_sessions = train_test_split(
    session_ids,
    test_size=VALID_SIZE,
    random_state=RANDOM_STATE,
    shuffle=True,
)

X_rank_train, X_rank_valid = (
    X_train[X_train["session_id"].isin(fold)].copy()
    for fold in (rank_train_sessions, rank_valid_sessions)
)
y_rank_train, y_rank_valid = (
    y_train[y_train["session_id"].isin(fold)].copy()
    for fold in (rank_train_sessions, rank_valid_sessions)
)

# Candidate pool: jobs seen in the ranking-train interactions by decreasing
# frequency, followed by every other known job id in sorted order.
pop_train = list(X_rank_train["job_id"].value_counts().index)
pop_train_set = set(pop_train)
pop_fallback = [*pop_train, *(j for j in all_job_ids if j not in pop_train_set)]
candidate_job_ids = pop_fallback

# History used for sessions that have no recorded interaction.
empty_session_frame = pd.DataFrame(columns=["session_id", "job_id", "action"])


# (model_name, stage) pairs that were already printed with ``once=True``.
_MODEL_LOG_ONCE = set()


def log_model_event(model_name: str, stage: str, once: bool = False) -> None:
    """Print a ``[model] stage...`` progress line.

    Args:
        model_name: Name shown between brackets.
        stage: Stage label printed after the name.
        once: When true, the line is printed only the first time this
            ``(model_name, stage)`` pair is logged with ``once=True``.
    """
    if not once:
        print(f"[{model_name}] {stage}...")
        return

    marker = (str(model_name), str(stage))
    if marker not in _MODEL_LOG_ONCE:
        print(f"[{model_name}] {stage}...")
        _MODEL_LOG_ONCE.add(marker)


def normalize_weights(raw: Dict[str, float]) -> Dict[str, float]:
    """Clip weights at zero and rescale them so they sum to one.

    Args:
        raw: Mapping of name to raw weight.

    Returns:
        An empty dict for empty input.  Otherwise a dict with the same keys in
        the same order: each clipped weight divided by the clipped total, or a
        uniform ``1 / len(raw)`` share when the clipped total is zero.
    """
    if not raw:
        return {}

    clipped = {}
    for name, value in raw.items():
        clipped[name] = max(float(value), 0.0)

    total = sum(clipped.values())
    if total != 0:
        return {name: value / total for name, value in clipped.items()}

    # Every weight was clipped to zero: share the mass equally.
    return dict.fromkeys(clipped, 1.0 / max(len(clipped), 1))


def model_cache_path(model_name: str, params: Dict) -> str:
    """Return the pickle path used to cache ``model_name`` fitted with ``params``.

    The file name is ``<model_name>_<md5>.pkl`` inside ``CACHE_DIR``, where the
    digest is taken over the key-sorted JSON dump of ``params``.

    NOTE(review): the key covers only the model name and its parameters, not
    the data the model was fitted on -- confirm no caller reuses the cache
    across different training sets.
    """
    canonical_params = json.dumps(params, sort_keys=True)
    digest = hashlib.md5(canonical_params.encode("utf-8")).hexdigest()
    file_name = f"{model_name}_{digest}.pkl"
    return os.path.join(CACHE_DIR, file_name)


def train_or_load_model(
    model_name: str,
    model_cls,
    params: Dict,
    interactions: pd.DataFrame,
    targets: pd.DataFrame | None = None,
    val_interactions: pd.DataFrame | None = None,
    val_targets: pd.DataFrame | None = None,
    use_cache: bool = True,
):
    """Return a fitted model, loading it from the pickle cache when possible.

    Args:
        model_name: Name used for logging and for the cache file name.
        model_cls: Class instantiated as ``model_cls(params=params)``.
        params: Model parameters; together with ``model_name`` they determine
            the cache path (see ``model_cache_path``).
        interactions: Passed positionally to ``model.fit``.
        targets: Forwarded to ``model.fit`` as ``targets``.
        val_interactions: Forwarded to ``model.fit`` as ``val_interactions``.
        val_targets: Forwarded to ``model.fit`` as ``val_targets``.
        use_cache: When true, read an existing cache file and write a new one
            after a successful fit.

    Returns:
        The fitted (or unpickled) model, or ``None`` when construction/fitting
        raised or the model reports ``is_fitted`` as false.  Failures are
        reported as ``RuntimeWarning`` and never propagate.
    """
    # Local import: only the atomic cache write below needs it.
    import tempfile

    cache_path = model_cache_path(model_name, params)
    if use_cache and os.path.exists(cache_path):
        try:
            # NOTE(review): pickle.load runs arbitrary code from the file; this
            # is only acceptable because the cache directory is written by
            # this notebook itself.
            with open(cache_path, "rb") as f:
                return pickle.load(f)
        except Exception as exc:
            # A corrupt or incompatible cache entry falls through to retraining.
            warnings.warn(
                f"Failed to load cache for model '{model_name}', retraining. Error: {exc}",
                RuntimeWarning,
            )

    try:
        model = model_cls(params=params)
        log_model_event(model_name, "Fitting")
        model.fit(
            interactions,
            targets=targets,
            val_interactions=val_interactions,
            val_targets=val_targets,
            job_listings=job_listings,
            all_job_ids=all_job_ids,
        )
    except NotImplementedError as exc:
        warnings.warn(f"Skipping model '{model_name}': {exc}", RuntimeWarning)
        return None
    except Exception as exc:
        warnings.warn(
            f"Skipping model '{model_name}' because fit failed: {exc}",
            RuntimeWarning,
        )
        return None

    # Models without an ``is_fitted`` attribute are assumed to be fitted.
    if not getattr(model, "is_fitted", True):
        warnings.warn(
            f"Skipping model '{model_name}' because fit did not complete successfully.",
            RuntimeWarning,
        )
        return None

    if use_cache:
        tmp_path = None
        try:
            # mkstemp creates a uniquely named file atomically, so concurrent
            # trials writing the same cache entry can never share a temp file
            # (a name drawn from the global NumPy RNG can repeat across threads
            # of one process if a model's fit reseeds that RNG).
            fd, tmp_path = tempfile.mkstemp(
                prefix=f"{os.path.basename(cache_path)}.tmp.",
                dir=os.path.dirname(cache_path) or ".",
            )
            with os.fdopen(fd, "wb") as f:
                pickle.dump(model, f)
            # Same-directory rename: readers see either the old or the new file.
            os.replace(tmp_path, cache_path)
            tmp_path = None
        except Exception as exc:
            warnings.warn(
                f"Could not save cache for model '{model_name}': {exc}",
                RuntimeWarning,
            )
        finally:
            # Do not leave a partial temp file behind when the write failed.
            if tmp_path is not None:
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass

    return model


def normalize_model_scores_vector(
    model_scores: Dict[str, float],
    candidates: List[str],
    mode: str,
) -> np.ndarray:
    """Return one normalized score per candidate as a float32 vector.

    Args:
        model_scores: Mapping of job id to raw score.
        candidates: Job ids, in the order of the returned vector.
        mode: ``"none"``, ``"minmax"`` or ``"rank"`` (case-insensitive).

    Returns:
        A float32 array aligned with ``candidates``.  Missing, non-numeric and
        non-finite scores count as ``0.0`` before normalization.  ``"minmax"``
        rescales to ``[0, 1]`` (all zeros when the range is below ``1e-12``);
        ``"rank"`` maps the best score to ``1.0`` and the worst to ``0.0`` in
        equal steps, ties going to the earlier candidate; ``"none"`` keeps the
        cleaned raw values.

    Raises:
        ValueError: If ``mode`` is not one of the three supported values.
    """
    mode = str(mode).lower()
    if mode not in ("none", "minmax", "rank"):
        raise ValueError(
            f"Unknown SCORE_NORMALIZATION={mode!r}. Expected one of ['none', 'minmax', 'rank']."
        )

    def _raw_score(jid):
        # Anything that cannot be read as a float is treated as "no score".
        try:
            return float(model_scores.get(jid, 0.0))
        except (TypeError, ValueError):
            return 0.0

    values = np.array([_raw_score(jid) for jid in candidates], dtype=np.float64)
    values[~np.isfinite(values)] = 0.0

    if mode == "minmax" and values.size > 0:
        lowest = float(values.min())
        highest = float(values.max())
        if highest - lowest > 1e-12:
            values = (values - lowest) / (highest - lowest)
        else:
            values = np.zeros_like(values)
    elif mode == "rank":
        if values.size == 1:
            values = np.ones_like(values)
        elif values.size > 1:
            # Stable descending order: equal scores keep their candidate order.
            best_first = np.argsort(-values, kind="stable")
            ranked = np.empty_like(values)
            ranked[best_first] = np.linspace(
                1.0, 0.0, num=values.size, dtype=np.float64
            )
            values = ranked

    return values.astype(np.float32, copy=False)


def normalize_model_scores(
    model_scores: Dict[str, float],
    candidates: List[str],
    mode: str,
) -> Dict[str, float]:
    """Dict form of ``normalize_model_scores_vector``.

    Returns:
        Mapping of each candidate id to its normalized score as a Python float.
    """
    vector = normalize_model_scores_vector(
        model_scores=model_scores,
        candidates=candidates,
        mode=mode,
    )
    return dict(zip(candidates, vector.tolist()))


def combined_scores(
    session_history: pd.DataFrame,
    models: Dict[str, object],
    weights: Dict[str, float],
    candidates: List[str],
) -> Dict[str, float]:
    """Blend the normalized scores of all weighted models for one session.

    Args:
        session_history: Interactions of the session, passed to each model's
            ``predict``.
        models: Mapping of model name to fitted model.
        weights: Mapping of model name to blend weight; models with a missing
            or non-positive weight are skipped.
        candidates: Job ids to score.

    Returns:
        Mapping of every candidate to the weighted sum of its per-model scores,
        normalized according to ``SCORE_NORMALIZATION``.  A model whose
        ``predict`` raises is ignored for this call (with a ``RuntimeWarning``).

    Raises:
        ValueError: If ``SCORE_NORMALIZATION`` is not a supported mode.
    """
    blend_mode = str(SCORE_NORMALIZATION).lower()
    if blend_mode not in {"none", "minmax", "rank"}:
        raise ValueError(
            f"Unknown SCORE_NORMALIZATION={blend_mode!r}. Expected one of ['none', 'minmax', 'rank']."
        )

    # Accumulate in a float64 vector aligned with ``candidates`` rather than
    # building one dict per model and adding scores candidate by candidate.
    totals = np.zeros(len(candidates), dtype=np.float64)

    for model_name, model in models.items():
        w = float(weights.get(model_name, 0.0))
        if w <= 0:
            continue

        try:
            log_model_event(model_name, "Predicting", once=True)
            raw_model_scores = model.predict(session_history, candidates) or {}
            # Models may key their scores by non-string ids.
            model_scores = {str(jid): score for jid, score in raw_model_scores.items()}
        except Exception as exc:
            warnings.warn(
                f"Model '{model_name}' predict failed and will be ignored for this request: {exc}",
                RuntimeWarning,
            )
            continue

        normalized = normalize_model_scores_vector(
            model_scores=model_scores,
            candidates=candidates,
            mode=blend_mode,
        )
        # Widen to float64 before weighting so the sum matches a plain
        # Python-float accumulation of the same values.
        totals += w * normalized.astype(np.float64)

    return dict(zip(candidates, totals.tolist()))


def recommend_top_k(
    session_history: pd.DataFrame,
    models: Dict[str, object],
    weights: Dict[str, float],
    candidates: List[str],
    k: int = 10,
) -> List[str]:
    """Return up to ``k`` recommended job ids for one session.

    Candidates are ordered by decreasing blended score (ties keep candidate
    order) and jobs already present in ``session_history`` are removed.  If
    fewer than ``k`` remain, the list is topped up first with unseen
    candidates, then with ids from ``all_job_ids``; that last step does not
    exclude seen jobs.
    """
    if session_history.empty:
        seen = set()
    else:
        seen = set(session_history["job_id"].astype(str).tolist())

    scores = combined_scores(session_history, models, weights, candidates)

    # sorted() is stable, so equally scored jobs stay in candidate order.
    by_score = sorted(scores, key=scores.get, reverse=True)
    recs = [jid for jid in by_score if jid not in seen][:k]

    if len(recs) < k:
        missing = k - len(recs)
        unseen_extra = [
            jid for jid in candidates if jid not in seen and jid not in recs
        ]
        recs.extend(unseen_extra[:missing])
    if len(recs) < k:
        missing = k - len(recs)
        any_extra = [jid for jid in all_job_ids if jid not in recs]
        recs.extend(any_extra[:missing])

    return recs[:k]


def evaluate_ranking(
    models: Dict[str, object],
    weights: Dict[str, float],
    X_hist: pd.DataFrame,
    y_next: pd.DataFrame,
    candidates: List[str],
    k: int = 10,
):
    """Score the blended recommender on held-out next-job targets.

    Args:
        models: Mapping of model name to fitted model.
        weights: Blend weight per model name.
        X_hist: Interaction rows, grouped here by ``session_id``.
        y_next: One row per target, with ``session_id`` and ``job_id`` columns.
        candidates: Job ids eligible for recommendation.
        k: Length of each recommendation list.

    Returns:
        ``(mrr, y_true, y_pred)``: the value of ``mrr_at_k`` plus the target
        job ids and the recommendation lists, both in ``y_next`` row order.
    """
    hist_map = {}
    for sid, grp in X_hist.groupby("session_id", sort=False):
        hist_map[sid] = grp

    y_true = []
    y_pred = []
    for session_id, job_id in zip(y_next["session_id"], y_next["job_id"]):
        # Sessions without any history are scored against an empty frame.
        history = hist_map.get(str(session_id), empty_session_frame)
        y_true.append(str(job_id))
        y_pred.append(recommend_top_k(history, models, weights, candidates, k=k))

    return mrr_at_k(y_true, y_pred, k=k), y_true, y_pred


# Size summary of the ranking split and of the candidate pool.
print(
    "\n".join(
        (
            f"Ranking train interactions: {len(X_rank_train)}",
            f"Ranking train sessions: {y_rank_train['session_id'].nunique()}",
            f"Ranking valid sessions: {y_rank_valid['session_id'].nunique()}",
            f"Candidate pool size: {len(candidate_job_ids)}",
        )
    )
)


Active ranking models: ['cbf', 'item_item', 'mf', 'popularity', 'session_gnn', 'user_user']
Ranking train interactions: 95459
Ranking train sessions: 12705
Ranking valid sessions: 3177
Candidate pool size: 21873


### Run tuning

In [4]:
# Default hyper-parameters of every active model, as returned by ``default_params()``.
default_params_by_model = dict(
    (name, cls.default_params()) for name, cls in MODEL_REGISTRY.items()
)


def _prepare_weights_only_eval_cache(
    models: Dict[str, object],
    X_hist: pd.DataFrame,
    y_next: pd.DataFrame,
    candidates: List[str],
):
    """Precompute per-model normalized scores for every validation target.

    Model predictions do not depend on the blend weights, so they are computed
    once here and reused by ``evaluate_ranking_with_cached_scores``.

    Args:
        models: Mapping of model name to fitted model.
        X_hist: Interaction rows, grouped here by ``session_id``.
        y_next: One row per target, with ``session_id`` and ``job_id`` columns.
        candidates: Job ids to score; converted to strings.

    Returns:
        Dict with ``candidate_list`` (candidate ids as strings),
        ``seen_indices`` (per target, sorted unique candidate positions of the
        jobs in the session history), ``true_indices`` (candidate position of
        each target job, ``-1`` when it is not a candidate) and ``score_mats``
        (per model, a float32 matrix of shape targets x candidates).
    """
    candidate_list = list(map(str, candidates))
    n_candidates = len(candidate_list)
    if not candidate_list:
        return {
            "candidate_list": [],
            "seen_indices": [],
            "true_indices": np.zeros(0, dtype=np.int32),
            "score_mats": {},
        }

    candidate_to_idx = {jid: pos for pos, jid in enumerate(candidate_list)}
    hist_map = {}
    for sid, grp in X_hist.groupby("session_id", sort=False):
        hist_map[sid] = grp
    rows = list(y_next.itertuples(index=False))
    n_rows = len(rows)

    session_histories: List[pd.DataFrame] = []
    seen_indices: List[np.ndarray] = []
    true_indices = np.full(n_rows, -1, dtype=np.int32)

    for row_idx, row in enumerate(rows):
        history = hist_map.get(str(row.session_id), empty_session_frame)
        session_histories.append(history)
        true_indices[row_idx] = int(candidate_to_idx.get(str(row.job_id), -1))

        # Candidate positions of the jobs this session already interacted with.
        seen_positions = set()
        if not history.empty:
            for jid in history["job_id"].astype(str).tolist():
                pos = candidate_to_idx.get(jid)
                if pos is not None:
                    seen_positions.add(int(pos))

        if seen_positions:
            seen_indices.append(np.asarray(sorted(seen_positions), dtype=np.int32))
        else:
            seen_indices.append(np.empty(0, dtype=np.int32))

    blend_mode = str(SCORE_NORMALIZATION).lower()
    bytes_per_score = np.dtype(np.float32).itemsize
    approx_gb = (n_rows * n_candidates * max(len(models), 1) * bytes_per_score) / (
        1024**3
    )
    print(
        f"[weights_tuning_only] score cache footprint (float32, raw): ~{approx_gb:.2f} GB"
    )

    score_mats: Dict[str, np.ndarray] = {}
    for model_name, model in models.items():
        print(
            f"[weights_tuning_only] caching {model_name}: sessions={n_rows} x candidates={n_candidates}"
        )
        mat = np.zeros((n_rows, n_candidates), dtype=np.float32)
        log_model_event(model_name, "Predicting", once=True)
        for row_idx, history in enumerate(session_histories):
            try:
                raw_model_scores = model.predict(history, candidate_list) or {}
                model_scores = {
                    str(jid): score for jid, score in raw_model_scores.items()
                }
            except Exception as exc:
                # A failed prediction is normalized like a model with no scores.
                warnings.warn(
                    f"Model '{model_name}' predict failed during cache build: {exc}",
                    RuntimeWarning,
                )
                model_scores = {}

            mat[row_idx, :] = normalize_model_scores_vector(
                model_scores=model_scores,
                candidates=candidate_list,
                mode=blend_mode,
            )

        score_mats[model_name] = mat

    return {
        "candidate_list": candidate_list,
        "seen_indices": seen_indices,
        "true_indices": true_indices,
        "score_mats": score_mats,
    }


def evaluate_ranking_with_cached_scores(
    eval_cache: Dict[str, object],
    weights: Dict[str, float],
    k: int = 10,
    return_predictions: bool = True,
):
    """Compute validation MRR@k from precomputed per-model score matrices.

    The ranking rule mirrors ``recommend_top_k``: scores are blended in
    float64, candidates are ordered by decreasing score with ties kept in
    candidate order, and jobs already seen in the session are removed before
    the top ``k`` are taken.  Unlike ``recommend_top_k`` the list is not padded
    when fewer than ``k`` unseen candidates exist.

    Args:
        eval_cache: Dict with the keys ``candidate_list``, ``seen_indices``,
            ``true_indices`` and ``score_mats`` (as built by
            ``_prepare_weights_only_eval_cache``).
        weights: Blend weight per model name; models with a missing or
            non-positive weight are skipped.
        k: Cut-off of the ranking; clamped to ``[1, number of candidates]``.
        return_predictions: When false, the two returned lists stay empty.

    Returns:
        ``(mrr, y_true, y_pred)``.  ``mrr`` is the mean reciprocal rank over
        all targets (targets that are not candidates count as 0).  ``y_true``
        holds the target ids (``""`` when the target is not a candidate) and
        ``y_pred`` the ranked recommendation lists.  ``(0.0, [], [])`` is
        returned when there are no candidates or no score matrices.
    """
    candidate_list = eval_cache["candidate_list"]
    seen_indices = eval_cache["seen_indices"]
    true_indices = eval_cache["true_indices"]
    score_mats = eval_cache["score_mats"]

    if len(candidate_list) == 0 or not score_mats:
        return 0.0, [], []

    n_candidates = len(candidate_list)
    k_eff = int(max(1, min(k, n_candidates)))

    # Resolve the weights once instead of once per session.
    active_mats = []
    for model_name, mat in score_mats.items():
        w = float(weights.get(model_name, 0.0))
        if w <= 0:
            continue
        active_mats.append((w, mat))

    reciprocal_ranks: List[float] = []
    y_true: List[str] = []
    y_pred: List[List[str]] = []

    for row_idx, raw_true_idx in enumerate(true_indices):
        # float64 accumulation matches the arithmetic of combined_scores.
        scores = np.zeros(n_candidates, dtype=np.float64)
        for w, mat in active_mats:
            scores += w * mat[row_idx, :].astype(np.float64)

        # Stable descending order: equal scores keep candidate order, exactly
        # like the stable sorted() call in recommend_top_k.
        order = np.argsort(-scores, kind="stable")

        blocked = seen_indices[row_idx]
        if blocked.size > 0:
            # Drop seen jobs outright so they can never re-enter the top-k,
            # even when fewer than k unseen candidates are left.
            allowed = np.ones(n_candidates, dtype=bool)
            allowed[blocked] = False
            order = order[allowed[order]]

        ranked_idx = order[:k_eff]

        true_idx = int(raw_true_idx)
        if true_idx >= 0:
            hit = np.flatnonzero(ranked_idx == true_idx)
            reciprocal_ranks.append(1.0 / float(hit[0] + 1) if hit.size > 0 else 0.0)
        else:
            reciprocal_ranks.append(0.0)

        if return_predictions:
            y_true.append(
                candidate_list[true_idx] if 0 <= true_idx < n_candidates else ""
            )
            y_pred.append([candidate_list[int(idx)] for idx in ranked_idx.tolist()])

    val_mrr = float(np.mean(reciprocal_ranks)) if reciprocal_ranks else 0.0
    return val_mrr, y_true, y_pred


if MODE != "weights_tuning_only":
    # Full tuning fits the models inside each trial, so nothing is prepared here.
    trained_models_fixed = None
    search_model_names = list(MODEL_REGISTRY)
    weights_only_eval_cache = None
else:
    # Weights-only tuning: fit every model once with its default parameters ...
    trained_models_fixed = {}
    for name, params in default_params_by_model.items():
        if (
            trained := train_or_load_model(
                model_name=name,
                model_cls=MODEL_REGISTRY[name],
                params=params,
                interactions=X_rank_train,
                targets=y_rank_train,
                val_interactions=X_rank_valid,
                val_targets=y_rank_valid,
                use_cache=True,
            )
        ) is not None:
            trained_models_fixed[name] = trained

    if len(trained_models_fixed) == 0:
        raise RuntimeError("No trainable models available after filtering/skipping.")
    search_model_names = list(trained_models_fixed)

    # ... and cache their validation scores so each trial only re-blends them.
    print("Preparing cached validation scores for weights-only tuning...")
    weights_only_eval_cache = _prepare_weights_only_eval_cache(
        models=trained_models_fixed,
        X_hist=X_rank_valid,
        y_next=y_rank_valid,
        candidates=candidate_job_ids,
    )
    print("Cached validation scores ready.")


def objective(trial):
    """Optuna objective: validation MRR@``TOP_K`` of the blended recommender.

    In ``"weights_tuning_only"`` mode the pre-fitted models are reused and only
    the blend weights are sampled.  Otherwise every registered model is fitted
    (or loaded from cache) with parameters from ``suggest_params(trial)``,
    except the models in ``NON_TUNABLE_MODELS`` which keep their defaults.

    The normalized weights and the per-model parameters are stored on the trial
    as the user attributes ``"weights"`` and ``"model_params"``.

    Returns:
        The validation MRR, or ``0.0`` when no model could be obtained.
    """
    weights_only = MODE == "weights_tuning_only"

    if weights_only:
        models = trained_models_fixed
        params_by_model = {name: default_params_by_model[name] for name in models}
    else:
        models, params_by_model = {}, {}
        for model_name, model_cls in MODEL_REGISTRY.items():
            params = (
                model_cls.default_params()
                if model_name in NON_TUNABLE_MODELS
                else model_cls.suggest_params(trial)
            )
            trained = train_or_load_model(
                model_name=model_name,
                model_cls=model_cls,
                params=params,
                interactions=X_rank_train,
                targets=y_rank_train,
                val_interactions=X_rank_valid,
                val_targets=y_rank_valid,
                use_cache=True,
            )
            if trained is not None:
                params_by_model[model_name] = params
                models[model_name] = trained

    if not models:
        return 0.0

    # A weight is sampled for every searchable model, including those that
    # failed to train in this trial.
    raw_weights = {}
    for name in search_model_names:
        raw_weights[name] = trial.suggest_float(f"w_{name}", 0.0, 1.0)
    weights = normalize_weights({name: raw_weights.get(name, 0.0) for name in models})

    if weights_only and weights_only_eval_cache is not None:
        val_mrr = evaluate_ranking_with_cached_scores(
            eval_cache=weights_only_eval_cache,
            weights=weights,
            k=TOP_K,
            return_predictions=False,
        )[0]
    else:
        val_mrr = evaluate_ranking(
            models=models,
            weights=weights,
            X_hist=X_rank_valid,
            y_next=y_rank_valid,
            candidates=candidate_job_ids,
            k=TOP_K,
        )[0]

    trial.set_user_attr("weights", weights)
    trial.set_user_attr("model_params", params_by_model)
    return val_mrr


# NOTE(review): optuna is imported unconditionally at the top of the file, so
# this fallback branch can only run if that import is made optional.
if optuna is None:
    print("Optuna is not installed, skipping optimization and using uniform weights.")
    if MODE == "weights_tuning_only":
        active_names = list(trained_models_fixed.keys())
    else:
        active_names = list(MODEL_REGISTRY.keys())

    best_weights = normalize_weights({name: 1.0 for name in active_names})
    best_model_params = {name: default_params_by_model[name] for name in active_names}
else:
    sampler = optuna.samplers.TPESampler(seed=RANDOM_STATE)

    if MODE == "full_hyperparameter_tuning":
        # Persistent study: trials survive kernel restarts and the run resumes.
        storage_url = f"sqlite:///{OPTUNA_STORAGE_PATH}"
        study = optuna.create_study(
            direction="maximize",
            sampler=sampler,
            study_name=OPTUNA_FULL_STUDY_NAME,
            storage=storage_url,
            load_if_exists=True,
        )
        # Count only trials that actually finished.  ``study.trials`` also
        # holds failed trials and trials left RUNNING by an interrupted
        # kernel, which would make a resumed study stop short of N_TRIALS.
        completed_trials = len(
            study.get_trials(
                deepcopy=False,
                states=(optuna.trial.TrialState.COMPLETE,),
            )
        )
        remaining_trials = max(0, N_TRIALS - completed_trials)
        print(
            f"Resuming full tuning study '{OPTUNA_FULL_STUDY_NAME}' "
            f"({completed_trials}/{N_TRIALS} trials done, {remaining_trials} remaining)."
        )
        if remaining_trials > 0:
            study.optimize(
                objective,
                n_trials=remaining_trials,
                n_jobs=min(CORES, remaining_trials),
                show_progress_bar=True,
            )
        else:
            print("Target trial count already reached; skipping new optimization trials.")
    else:
        # Weights-only tuning uses an in-memory study.
        study = optuna.create_study(direction="maximize", sampler=sampler)
        study.optimize(
            objective,
            n_trials=N_TRIALS,
            n_jobs=min(CORES, N_TRIALS),
            show_progress_bar=True,
        )

    # The objective stores the blend weights and model parameters on each trial.
    best_trial = study.best_trial
    best_weights = best_trial.user_attrs["weights"]
    best_model_params = best_trial.user_attrs["model_params"]

    print(f"Best validation MRR@{TOP_K}: {best_trial.value:.6f}")
    print("Best weights:", best_weights)


# Re-create (or load from cache) the models of the best configuration.
best_ranking_models = {}
for model_name, params in best_model_params.items():
    if model_name in MODEL_REGISTRY:
        trained = train_or_load_model(
            model_name=model_name,
            model_cls=MODEL_REGISTRY[model_name],
            params=params,
            interactions=X_rank_train,
            targets=y_rank_train,
            val_interactions=X_rank_valid,
            val_targets=y_rank_valid,
            use_cache=True,
        )
        if trained is not None:
            best_ranking_models[model_name] = trained

if len(best_ranking_models) == 0:
    raise RuntimeError("No ranking model is available for validation scoring.")

# Renormalize over the models that are actually available.
best_weights = normalize_weights(
    {name: best_weights.get(name, 0.0) for name in best_ranking_models}
)

if MODE != "weights_tuning_only" or weights_only_eval_cache is None:
    val_mrr, val_job_true, val_job_pred = evaluate_ranking(
        models=best_ranking_models,
        weights=best_weights,
        X_hist=X_rank_valid,
        y_next=y_rank_valid,
        candidates=candidate_job_ids,
        k=TOP_K,
    )
else:
    # Weights-only mode can reuse the cached validation scores.
    val_mrr, val_job_true, val_job_pred = evaluate_ranking_with_cached_scores(
        eval_cache=weights_only_eval_cache,
        weights=best_weights,
        k=TOP_K,
        return_predictions=True,
    )

print(f"Validation MRR@{TOP_K}: {val_mrr:.6f}")
# previous best: 0.12

[32m[I 2026-02-24 16:02:56,901][0m A new study created in memory with name: no-name-37306b54-6d79-4d14-b337-fc18165d7298[0m


  0%|          | 0/300 [00:00<?, ?it/s]

[cbf] Fitting...
[cbf] Fitting...
[cbf] Fitting...
[cbf] Fitting...
[cbf] Fitting...
[cbf] Fitting...
[cbf] Fitting...
[cbf] Fitting...
[cbf] Fitting...
[cbf] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[mf] Fitting...
[mf] Fitting...
[mf] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[mf] Fitting...
[mf] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...
[mf] Fitting...
[popularity] Fitting...
[popularity] Fitting...
[session_gnn] Fitting...
[session_gnn] Fitting...
[popularity] Fitting...
[session_gnn] Fitting...
[popularity] Fitting...
[popularity] Fitting...
[popularity] Fitting...
[session_gnn] Fitting...
[session_gnn] Fitting...
[session_gnn] Fitting...
[popularity] Fitting...
[popularity] Fitting...
[popularity] Fitting...
[popularity] Fitting...
[session_gnn] Fitting...
[session_gnn] Fitting...
[session_gnn] Fitting...
[session_gnn] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[user_user] Fitting...
[user_user] Fitting...
[user_user] Fitting...
[user_user] Fitting...
[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...
[item_item] Fitting...
[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...
[item_item] Fitting...
[item_item] Fitting...
[cbf] Predicting...
[mf] Predicting...
[popularity] Predicting...
[session_gnn] Predicting...
[user_user] Predicting...
[item_item] Predicting...
[32m[I 2026-02-24 18:35:09,780][0m Trial 2 finished with value: 0.10153321742084724 and parameters: {'w_view': 1.7713837300060584, 'w_apply': 2.773176026198449, 'recency_decay': 0.634440820804321, 'blend_alpha_lexical': 0.5417123538429048, 'last_item_boost': 1.340548164976539, 'lr_all': 0.013261836145939782, 'n_epochs': 80, 'n_factors': 64, 'query_recency_decay': 0.869160846390223, 'rating_clip_max': 2.2133469740220666, 'reg_all': 0.014738180254580083, 'sim_power_gamma': 1.7991725888293744, 'train_recency_decay': 0.9275058373940808, 'pop_recency_decay': 0.5395485417468442, 'pop_smooth_alpha': 1.7177454898976885, 'pop_w_apply': 6.813809403278494, 'pop_w_view': 1.2425847815575137, 'iuf_exponent': 1.4040352258640025, 'neighbor_item_recency_decay': 0.9303044935285025, 'neighbo

  optuna_warn(
  optuna_warn(


[mf] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...
[popularity] Fitting...
[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[mf] Fitting...
[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...
[32m[I 2026-02-24 20:54:50,672][0m Trial 11 finished with value: 0.09338312074383824 and parameters: {'w_view': 1.6226706792895005, 'w_apply': 2.849758437993871, 'recency_decay': 0.6399025176260726, 'blend_alpha_lexical': 0.5788190365922542, 'last_item_boost': 1.3012563409778752, 'lr_all': 0.013761343904895687, 'n_epochs': 79, 'n_factors': 64, 'query_recency_decay': 0.8612955054936987, 'rating_clip_max': 2.3771926344552354, 'reg_all': 0.02022700926954406, 'sim_power_gamma': 1.8697994443700237, 'train_recency_decay': 0.8371405069538831, 'pop_recency_decay': 0.5852992500120557, 'pop_smooth_alpha': 2.9333766661121294, 'pop_w_apply': 5.174903576984538, 'pop_w_view': 1.2256775213794127, 'iuf_exponent': 1.3962246253333572, 'neighbor_item_recency_decay': 0.9416160715098127, 'neighbor_length_norm': 0.23965052016804655, 'neighbor_position_boost': 1.3089330434806672, 'neighbor_target_boost': 1.296727700546369, 'top_sessions': 82, 'last_n': 11, 'shrinkage': 17, 'tversky_a

  optuna_warn(
  optuna_warn(


[mf] Fitting...
[32m[I 2026-02-24 20:59:32,063][0m Trial 10 finished with value: 0.09546292549125403 and parameters: {'w_view': 1.772539764706218, 'w_apply': 2.9120651485927374, 'recency_decay': 0.631604944838253, 'blend_alpha_lexical': 0.5525909820916293, 'last_item_boost': 1.3064169409956452, 'lr_all': 0.013645874783062971, 'n_epochs': 78, 'n_factors': 256, 'query_recency_decay': 0.8613781829076496, 'rating_clip_max': 2.2585325726614736, 'reg_all': 0.017227325912727798, 'sim_power_gamma': 1.8412879371779276, 'train_recency_decay': 0.943132664704766, 'pop_recency_decay': 0.5473696478046546, 'pop_smooth_alpha': 2.2839595416871092, 'pop_w_apply': 6.867907050433507, 'pop_w_view': 1.428231295367334, 'iuf_exponent': 1.4112197406323321, 'neighbor_item_recency_decay': 0.922474854196988, 'neighbor_length_norm': 0.21597523386224898, 'neighbor_position_boost': 1.3033963951802012, 'neighbor_target_boost': 1.3080485391976662, 'top_sessions': 85, 'last_n': 9, 'shrinkage': 17, 'tversky_alpha': 0.

  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[32m[I 2026-02-24 21:00:36,882][0m Trial 13 finished with value: 0.10783695809663701 and parameters: {'w_view': 1.7359369096988997, 'w_apply': 2.5553047370492257, 'recency_decay': 0.6202578441159414, 'blend_alpha_lexical': 0.578367255728302, 'last_item_boost': 1.3156378169449776, 'lr_all': 0.013384326908586815, 'n_epochs': 81, 'n_factors': 256, 'query_recency_decay': 0.871158240779321, 'rating_clip_max': 2.085760730040109, 'reg_all': 0.01668740126567695, 'sim_power_gamma': 1.7808337767597613, 'train_recency_decay': 0.9804519145366444, 'pop_recency_decay': 0.5608782886667274, 'pop_smooth_alpha': 2.5765604941575075, 'pop_w_apply': 6.303399668291305, 'pop_w_view': 1.0845291572228903, 'iuf_exponent': 1.4017223563799655, 'neighbor_item_recency_decay': 0.9306427625416033, 'neighbor_length_norm': 0.21871292186188263, 'neighbor_position_boost': 1.3137056853248519, 'neighbor_target_boost': 1.2894980434578966, 'top_sessions': 81, 'last_n': 12, 'shrinkage': 16, 'tversky_a

  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...
[popularity] Fitting...
[popularity] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...
[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(


[mf] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...
[popularity] Fitting...
[mf] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[popularity] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[user_user] Fitting...
[item_item] Fitting...
[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...


  optuna_warn(
  optuna_warn(
  optuna_warn(
  optuna_warn(


[item_item] Fitting...
[32m[I 2026-02-24 23:59:27,880][0m Trial 20 finished with value: 0.10414499552837596 and parameters: {'w_view': 1.6831656047611496, 'w_apply': 2.70908144600927, 'recency_decay': 0.6337965062157741, 'blend_alpha_lexical': 0.5404495815645513, 'last_item_boost': 1.271858126473142, 'lr_all': 0.013202144679958039, 'n_epochs': 81, 'n_factors': 64, 'query_recency_decay': 0.8732529907756892, 'rating_clip_max': 2.3160628759894237, 'reg_all': 0.015823804500183794, 'sim_power_gamma': 1.8107812043931983, 'train_recency_decay': 0.9760636973456998, 'pop_recency_decay': 0.5392429544974319, 'pop_smooth_alpha': 2.5205250242086663, 'pop_w_apply': 6.312962757500674, 'pop_w_view': 1.0895516370389065, 'iuf_exponent': 1.4113747820244997, 'neighbor_item_recency_decay': 0.9212658296624936, 'neighbor_length_norm': 0.21399752234155694, 'neighbor_position_boost': 1.3012840769169336, 'neighbor_target_boost': 1.3090329246789716, 'top_sessions': 85, 'last_n': 10, 'shrinkage': 16, 'tversky_a

: 

: 

### MRR/Weight breakdown

In [None]:
# Validation MRR of each tuned ranking model evaluated on its own (weight 1.0),
# keyed by model name.
individual_val_mrr = {}
reuse_cached_scores = (
    MODE == "weights_tuning_only" and weights_only_eval_cache is not None
)

if reuse_cached_scores:
    # Weights-only runs score each model through `weights_only_eval_cache`.
    for model_name in best_ranking_models:
        solo_mrr, _, _ = evaluate_ranking_with_cached_scores(
            eval_cache=weights_only_eval_cache,
            weights={model_name: 1.0},
            k=TOP_K,
            return_predictions=False,
        )
        individual_val_mrr[model_name] = solo_mrr
else:
    # Otherwise each fitted model is evaluated directly on the validation split.
    for model_name, model in best_ranking_models.items():
        solo_mrr, _, _ = evaluate_ranking(
            models={model_name: model},
            weights={model_name: 1.0},
            X_hist=X_rank_valid,
            y_next=y_rank_valid,
            candidates=candidate_job_ids,
            k=TOP_K,
        )
        individual_val_mrr[model_name] = solo_mrr

# One row per model: standalone MRR, its ensemble weight (0.0 when the model
# has none) and the parameters it was tuned to (empty dict when unknown).
mrr_column = f"mrr@{TOP_K}"
breakdown_rows = []
for name, score in individual_val_mrr.items():
    breakdown_rows.append(
        {
            "model": name,
            mrr_column: score,
            "ensemble_weight": best_weights.get(name, 0.0),
            "final_params": best_model_params.get(name, {}),
        }
    )

# Best standalone model first, with a clean 0..n-1 index.
individual_mrr_df = pd.DataFrame(breakdown_rows)
individual_mrr_df = individual_mrr_df.sort_values(by=mrr_column, ascending=False)
individual_mrr_df = individual_mrr_df.reset_index(drop=True)

display(individual_mrr_df)


Unnamed: 0,model,mrr@10,ensemble_weight,final_params
0,user_user,0.25941,0.537188,"{'variant': 'U2U_EXACT_IUF_COSINE', 'iuf_expon..."
1,item_item,0.235247,0.000528,"{'variant': 'II_TVERSKY', 'last_n': 10, 'neigh..."
2,session_gnn,0.210105,0.46178,"{'model_version': 2, 'seed': 42, 'device': 'au..."
3,mf,0.076471,5.9e-05,"{'variant': 'MF_IMPLICIT_BPR', 'last_item_boos..."
4,cbf,0.031889,0.000185,"{'variant': 'LEXICAL_DENSE_BLEND', 'preprocess..."
5,popularity,0.002976,0.00026,"{'variant': 'POP_SESSION_WEIGHTED', 'include_t..."


## Action prediction


In [None]:
class LastActionPredictor:
    """Predict a session's next action by repeating its last observed action.

    The predictor has no learned state: predictions are derived entirely from
    the interactions frame handed to ``predict``/``predict_binary``. Sessions
    absent from that frame, and action labels missing from ``ACTION_TO_INT``,
    fall back to ``"view"`` (encoded as 0).
    """

    # Binary encoding of the action labels and its inverse.
    ACTION_TO_INT = {"view": 0, "apply": 1}
    INT_TO_ACTION = {0: "view", 1: "apply"}

    def fit(self, interactions: pd.DataFrame):
        """Log the number of interactions and return ``self``.

        Nothing is learned; the method exists so the predictor can be used
        with the same fit-then-predict call pattern as the other models.
        """
        print(f"[ActionPredictor] fitting job with {len(interactions)} interactions")
        return self

    def predict_binary(
        self, interactions: pd.DataFrame, session_ids: List[str]
    ) -> List[int]:
        """Return the encoded last action (0 = view, 1 = apply) per session.

        Args:
            interactions: Frame with ``session_id`` and ``action`` columns.
                "Last" means last in row order within each session.
            session_ids: Sessions to predict for, in output order.

        Returns:
            One integer per entry of ``session_ids``.
        """
        last_action_by_session = (
            interactions.groupby("session_id", sort=False)["action"]
            .last()
            .astype(str)
        )
        # The lookup below stringifies the requested ids, so the keys must be
        # strings too. Otherwise a numeric ``session_id`` column never matches
        # and every session silently falls back to "view".
        last_action = {
            str(sid): action for sid, action in last_action_by_session.items()
        }
        labels = [last_action.get(str(sid), "view") for sid in session_ids]
        return [self.ACTION_TO_INT.get(label, 0) for label in labels]

    def predict(self, interactions: pd.DataFrame, session_ids: List[str]) -> List[str]:
        """Return the predicted action label (``"view"``/``"apply"``) per session."""
        binary = self.predict_binary(interactions, session_ids)
        return [self.INT_TO_ACTION.get(v, "view") for v in binary]


# Fit the action predictor on the training interactions, then measure its
# accuracy on the validation sessions.
print("[LastActionPredictor] Fitting...")
action_model = LastActionPredictor()
action_model.fit(X_rank_train)

# Validation sessions to predict for, as strings.
valid_session_ids = y_rank_valid["session_id"].astype(str).tolist()

# Ground-truth actions encoded like the predictions; labels missing from
# ACTION_TO_INT are mapped to 0.
valid_action_labels = y_rank_valid["action"].astype(str).tolist()
val_action_true_bin = []
for action_label in valid_action_labels:
    val_action_true_bin.append(LastActionPredictor.ACTION_TO_INT.get(action_label, 0))

print("[LastActionPredictor] Predicting...")
val_action_pred_bin = action_model.predict_binary(X_rank_valid, valid_session_ids)

val_acc = accuracy(val_action_true_bin, val_action_pred_bin)
print("Validation ACC: {:.6f}".format(val_acc))


[LastActionPredictor] Fitting...
[ActionPredictor] fitting job with 83682 interactions
[LastActionPredictor] Predicting...
Validation ACC: 0.835467


## Score evaluation

In [None]:
# Combine the validation ranking metric and action accuracy into one score.
val_score = challenge_score(val_mrr, val_acc)
print("Challenge score (0.7 * MRR + 0.3 * ACC): {:.6f}".format(val_score))

Challenge score (0.7 * MRR + 0.3 * ACC): 0.441920


## Submission


In [None]:
# Candidate pool for final inference: job ids present in X_full ordered by
# interaction count, followed by the ids from all_job_ids never seen there.
full_pop = X_full["job_id"].value_counts().index.tolist()
full_pop_set = set(full_pop)
full_candidates = list(full_pop)
full_candidates.extend(jid for jid in all_job_ids if jid not in full_pop_set)

# Final retraining on X_full
final_models = {}
for model_name, params in best_model_params.items():
    if model_name not in MODEL_REGISTRY:
        continue

    model_cls = MODEL_REGISTRY[model_name]
    try:
        model = model_cls(params=params)
        print(f"[{model_name}] Fitting...")
        model.fit(
            X_full,
            targets=None,
            job_listings=job_listings,
            all_job_ids=all_job_ids,
        )
    except Exception as exc:
        # Best effort: a model that cannot be refitted is dropped with a
        # warning rather than aborting the whole submission.
        warnings.warn(
            f"Skipping final model '{model_name}' because fit failed: {exc}",
            RuntimeWarning,
        )
        continue

    # Models without an `is_fitted` attribute are treated as fitted.
    if getattr(model, "is_fitted", True):
        final_models[model_name] = model
    else:
        warnings.warn(
            f"Skipping final model '{model_name}' because fit did not complete successfully.",
            RuntimeWarning,
        )

if not final_models:
    raise RuntimeError("No ranking model available for final inference.")

# Normalise the ensemble weights over the models that survived retraining.
final_weights = normalize_weights(
    dict((name, best_weights.get(name, 0.0)) for name in final_models)
)

print("[LastActionPredictor] Fitting...")
final_action_model = LastActionPredictor()
final_action_model.fit(X_full)

test_session_ids = X_test_raw["session_id"].astype(str).tolist()

# Per-session history frames of the test interactions, keyed by session id.
test_hist_map = {}
for session_key, session_rows in X_test.groupby("session_id", sort=False):
    test_hist_map[session_key] = session_rows

test_job_preds = list()

def coerce_job_id(value):
    """Return ``value`` converted with ``int()``, or unchanged if that fails.

    Values ``int()`` rejects (``None``, non-numeric strings, NaN, infinities)
    are returned as-is, so one odd id cannot abort submission generation.
    Finite floats are truncated toward zero by ``int()``.
    """
    try:
        return int(value)
    # OverflowError covers int(float("inf")), which the other two miss.
    except (TypeError, ValueError, OverflowError):
        return value


# Rank the candidate jobs for every test session. Sessions missing from
# test_hist_map are scored with `empty_session_frame` as their history.
for session_id in test_session_ids:
    ranked_job_ids = recommend_top_k(
        session_history=test_hist_map.get(session_id, empty_session_frame),
        models=final_models,
        weights=final_weights,
        candidates=full_candidates,
        k=TOP_K,
    )
    test_job_preds.append(list(map(coerce_job_id, ranked_job_ids)))

print("[LastActionPredictor] Predicting...")
test_action_preds = final_action_model.predict(X_test, test_session_ids)

# One row per test session: predicted action and the ranked job id list.
submission_columns = {
    "session_id": test_session_ids,
    "action": test_action_preds,
    "job_id": test_job_preds,
}
submission = pd.DataFrame(submission_columns)

# Create the output folder if needed, then write the CSV without the index.
submission_dir = os.path.dirname(SUBMISSION_PATH)
os.makedirs(submission_dir, exist_ok=True)
submission.to_csv(SUBMISSION_PATH, index=False)

print(f"Submission saved to: {SUBMISSION_PATH}")
submission.head()

[cbf] Fitting...
[mf] Fitting...
[popularity] Fitting...
[session_gnn] Fitting...
[user_user] Fitting...
[item_item] Fitting...
[LastActionPredictor] Fitting...
[ActionPredictor] fitting job with 149475 interactions
[LastActionPredictor] Predicting...
Submission saved to: /Users/Administrateur/School/X/ENS Data Competition/submission.csv


Unnamed: 0,session_id,action,job_id
0,0,view,"[3259, 1746, 2179, 2239, 2908, 2962, 1921, 384..."
1,1,apply,"[1382, 2959, 357, 2910, 3191, 1746, 974, 3418,..."
2,2,view,"[1367, 1377, 925, 777, 2957, 2359, 4547, 2347,..."
3,3,view,"[2580, 1137, 1093, 1, 1148, 2449, 2380, 3709, ..."
4,4,view,"[214, 912, 2104, 1247, 455, 1116, 1237, 445, 5..."
