In [1]:
import sys
import os

# Append the parent directory (resolved against the current working directory)
# to the import search path. Presumably this is what lets the `src.*` imports
# below resolve when the notebook is run from its own folder — TODO confirm.
sys.path.append(os.path.abspath('..'))

import yaml
import pandas as pd
import joblib

from src.data.load import load_data
from src.data.prepare import prepare_data
from src.features.features import feature_engineering
from src.features.utils import build_rank_input
from src.models.ranker import Ranker
from src.models.retrieval import Retrieval
from src.models.co_visit import CoVisit
from src.models.baseline import popular_items
from src.models.utils import set_global_seed

**Config & Data Preparation**

- Read project configurations and settings
- Load and transform the 3 datasets

In [2]:
# Parse the project configuration file with PyYAML's safe loader.
with open('../main/config.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)
del config_file  # do not leave the closed handle in the notebook namespace

# Reproducibility: hand the configured seed to the project's seeding helper.
set_global_seed(seed=config["general"]["seed"])

In [3]:
# Load and prepare data.
#
# The loader needs the configured path prefixed with "../" (presumably because
# the path in config.yml is relative to the project root while this notebook
# runs one level below it — TODO confirm).
#
# Build a patched copy of the loader settings rather than rewriting
# config["data_loader"]["path"] in place: mutating the shared config made this
# cell non-idempotent, since every re-run prepended one more "../" and the
# second execution pointed at the wrong directory.
loader_config = {
    **config["data_loader"],
    "path": "../" + config["data_loader"]["path"],
}
dfs = load_data(config=loader_config)
dfs = prepare_data(dataframes=dfs, config=config["data_preparation"])

**Inference**

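- Generate candidates with the co-visitation model
- Build ranking features for the candidates
- Load the trained rankers and score (rank) the candidates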

In [4]:
# Fit CoVisit (directional method, k=50) on dfs["data"], then expose its
# "score" column under the name "rating" with the values rounded.
candidates = (
    CoVisit(methods=["directional"], k=50)
    .fit(ui_matrix=dfs["data"])
    .rename(columns={"score": "rating"})
    .assign(rating=lambda frame: frame["rating"].round())
)

In [5]:
# Run the project's feature engineering over the prepared dataframes.
user_item_features = feature_engineering(
    dataframes=dfs,
)

# Combine the candidate ratings with those features into the ranker input;
# the result is indexed with "X" by the scoring loop.
df = build_rank_input(
    ratings=candidates,
    features=user_item_features,
)

In [6]:
# Score the candidates with every ranker listed under config["train"]["ranker"].
# The result of predict() used to be discarded, so the loop produced nothing
# usable; each model's output is now kept, keyed by algorithm name.
predictions = {}
for algorithm in config["train"]["ranker"]:

    # Load the persisted model for this algorithm.
    # NOTE(review): joblib.load unpickles the file and can execute arbitrary
    # code — only load artifacts written by this project's own training run.
    clf = joblib.load(f'../main/artifacts/{algorithm}.joblib')

    # Rank the candidates: run the model on the prepared ranker input.
    predictions[algorithm] = clf.predict(X=df["X"])