In [5]:
import json
from pathlib import Path
import pandas as pd
import numpy as np


# ------------ Parameters -------------
LAST_K = 5  # number of most recent items of a session used for similarity lookups
N_ALS = 100  # max ALS candidates kept per visitor
N_SIM = 50  # max similar-item candidates kept per anchor session
N_POP = 50  # number of top popular items added as candidates for every anchor
# Weight assigned to each event type; mapped onto events as `event_weight`.
EVENT_WEIGHTS = {"transaction": 5, "addtocart": 3, "view": 1}

# Input/output locations (relative paths).
data_dir = Path("data")
ALS_dir = Path("ALS_assets")
range_features_dir = Path("range_features")

# Make sure the output directory exists before anything is written into it.
range_features_dir.mkdir(exist_ok=True, parents=True)

In [6]:
# Raw event log. Later cells read the `visitorid`, `itemid`, `event` and
# `timestamp` (epoch milliseconds) columns from it.
events = pd.read_csv(data_dir / "events.csv")

In [7]:
def preprocess_events(df: pd.DataFrame) -> pd.DataFrame:
    """Add a UTC event timestamp, order the log and drop exact repeats.

    Args:
        df: Raw events with `visitorid`, `itemid`, `event` and `timestamp`
            (epoch milliseconds) columns. The input frame is not modified.

    Returns:
        A new frame with an extra tz-aware `ts_event` column, sorted by
        (`visitorid`, `ts_event`), keeping only the last row of every
        (`visitorid`, `ts_event`, `itemid`, `event`) combination.
    """
    dedup_keys = ["visitorid", "ts_event", "itemid", "event"]
    ordered = (
        df.assign(ts_event=pd.to_datetime(df["timestamp"], unit="ms", utc=True))
        .sort_values(["visitorid", "ts_event"])
        .reset_index(drop=True)
    )
    # tail(1) per key group keeps the last occurrence of each duplicated event.
    return ordered.groupby(dedup_keys, as_index=False).tail(1)


# Add `ts_event`, sort and de-duplicate; `events` now refers to the cleaned frame.
events = preprocess_events(events)

In [8]:
def make_sessions(
    df: pd.DataFrame,
    inactivity_minutes: int = 30,
    reset_on_day_change: bool = False,
) -> pd.DataFrame:
    """Assign every event to a per-visitor session.

    A new session starts at a visitor's first event and whenever the pause
    since that visitor's previous event exceeds `inactivity_minutes`.

    Args:
        df: Events with `visitorid` and datetime `ts_event` columns.
        inactivity_minutes: Pause (strictly greater than) that opens a new session.
        reset_on_day_change: If True, also open a new session when the calendar
            date of `ts_event` differs from the previous event's date.

    Returns:
        A sorted copy of `df` with helper columns `prev_ts` and `gap_sec`, the
        1-based `session_order` within the visitor, and
        `session_id` = "<visitorid>_<session_order>".
    """
    out = df.sort_values(["visitorid", "ts_event"]).reset_index(drop=True)
    out["prev_ts"] = out.groupby("visitorid")["ts_event"].shift(1)
    out["gap_sec"] = (out["ts_event"] - out["prev_ts"]).dt.total_seconds()

    # First event of a visitor has no predecessor, hence a NaN gap.
    timeout_sec = inactivity_minutes * 60
    starts_session = out["gap_sec"].isna() | (out["gap_sec"] > timeout_sec)

    if reset_on_day_change:
        day_changed = out["ts_event"].dt.date != out["prev_ts"].dt.date
        starts_session = starts_session | day_changed

    # Running count of session starts gives the session's ordinal per visitor.
    out["session_order"] = (
        starts_session.groupby(out["visitorid"]).cumsum().astype(int)
    )
    visitor_part = out["visitorid"].astype(str)
    out["session_id"] = visitor_part + "_" + out["session_order"].astype(str)
    return out


def sessions_agg(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse sessionised events into one row per session.

    Args:
        df: Events carrying `visitorid`, `session_order`, `session_id`,
            `ts_event`, `itemid` and, optionally, `event`.

    Returns:
        One row per (`visitorid`, `session_order`, `session_id`) with
        `session_start`, `session_end`, `n_events`, `n_items` (distinct items),
        one `cnt_event_<type>` column per event type when `event` is present,
        and `duration_sec`.
    """
    session_keys = ["visitorid", "session_order", "session_id"]
    summary = (
        df.groupby(session_keys)
        .agg(
            session_start=("ts_event", "min"),
            session_end=("ts_event", "max"),
            n_events=("ts_event", "count"),
            n_items=("itemid", pd.Series.nunique),
        )
        .reset_index()
    )

    # Per-session counts of each event type (only when the column exists).
    if "event" in df.columns:
        per_event = (
            df.groupby(["session_id", "event"])
            .size()
            .unstack(fill_value=0)
            .add_prefix("cnt_event_")
            .reset_index()
        )
        summary = summary.merge(per_event, on="session_id", how="left")

    span = summary["session_end"] - summary["session_start"]
    summary["duration_sec"] = span.dt.total_seconds()
    return summary


# Sessionise the whole log: 30-minute inactivity timeout, no forced break on
# calendar-day change.
events_with_sessions = make_sessions(
    events, inactivity_minutes=30, reset_on_day_change=False
)

# One row per session with start/end, event and item counts, and duration.
sessions_all = sessions_agg(events_with_sessions)

In [9]:
def split_sessions_by_date(
    events_sess: pd.DataFrame, split_date_str: str = "2015-08-29"
) -> tuple[
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
]:
    """Split sessionised events into train/test parts at a UTC date.

    Sessions that straddle the split timestamp are cut in two, after which the
    sessions of every visitor are renumbered and `session_id` is rebuilt as
    "<visitorid>_<session_order_adj>".

    Args:
        events_sess: Events with `visitorid`, `ts_event` (tz-aware UTC),
            `session_id` and the columns required by `sessions_agg`.
            The input frame is not modified.
        split_date_str: Date/time string interpreted in UTC; events strictly
            before it go to train, the rest to test.

    Returns:
        `(train_ev, test_ev, sess_train, sess_test)`: the two event frames
        (with `session_id_adj`, `session_order_adj` and the rebuilt
        `session_id`) and their per-session aggregates.
    """
    split_dt = pd.Timestamp(split_date_str, tz="UTC")
    print(f"Дата сплита: {split_dt}")

    df = events_sess.copy()

    # Sessions that start before the boundary and end on/after it.
    s_bounds = (
        df.groupby("session_id")["ts_event"].agg(start="min", end="max").reset_index()
    )
    cross = s_bounds[(s_bounds["start"] < split_dt) & (s_bounds["end"] >= split_dt)][
        "session_id"
    ]
    print(f"Сессий пересекающих границу: {len(cross)}")

    left_mask = df["ts_event"] < split_dt
    in_cross = df["session_id"].isin(cross)  # computed once, reused for both halves

    # Cut crossing sessions into a left (_L) and a right (_R) half.
    df["session_id_adj"] = df["session_id"]
    df.loc[in_cross & left_mask, "session_id_adj"] = df["session_id"] + "_L"
    df.loc[in_cross & ~left_mask, "session_id_adj"] = df["session_id"] + "_R"

    df = df.sort_values(["visitorid", "ts_event"]).reset_index(drop=True)

    # Renumber sessions per visitor: a new ordinal starts wherever the adjusted
    # id differs from the visitor's previous row. Done with a grouped shift and
    # cumsum instead of a Python callback per visitor; the result is identical.
    prev_adj = df.groupby("visitorid")["session_id_adj"].shift()
    is_new_session = df["session_id_adj"].ne(prev_adj)
    df["session_order_adj"] = (
        is_new_session.groupby(df["visitorid"]).cumsum().astype(int)
    )

    # Rebuild session ids from the adjusted order.
    df["session_id"] = (
        df["visitorid"].astype(str) + "_" + df["session_order_adj"].astype(str)
    )

    train_ev = df[df["ts_event"] < split_dt].copy()
    test_ev = df[df["ts_event"] >= split_dt].copy()

    # Recompute the per-session aggregates inside each part.
    sess_train = sessions_agg(train_ev)
    sess_test = sessions_agg(test_ev)
    return train_ev, test_ev, sess_train, sess_test


# Temporal hold-out: events before 2015-08-29 (UTC) form train, the rest form test.
train_ev, test_ev, sessions_train, sessions_test = split_sessions_by_date(
    events_with_sessions, split_date_str="2015-08-29"
)

Дата сплита: 2015-08-29 00:00:00+00:00
Сессий пересекающих границу: 18


In [10]:
def ensure_event_weight(
    df: pd.DataFrame, weights: "dict | None" = None
) -> pd.DataFrame:
    """Return a copy of `df` with a float32 `event_weight` column.

    Args:
        df: Events with an `event` column. The input frame is not modified.
        weights: Mapping from event type to weight. Defaults to the notebook's
            `EVENT_WEIGHTS`; pass another mapping to reuse the function with a
            different weighting scheme.

    Returns:
        A new frame where `event_weight` holds the mapped weight, with event
        types missing from the mapping set to 0.
    """
    if weights is None:
        weights = EVENT_WEIGHTS
    mapped = df["event"].map(weights).fillna(0).astype(np.float32)
    return df.assign(event_weight=mapped)


def build_next_session_targets(ev_with_sessions: pd.DataFrame) -> pd.DataFrame:
    """Build (anchor session, item) relevance targets from the following session.

    For every session that has a successor for the same visitor, the items of
    the successor become targets of the anchor session.

    Args:
        ev_with_sessions: Events with `visitorid`, `session_id`,
            `session_order_adj`, `itemid` and `event_weight`.

    Returns:
        Frame with columns `visitorid`, `anchor_session_id`, `itemid`, `gain`,
        where `gain` is the maximum `event_weight` the item received in the
        next session.
    """
    # Chain every session to the next one of the same visitor.
    session_chain = (
        ev_with_sessions[["visitorid", "session_id", "session_order_adj"]]
        .drop_duplicates()
        .sort_values(["visitorid", "session_order_adj"])
    )
    session_chain["next_session_id"] = session_chain.groupby("visitorid")[
        "session_id"
    ].shift(-1)

    # The last session of a visitor has no successor and cannot be an anchor.
    has_next = session_chain[session_chain["next_session_id"].notna()].copy()
    has_next["next_session_id"] = has_next["next_session_id"].astype(str)

    next_session_events = ev_with_sessions[
        ["visitorid", "session_id", "itemid", "event_weight"]
    ].rename(columns={"session_id": "next_session_id"})

    joined = has_next.merge(
        next_session_events, on=["visitorid", "next_session_id"], how="left"
    )
    return (
        joined.groupby(["visitorid", "session_id", "itemid"], as_index=False)
        .agg(gain=("event_weight", "max"))
        .rename(columns={"session_id": "anchor_session_id"})
    )


# Attach numeric event weights first: the targets aggregate `event_weight`.
train_ev = ensure_event_weight(train_ev)
test_ev = ensure_event_weight(test_ev)

# Targets are built separately for each split, so an anchor and its "next
# session" always come from the same part.
targets_train = build_next_session_targets(train_ev)
targets_test = build_next_session_targets(test_ev)

In [11]:
def load_mappings(pth: Path) -> tuple[dict, dict]:
    """Read the two index mappings stored as JSON in `pth`.

    Args:
        pth: Directory containing `hash_visitoridx_train.json` and
            `hash_itemidx_train.json`.

    Returns:
        `(idx2user, idx2item)` dictionaries with both keys and values coerced
        to `int` (values such as "12" or "12.0" are accepted).
    """

    def _read_int_map(file_name: str) -> dict:
        # JSON keys are always strings, and numbers may be serialised as floats.
        with open(pth / file_name) as fh:
            raw = json.load(fh)
        return {int(float(key)): int(float(val)) for key, val in raw.items()}

    idx2user = _read_int_map("hash_visitoridx_train.json")
    idx2item = _read_int_map("hash_itemidx_train.json")
    return idx2user, idx2item


def load_als_and_sim(idx2user: dict, idx2item: dict, pth_in: Path) -> tuple[dict, dict]:
    """Load ALS recommendations and item-to-item neighbours keyed by raw ids.

    Args:
        idx2user: Mapping from ALS visitor index to integer `visitorid`.
        idx2item: Mapping from ALS item index to integer `itemid`.
        pth_in: Directory with `als_recommendations.parquet` (columns
            `visitoridx`, `itemidx`, `rating`) and `similar_items_df.parquet`
            (columns `items_idx`, `sim_item_id_idx`, `score`).

    Returns:
        `(als_user_lookup, sim_index)` where
        `als_user_lookup[visitorid] = {itemid: als_score}` and
        `sim_index[itemid] = [(sim_itemid, score), ...]` ordered by descending
        score, with self-matches removed. Rows whose index is missing from the
        mappings are dropped.
    """
    als_recs = pd.read_parquet(pth_in / "als_recommendations.parquet")
    als_recs["visitorid"] = als_recs["visitoridx"].map(idx2user)
    als_recs["itemid"] = als_recs["itemidx"].map(idx2item)
    # A failed lookup yields NaN and silently upcasts the id column to float.
    # After dropping those rows, restore int64 so lookup keys and every id that
    # flows downstream stay integers.
    als_recs = (
        als_recs.dropna(subset=["visitorid", "itemid"])
        .astype({"visitorid": "int64", "itemid": "int64"})
        .rename(columns={"rating": "als_score"})
    )

    als_user_lookup = {
        uid: dict(zip(df_u["itemid"], df_u["als_score"]))
        for uid, df_u in als_recs.groupby("visitorid")
    }

    sim_df = pd.read_parquet(pth_in / "similar_items_df.parquet")

    sim_df["itemid"] = sim_df["items_idx"].map(idx2item)
    sim_df["sim_itemid"] = sim_df["sim_item_id_idx"].map(idx2item)
    sim_df = sim_df.dropna(subset=["itemid", "sim_itemid"]).astype(
        {"itemid": "int64", "sim_itemid": "int64"}
    )
    sim_df = sim_df[sim_df["itemid"] != sim_df["sim_itemid"]]
    # Consumers take a prefix of each neighbour list as "the best k", so make
    # the best-first order explicit instead of relying on the file's row order.
    # The stable sort leaves an already sorted file unchanged.
    sim_df = sim_df.sort_values("score", ascending=False, kind="stable")

    sim_index = {
        iid: list(zip(g["sim_itemid"], g["score"]))
        for iid, g in sim_df.groupby("itemid")
    }

    return als_user_lookup, sim_index


def build_popularity_by_ev_weight(train_ev) -> tuple[dict, list]:
    """Rank items by their total `event_weight`.

    Args:
        train_ev: Events with `itemid` and `event_weight` columns.

    Returns:
        `(item_pop, popular_items)`: a dict itemid -> summed weight and the
        list of item ids ordered from most to least popular.
    """
    weight_by_item = train_ev.groupby("itemid")["event_weight"].sum()
    ranked = weight_by_item.sort_values(ascending=False)
    item_pop = ranked.to_dict()
    popular_items = list(ranked.index)
    return item_pop, popular_items

In [12]:
# Index -> raw id mappings stored next to the ALS artefacts.
idx2user, idx2item = load_mappings(ALS_dir)

# Per-visitor ALS scores and per-item neighbour lists, both keyed by raw ids.
als_user_lookup, sim_index = load_als_and_sim(idx2user, idx2item, ALS_dir)

# Popularity is computed from the train events only.
item_pop, popular_items = build_popularity_by_ev_weight(train_ev)

In [13]:
def load_item_props(pth: Path) -> pd.DataFrame:
    """Load item property snapshots and order them chronologically per item.

    Args:
        pth: Directory containing `item_properties_cats_tree.parquet`, which
            must provide `itemid` and `timestamp` (epoch milliseconds).

    Returns:
        The property frame with an added tz-aware UTC `ts_prop` column, sorted
        by (`itemid`, `ts_prop`).
    """
    return (
        pd.read_parquet(pth / "item_properties_cats_tree.parquet")
        .assign(
            ts_prop=lambda frame: pd.to_datetime(
                frame["timestamp"], unit="ms", utc=True
            )
        )
        .sort_values(["itemid", "ts_prop"])
    )

In [14]:
# Item property snapshots, sorted by item and snapshot time.
item_propertys = load_item_props(data_dir)

In [15]:
def prepare_session_features(sessions_part: pd.DataFrame) -> pd.DataFrame:
    """Select and rename per-session aggregates so they can be joined on anchors.

    Args:
        sessions_part: Session aggregates with a `session_id` column and any
            subset of `n_events`, `n_items`, `duration_sec`,
            `cnt_event_view`, `cnt_event_addtocart`, `cnt_event_transaction`.

    Returns:
        A new frame keyed by `anchor_session_id` whose available feature
        columns carry a `sess_` prefix; columns absent from the input are
        simply omitted.
    """
    feature_names = {
        "n_events": "sess_n_events",
        "n_items": "sess_n_items",
        "duration_sec": "sess_duration",
        "cnt_event_view": "sess_cnt_view",
        "cnt_event_addtocart": "sess_cnt_addtocart",
        "cnt_event_transaction": "sess_cnt_transaction",
    }
    available = [col for col in feature_names if col in sessions_part.columns]
    selected = sessions_part[["session_id", *available]].copy()
    return selected.rename(
        columns={"session_id": "anchor_session_id", **feature_names}
    )


def _item_ids_as_str(ids: pd.Series) -> pd.Series:
    """Stringify ids so that integral floats become '12' rather than '12.0'."""
    if pd.api.types.is_float_dtype(ids):
        present = ids.dropna()
        # Only normalise when every value is a finite, exactly representable
        # whole number; otherwise keep the plain string conversion.
        if (
            len(present) == len(ids)
            and (present % 1 == 0).all()
            and present.abs().lt(2**53).all()
        ):
            return ids.astype("int64").astype(str)
    return ids.astype(str)


def enrich_with_item_props_simple(X: pd.DataFrame, props: pd.DataFrame) -> pd.DataFrame:
    """Left-join the latest known properties of every item onto `X`.

    Args:
        X: Candidate rows with an `itemid` column. The frame is not modified.
        props: Item property snapshots with `itemid` and, optionally,
            `ts_prop` used to order the snapshots.

    Returns:
        `X` itself when `props` is None/empty or has no usable `itemid`;
        otherwise a new frame with the property columns appended and `itemid`
        converted to `str` (the join key type).

    Notes:
        With `ts_prop` present the per-item value of each property comes from
        `GroupBy.last()`, which skips nulls: a property missing in the newest
        snapshot falls back to the most recent snapshot that has it.
    """
    if props is None or props.empty:
        print("    Свойства товаров отсутствуют")
        return X

    props_clean = props.dropna(subset=["itemid"]).copy()
    if props_clean.empty:
        return X

    # Join on string keys. The key column is built on a copy so the caller's
    # frame keeps its original `itemid` dtype, and ids that were upcast to
    # float (e.g. by NaNs elsewhere in the column) still match.
    X_keyed = X.assign(itemid=_item_ids_as_str(X["itemid"]))
    props_clean["itemid"] = _item_ids_as_str(props_clean["itemid"])

    # Latest properties per item.
    if "ts_prop" in props_clean.columns:
        latest_props = (
            props_clean.sort_values("ts_prop")
            .groupby("itemid", as_index=False)
            .last()
            .drop(columns=["ts_prop"])
        )
    else:
        latest_props = props_clean.drop_duplicates("itemid", keep="last")

    X_enriched = X_keyed.merge(latest_props, on="itemid", how="left")

    print(
        f"обогащение свойставми item: {len(X_enriched):,} строк, добавлено {len(latest_props.columns) - 1} колонок свойств"
    )
    return X_enriched

In [16]:
import heapq


def build_dataset_for_range_model(
    events_part: pd.DataFrame,
    sessions_part: pd.DataFrame,
    targets_part: pd.DataFrame,
    tag: str,
    props: pd.DataFrame,
    LAST_K: int = 5,
    N_ALS: int = 100,
    N_SIM: int = 50,
    N_POP: int = 50,
    als_user_lookup: dict = None,
    sim_index: dict = None,
    popular_items: list = None,
    item_pop: dict = None,
) -> "pd.DataFrame | None":
    """Assemble the candidate/feature table for the ranking model.

    For every anchor session found in `targets_part`, candidates are gathered
    from four sources (ALS, item similarity, popularity, known positives),
    de-duplicated and decorated with scores, session features, the `gain`
    label and item properties.

    Args:
        events_part: Events of the split with `session_id`, `ts_event`, `itemid`.
        sessions_part: Per-session aggregates passed to `prepare_session_features`.
        targets_part: Targets with `visitorid`, `anchor_session_id`, `itemid`, `gain`.
        tag: Label used in log messages (e.g. "train" / "test").
        props: Item property snapshots passed to `enrich_with_item_props_simple`.
        LAST_K: Number of most recent anchor-session items used for similarity.
        N_ALS: Max ALS candidates per visitor.
        N_SIM: Max similarity candidates per anchor session.
        N_POP: Max popular items added to every anchor.
        als_user_lookup: `{visitorid: {itemid: score}}`; None means no ALS candidates.
        sim_index: `{itemid: [(sim_itemid, score), ...]}`, best neighbours
            first; None means no similarity candidates.
        popular_items: Item ids ordered by decreasing popularity.
        item_pop: `{itemid: popularity weight}`.

    Returns:
        One row per unique (`visitorid`, `anchor_session_id`, `itemid`) with
        `als_score`, `item_pop_w`, `sim_max`, session features, `gain`,
        `anchor_ts` and item properties, or None when no dataset can be built
        (no anchors, no events for them, or no candidates).
    """
    print(f"Начинаем  сборку {tag} датасета...")

    # A missing lookup means "no candidates from that source", not a crash.
    als_user_lookup = als_user_lookup or {}
    sim_index = sim_index or {}
    pair_cols = ["visitorid", "anchor_session_id", "itemid"]
    empty_pairs = pd.DataFrame(columns=pair_cols)

    # 0) Anchors
    anchors = targets_part[["visitorid", "anchor_session_id"]].drop_duplicates()
    if anchors.empty:
        print(f"Пустой {tag}: нет anchors")
        return None

    # 1) Keep only the events of anchor sessions
    anchor_sessions = set(anchors["anchor_session_id"].unique())
    events_filtered = events_part[events_part["session_id"].isin(anchor_sessions)]
    if events_filtered.empty:
        print(f"Пустой {tag}: нет событий для anchors")
        return None

    # 1.1) Anchor timestamp = first event of the anchor session
    anchor_ts = events_filtered.groupby("session_id")["ts_event"].min()

    # 1.2) Last K items of every anchor session
    last_items_by_session = {}
    for session_id, group in events_filtered.groupby("session_id"):
        if len(group) > LAST_K:
            group = group.nlargest(LAST_K, "ts_event")
        items = group["itemid"].tolist()
        if items:
            last_items_by_session[session_id] = items

    if not last_items_by_session:
        print(f"Пустой {tag}: нет последних товаров")
        return None

    # 2) SIM candidates: split the N_SIM budget across the last items, then
    #    keep the N_SIM best-scored neighbours per anchor session.
    sim_rows = []
    for anchor_session, last_items in last_items_by_session.items():
        allowed_per_item = max(1, N_SIM // len(last_items))
        candidates = [
            pair
            for last_item in last_items
            for pair in sim_index.get(last_item, [])[:allowed_per_item]
        ]
        for item_id, score in heapq.nlargest(
            N_SIM, candidates, key=lambda pair: pair[1]
        ):
            sim_rows.append((anchor_session, item_id, score))

    if sim_rows:
        sim_scores = pd.DataFrame(
            sim_rows, columns=["anchor_session_id", "itemid", "sim_max"]
        )
        # Best score per (anchor, item); sort=False keeps first-seen order.
        sim_scores = (
            sim_scores.groupby(["anchor_session_id", "itemid"], sort=False)["sim_max"]
            .max()
            .reset_index()
        )
        # Scores are floored at 0, matching the 0.0 given to non-candidates.
        sim_scores["sim_max"] = sim_scores["sim_max"].clip(lower=0)
        sim_pairs = sim_scores[["anchor_session_id", "itemid"]].merge(
            anchors, on="anchor_session_id", how="left"
        )
    else:
        sim_scores = None
        sim_pairs = empty_pairs

    # 3) ALS candidates: top N_ALS items per visitor, shared by all of the
    #    visitor's anchor sessions.
    als_rows = []
    for uid in anchors["visitorid"].unique():
        user_scores = als_user_lookup.get(uid)
        if user_scores:
            top_items = heapq.nlargest(
                N_ALS, user_scores.items(), key=lambda pair: pair[1]
            )
            als_rows.extend((uid, item_id, score) for item_id, score in top_items)

    if als_rows:
        als_scores = pd.DataFrame(
            als_rows, columns=["visitorid", "itemid", "als_score"]
        )
        als_pairs = anchors.merge(
            als_scores[["visitorid", "itemid"]], on="visitorid", how="inner"
        )
    else:
        als_scores = None
        als_pairs = empty_pairs

    # 4) POP candidates. The repeat count must be the number of items actually
    #    available: with fewer than N_POP popular items, repeating by N_POP
    #    produced columns of different lengths and raised ValueError.
    pop_items_list = list(popular_items[:N_POP]) if popular_items and N_POP > 0 else []
    if pop_items_list:
        n_pop = len(pop_items_list)
        pop_pairs = pd.DataFrame(
            {
                "visitorid": np.repeat(anchors["visitorid"].values, n_pop),
                "anchor_session_id": np.repeat(
                    anchors["anchor_session_id"].values, n_pop
                ),
                "itemid": np.tile(pop_items_list, len(anchors)),
            }
        )
    else:
        pop_pairs = empty_pairs

    # 5) Positives.
    # NOTE(review): ground-truth items are injected as candidates for every
    # `tag`, including the evaluation split, so candidate recall on that split
    # is 100% by construction - confirm this is intended for evaluation.
    pos_pairs = targets_part[pair_cols].drop_duplicates()

    # 6) Union of all candidate sources
    candidate_frames = [
        frame[pair_cols]
        for frame in (als_pairs, sim_pairs, pop_pairs, pos_pairs)
        if not frame.empty
    ]
    if not candidate_frames:
        print(f"Пустой {tag} датасет")
        return None

    X = pd.concat(candidate_frames, ignore_index=True).drop_duplicates()

    # 7) Features. Scores are attached with left merges (row order and row
    #    count of X are preserved) instead of a Python callback per row.
    # 7.1) ALS scores
    if als_scores is not None:
        X = X.merge(
            als_scores, on=["visitorid", "itemid"], how="left", validate="many_to_one"
        )
        X["als_score"] = X["als_score"].fillna(0.0)
    else:
        X["als_score"] = 0.0
    X["als_score"] = X["als_score"].astype("float32")

    # 7.2) Item popularity (float32 in both branches)
    X["item_pop_w"] = X["itemid"].map(item_pop).fillna(0.0) if item_pop else 0.0
    X["item_pop_w"] = X["item_pop_w"].astype("float32")

    # 7.3) Similarity scores
    if sim_scores is not None:
        X = X.merge(
            sim_scores,
            on=["anchor_session_id", "itemid"],
            how="left",
            validate="many_to_one",
        )
        X["sim_max"] = X["sim_max"].fillna(0.0)
    else:
        X["sim_max"] = 0.0
    X["sim_max"] = X["sim_max"].astype("float32")

    # 7.4) Session features
    sess_features = prepare_session_features(sessions_part)
    X = X.merge(sess_features, on="anchor_session_id", how="left")

    sess_cols = [
        c
        for c in [
            "sess_n_events",
            "sess_n_items",
            "sess_duration",
            "sess_cnt_view",
            "sess_cnt_addtocart",
            "sess_cnt_transaction",
        ]
        if c in X.columns
    ]
    for c in sess_cols:
        X[c] = X[c].fillna(0).astype("float32")

    # 7.5) Gain label; candidates that are not targets get 0. If a key ever
    #      repeats in the targets, the last row wins (as with a dict lookup).
    gain_lookup = targets_part[pair_cols + ["gain"]].drop_duplicates(
        subset=pair_cols, keep="last"
    )
    X = X.merge(gain_lookup, on=pair_cols, how="left", validate="many_to_one")
    X["gain"] = X["gain"].fillna(0.0).astype("float32")

    # 7.6) Anchor timestamp
    X["anchor_ts"] = X["anchor_session_id"].map(anchor_ts)

    # 8) Item properties
    print("  Обогащение свойствами товаров...")
    X = enrich_with_item_props_simple(X, props)

    # 9) Restore integer ids (the enrichment step joins on string keys)
    for col in ["visitorid", "itemid"]:
        if col in X.columns:
            X[col] = X[col].astype("int64")

    print(f"Статистика {tag}:")
    print(f"Строк: {len(X):,}")
    print(f"Позитивных: {(X['gain'] > 0).sum():,} ({(X['gain'] > 0).mean() * 100}%)")
    print(f"Колонок: {len(X.columns)}")
    print(f"Память: {X.memory_usage(deep=True).sum() / 1024**2} MB")

    return X

In [17]:
# Candidate-generation settings and lookups shared by both splits.
_range_model_kwargs = dict(
    LAST_K=LAST_K,
    N_ALS=N_ALS,
    N_SIM=N_SIM,
    N_POP=N_POP,
    als_user_lookup=als_user_lookup,
    sim_index=sim_index,
    popular_items=popular_items,
    item_pop=item_pop,
)

train_X = build_dataset_for_range_model(
    train_ev, sessions_train, targets_train, "train", item_propertys,
    **_range_model_kwargs,
)
test_X = build_dataset_for_range_model(
    test_ev, sessions_test, targets_test, "test", item_propertys,
    **_range_model_kwargs,
)

Начинаем  сборку train датасета...
  Обогащение свойствами товаров...
обогащение свойставми item: 46,063,403 строк, добавлено 18 колонок свойств
Статистика train:
Строк: 46,063,403
Позитивных: 500,931 (1.087481530619872%)
Колонок: 32
Память: 11831.885006904602 MB
Начинаем  сборку test датасета...
  Обогащение свойствами товаров...
обогащение свойставми item: 3,056,972 строк, добавлено 18 колонок свойств
Статистика test:
Строк: 3,056,972
Позитивных: 55,419 (1.8128723455759488%)
Колонок: 32
Память: 785.8192224502563 MB


In [20]:
# Quick look at the first rows of the assembled train table.
train_X.head()

Unnamed: 0,visitorid,anchor_session_id,itemid,als_score,item_pop_w,sim_max,sess_n_events,sess_n_items,sess_duration,sess_cnt_view,...,value_max,value_mean,value_std,level_0,level_1,level_2,level_3,level_4,level_5,root_category
0,7,7_1,164941,0.115865,173.0,0.0,2.0,2.0,186.063995,2.0,...,18000.0,18000.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,7,7_1,30990,0.102595,234.0,0.0,2.0,2.0,186.063995,2.0,...,53520.0,53520.0,0.0,1532.0,113.0,1114.0,-1.0,-1.0,-1.0,1532.0
2,7,7_1,259147,0.099174,145.0,0.911824,2.0,2.0,186.063995,2.0,...,1322464.0,1113572.0,221176.607715,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,7,7_1,190515,0.09763,158.0,0.0,2.0,2.0,186.063995,2.0,...,1322464.0,982863.3,403267.109712,1532.0,113.0,1114.0,-1.0,-1.0,-1.0,1532.0
4,7,7_1,379841,0.09331,204.0,0.0,2.0,2.0,186.063995,2.0,...,57360.0,57360.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [21]:
# Spot check: all candidate rows generated for a single visitor.
train_X.query("visitorid == 7")

Unnamed: 0,visitorid,anchor_session_id,itemid,als_score,item_pop_w,sim_max,sess_n_events,sess_n_items,sess_duration,sess_cnt_view,...,value_max,value_mean,value_std,level_0,level_1,level_2,level_3,level_4,level_5,root_category
0,7,7_1,164941,0.115865,173.0,0.000000,2.0,2.0,186.063995,2.0,...,18000.0,1.800000e+04,0.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,7,7_1,30990,0.102595,234.0,0.000000,2.0,2.0,186.063995,2.0,...,53520.0,5.352000e+04,0.000000,1532.0,113.0,1114.0,-1.0,-1.0,-1.0,1532.0
2,7,7_1,259147,0.099174,145.0,0.911824,2.0,2.0,186.063995,2.0,...,1322464.0,1.113572e+06,221176.607715,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,7,7_1,190515,0.097630,158.0,0.000000,2.0,2.0,186.063995,2.0,...,1322464.0,9.828633e+05,403267.109712,1532.0,113.0,1114.0,-1.0,-1.0,-1.0,1532.0
4,7,7_1,379841,0.093310,204.0,0.000000,2.0,2.0,186.063995,2.0,...,57360.0,5.736000e+04,0.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31813013,7,7_1,248455,0.000000,868.0,0.000000,2.0,2.0,186.063995,2.0,...,91380.0,9.138000e+04,0.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
31813014,7,7_1,102306,0.000000,850.0,0.000000,2.0,2.0,186.063995,2.0,...,1263524.0,5.205688e+05,391201.212890,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
31813015,7,7_1,231482,0.000000,843.0,0.000000,2.0,2.0,186.063995,2.0,...,184905.0,1.849050e+05,0.000000,140.0,384.0,955.0,1051.0,-1.0,-1.0,140.0
31813016,7,7_1,303828,0.000000,827.0,0.000000,2.0,2.0,186.063995,2.0,...,1297729.0,6.066514e+05,408667.514751,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [22]:
# Persist the ranking datasets.
_datasets = {
    "train_X.parquet": train_X,
    "test_X.parquet": test_X,
}
for _file_name, _frame in _datasets.items():
    _frame.to_parquet(range_features_dir / _file_name)

In [23]:
# Persist the intermediate artefacts (events, sessions, targets) next to the datasets.
_artifacts = {
    "events_with_sessions_all.parquet": events_with_sessions,
    "events_with_sessions_train.parquet": train_ev,
    "events_with_sessions_test.parquet": test_ev,
    "sessions_train.parquet": sessions_train,
    "sessions_test.parquet": sessions_test,
    "targets_next_session_train.parquet": targets_train,
    "targets_next_session_test.parquet": targets_test,
}
for _file_name, _frame in _artifacts.items():
    _frame.to_parquet(range_features_dir / _file_name)