In [1]:
import sys

# Add the parent directory to the module search path so the `iftorch`
# imports further down can resolve.
# NOTE(review): presumably the package lives one level above this notebook —
# confirm against the repository layout.
sys.path.append('../')

In [2]:
import pandas as pd

In [3]:
from iftorch.mapping import RawInnerMap
from iftorch.dataset import UserItemSet
from iftorch.base import BaseFeature, BaseRecommender
from iftorch.save_utils import (
    get_path, create_directory,
    save_mapping, load_mapping,
    save_scipy_sparse_csr, load_scipy_sparse_csr
)

## Import data

In [4]:
# Load the input table; later cells read its `is_train`, `raw_user_id`
# and `raw_item_id` columns.
data = pd.read_csv(filepath_or_buffer='../data/test_data.csv')

## Train Period

In [5]:
# Keep only the rows whose `is_train` flag is set.
train_data = data.loc[data['is_train']]

## Test Period

In [6]:
# Keep only the rows whose `is_train` flag is NOT set.
test_data = data.loc[~data['is_train']]

## Raw train/test user/item ids

In [7]:
# Distinct raw user ids and raw item ids that occur in the train rows.
train_users_raw, train_items_raw = (
    set(train_data[column]) for column in ('raw_user_id', 'raw_item_id')
)

In [8]:
# Distinct raw user ids and raw item ids that occur in the test rows.
test_users_raw, test_items_raw = (
    set(test_data[column]) for column in ('raw_user_id', 'raw_item_id')
)

In [9]:
# Ordering: every id seen in the train period comes first (sorted), followed
# by the ids that appear only in the test period (also sorted).

users_raw = [
    *sorted(train_users_raw),
    *sorted(test_users_raw.difference(train_users_raw)),
]
items_raw = [
    *sorted(train_items_raw),
    *sorted(test_items_raw.difference(train_items_raw)),
]

## RawInnerMap

In [10]:
# Map each raw id to its position in the ordered id list (0-based inner id).
raw2inner_user = dict(zip(users_raw, range(len(users_raw))))
raw2inner_item = dict(zip(items_raw, range(len(items_raw))))

In [11]:
# Bundle the user and item raw -> inner dictionaries into one RawInnerMap.
raw_inner_map = RawInnerMap(
    raw2inner_user=raw2inner_user,
    raw2inner_item=raw2inner_item,
)

## Save RawInnerMap

In [12]:
# Persist the raw -> inner id mapping.
# NOTE(review): the arguments look like (file name, target directory) —
# confirm against RawInnerMap.save_to_file.
raw_inner_map.save_to_file('raw_inner_map', '../data/')

## UserItemSet

In [13]:
# Build the train UserItemSet from the raw user/item id columns of the
# train rows, using the shared raw -> inner mapping.
train_set = UserItemSet.load_cls_from_raw(
    users_raw=train_data['raw_user_id'].values,
    items_raw=train_data['raw_item_id'].values,
    raw_inner_map=raw_inner_map,
)

In [14]:
# Build the test UserItemSet from the raw user/item id columns of the
# test rows, using the same raw -> inner mapping as the train set.
test_set = UserItemSet.load_cls_from_raw(
    users_raw=test_data['raw_user_id'].values,
    items_raw=test_data['raw_item_id'].values,
    raw_inner_map=raw_inner_map,
)

## Save UserItemSet

In [15]:
# Persist both UserItemSet objects.
# NOTE(review): the arguments look like (file name, target directory) —
# confirm against UserItemSet.save_to_file.
train_set.save_to_file('trainset', '../data/')
test_set.save_to_file('testset', '../data/')