In [1]:
from lib.process_data import instacart_process
from lib.data_class import DataSet
from models.latent_models import UserModel, ProductModel
from models.feature_models import MainFeatureModel
from models.main_models import NonTopModel

Using TensorFlow backend.


In [2]:
import numpy as np
import logging
# Notebook-level handle on the named logger "TR_logger"; its level is set in a later cell.
# NOTE(review): presumably the lib/ and models/ modules log under this same name — confirm.
log = logging.getLogger("TR_logger")

In [3]:
# Directory holding the Instacart CSV extract that the data-loading cell reads.
IC_DATA_DIR = './data/instacart_2017_05_01_small/'

# Let INFO-level (and above) records through on the notebook logger, and make
# sure the root logger has a default handler so those records are displayed.
log.setLevel(logging.INFO)
logging.basicConfig()

In [4]:
# Seed NumPy's global RNG so repeated runs of this notebook are consistent.
# NOTE(review): only NumPy is seeded here; if the split or the TensorFlow-backed
# models draw randomness from another source, results may still vary — confirm.
np.random.seed(42)

# Load order and product data from the Instacart CSV files under IC_DATA_DIR
# (IC_DATA_DIR currently points at the "_small" sample directory).
order_data, product_data = instacart_process(data_dir=IC_DATA_DIR)

# Wrap the order and product data in the project's DataSet container.
ic_dataset = DataSet(order_df=order_data, product_df=product_data)

# Train-val-test split in two steps: first split off a held-out part using the
# method's default arguments, then split that part again with test_frac=0.5
# into the validation and test datasets. Must run after the seed is set above.
train_dataset, test_val_dataset = ic_dataset.train_test_split()
val_dataset, test_dataset = test_val_dataset.train_test_split(test_frac=0.5)

In [5]:
# Print the order-table shape of the full dataset followed by the train,
# validation and test splits (one shape per line, in that order).
for _ds in (ic_dataset, train_dataset, val_dataset, test_dataset):
    print(_ds.order_df.shape)

(334091, 4)
(253109, 4)
(38666, 4)
(42316, 4)


In [6]:
# Create the user latent model, fit it on the training split only, then
# transform that same split into its encoding.
user_latent = UserModel()
user_latent.fit(train_dataset)
user_encoding = user_latent.transform(train_dataset)
# Sanity check on the encoding's shape (the recorded run printed (98027, 32)).
print(user_encoding.shape)

(98027, 32)


In [7]:
# Create the product latent model, fit it on the training split only, then
# transform that same split into its encoding.
product_latent = ProductModel()
product_latent.fit(train_dataset)
product_encoding = product_latent.transform(train_dataset)
# Sanity check on the encoding's shape (the recorded run printed (98027, 10),
# i.e. the same number of rows as the user encoding).
print(product_encoding.shape)

(98027, 10)


In [8]:
# Create the feature model and transform the training split into a feature
# matrix. No fit call is made on the feature model in this notebook; only
# transform is used.
feature_model = MainFeatureModel()
X = feature_model.transform(train_dataset)
# Sanity check on the feature matrix shape (the recorded run printed (98027, 8)).
print(X.shape)

(98027, 8)


In [9]:
# Build the main model on top of the two latent models (both fitted on the
# training split in earlier cells) and the feature model.
model = NonTopModel(
    user_latent_model=user_latent,
    product_latent_model=product_latent,
    feature_model=feature_model,
)

# Train on the training split for 50 epochs.
# NOTE(review): fit_latent=False presumably reuses the already-fitted latent
# models instead of refitting them — confirm against NonTopModel.fit.
model.fit(train_dataset, epochs=50, fit_latent=False)

# Report the input dimension the model ended up with.
print(model.input_dim)

50


In [10]:
# Predict on the held-out test split; the returned array is shown as the cell output.
# NOTE(review): the recorded output looks like per-row scores in [0, 1] rather
# than thresholded labels — confirm against NonTopModel.predict.
model.predict(test_dataset)

array([0.28959152, 0.2912253 , 0.29087073, ..., 0.04699376, 0.00533277,
       0.00488029], dtype=float32)

In [11]:
# Determine a threshold from the validation split and print it.
# NOTE(review): not visible from here whether find_threshold also stores the
# value on the model for later predict/evaluate calls — confirm.
print(model.find_threshold(val_dataset))

0.21052631578947367


In [12]:
# Evaluate the fitted model on the validation split; the returned list of
# values is shown as the cell output.
# NOTE(review): the meaning and order of the returned values are not visible
# in this notebook — confirm against NonTopModel.evaluate and label them.
# NOTE(review): this is the same split passed to find_threshold earlier, and
# test_dataset is never evaluated; if the threshold feeds into these metrics
# they may be optimistic — consider also evaluating on test_dataset.
model.evaluate(val_dataset)

[0.97052460789616,
 0.42684660847595657,
 0.46896857742622355,
 0.3716608715738018,
 0.6816989127095386]