# Neural Collaborative Filtering - Amazon Review Dataset

## Imports and Global Variables

In [1]:
import pandas as pd

# NCF model, its dataset wrapper and the chronological splitter from `recommenders`.
from recommenders.models.ncf.ncf_singlenode import NCF
# The NCF `Dataset` class is defined alongside the model in
# `recommenders.models.ncf.dataset` (there is no `recommenders.datasets.ncd` module).
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.datasets.python_splitters import python_chrono_split

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# --- File locations -------------------------------------------------------
# Raw ratings CSV that is read in, and the split files this notebook writes.
DATA_PATH = "../data/amazon_reviews/Office_Products.csv"
TRAIN_FILE, TEST_FILE, LOO_TEST_FILE = "train.csv", "test.csv", "loo_test.csv"

# --- Experiment settings --------------------------------------------------
# MIN_REVIEWS: users are kept only if their review count exceeds this value.
# SEED: passed to both the NCF dataset and the NCF model.
# EPOCHS / BATCH_SIZE: forwarded to the model as n_epochs / batch_size.
MIN_REVIEWS, SEED = 15, 42
EPOCHS, BATCH_SIZE = 100, 256

## Data

In [None]:
# Read the raw ratings CSV, labelling its four columns explicitly
column_names = ["itemID", "userID", "rating", "timestamp"]
df = pd.read_csv(DATA_PATH, names=column_names)

# Display the loaded frame
df

In [None]:
# Count the distinct values found in each column
df.nunique(axis=0)

In [None]:
# Tally how many reviews each user ID has
vc = df["userID"].value_counts()

# Keep only rows whose user has strictly more than MIN_REVIEWS reviews
active_users = vc.loc[vc.gt(MIN_REVIEWS)].index
df = df.loc[df["userID"].isin(active_users)]

In [None]:
# Chronological split: 75% of the data goes to train, the rest to test
train, test = python_chrono_split(df, ratio=0.75)

# Drop test rows whose user or item does not appear in the training set
user_seen_in_train = test["userID"].isin(train["userID"].unique())
item_seen_in_train = test["itemID"].isin(train["itemID"].unique())
test = test[user_seen_in_train & item_seen_in_train]

# Leave-one-out test set: one row per user, taken as the last entry of each
# user's group in `test`
last_per_user = test.groupby("userID").last()
loo_test = last_per_user.reset_index()

In [None]:
# Write each split to its CSV file, without the DataFrame index column
for frame, path in (
    (train, TRAIN_FILE),
    (test, TEST_FILE),
    (loo_test, LOO_TEST_FILE),
):
    frame.to_csv(path, index=False)

In [None]:
# Build the NCF dataset from the saved training file and the leave-one-out test file
data = NCFDataset(
    train_file=TRAIN_FILE,
    test_file=LOO_TEST_FILE,
    seed=SEED,
    overwrite_test_file_full=True,
)

## Model

In [None]:
# Collect the model configuration first, then instantiate the NeuMF-type NCF model.
# User/item counts come from the prepared dataset; training length, batch size and
# seed come from the notebook-level constants.
ncf_kwargs = dict(
    n_users=data.n_users,
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED,
)
model = NCF(**ncf_kwargs)

In [None]:
# `Timer` is not imported by the notebook's import cell, so bring it into scope
# here; otherwise this cell fails with a NameError before training starts.
from recommenders.utils.timer import Timer

# Fit the model on the prepared dataset, timing the call with the Timer context manager
with Timer() as train_time:
    model.fit(data)

# Report the elapsed time recorded by the timer
print(f"Took {train_time.interval} seconds for training.")