In [1]:
import numpy as np, pandas as pd

# Load the three inputs. ratings.dat has no header row and uses the
# two-character "::" separator, hence the explicit Python parser engine.
df_full = pd.read_csv("ratings.dat", delimiter="::", engine="python", header=None)
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Full data: name the columns, discard the timestamp and replace the raw
# IDs with dense integer codes (factorize returns (codes, uniques); only the
# codes are kept here).
df_full.columns = ["UserId", "ItemId", "Rating", "Timestamp"]
df_full = df_full.drop(columns="Timestamp")
for id_col in ("UserId", "ItemId"):
    df_full[id_col], _ = pd.factorize(df_full[id_col])

# Train/test: encode the training IDs, keeping the ID vocabularies, then map
# the test IDs through the same vocabularies so both frames share one coding.
# NOTE: pd.Categorical assigns code -1 to any test ID that does not occur in
# the training vocabulary.
train_user_codes, users_train = pd.factorize(df_train["UserId"])
df_train["UserId"] = train_user_codes
df_test["UserId"] = pd.Categorical(df_test["UserId"], categories=users_train).codes

train_item_codes, items_train = pd.factorize(df_train["ItemId"])
df_train["ItemId"] = train_item_codes
df_test["ItemId"] = pd.Categorical(df_test["ItemId"], categories=items_train).codes

In [2]:
# Switch all three frames to lowercase column names
# (presumably the names the LensKit models used below expect -- confirm).
remap = {"UserId":"user", "ItemId":"item", "Rating":"rating"}
df_full, df_train, df_test = (
    frame.rename(columns=remap) for frame in (df_full, df_train, df_test)
)

In [3]:
from lenskit.algorithms.svd import BiasedSVD
from lenskit.algorithms.als import BiasedMF

In [4]:
%%time
model = BiasedSVD(50)
model.fit(df_full)

CPU times: user 19.9 s, sys: 1.46 s, total: 21.4 s
Wall time: 7.97 s


In [5]:
%%time
model = BiasedMF(50, iterations=15, reg=0.05,
                 damping=5, bias=True, method='cd')
model.fit(df_full)

CPU times: user 3min 53s, sys: 5min 11s, total: 9min 5s
Wall time: 49.5 s


<lenskit.algorithms.als.BiasedMF at 0x7f246d962090>

In [6]:
%%time
model = BiasedMF(50, iterations=15, reg=0.05,
                 damping=5, bias=True, method='lu')
model.fit(df_full)

CPU times: user 18min 46s, sys: 37min 5s, total: 55min 51s
Wall time: 3min 42s


<lenskit.algorithms.als.BiasedMF at 0x7f2460ea1890>

In [7]:
# Separate the held-out target from the (user, item) pairs to be scored.
# Guarded so the cell is safe to re-run: after the first run df_test no longer
# has a "rating" column, and an unguarded re-run would raise instead of simply
# keeping the rating_test captured earlier.
if "rating" in df_test.columns:
    rating_test = df_test["rating"]
    df_test = df_test[["user", "item"]]

def print_rmse(pred, real):
    """Print the root-mean-squared error between predictions and true values.

    Parameters
    ----------
    pred, real : array-like
        Objects that support elementwise subtraction (``pred - real``), such
        as NumPy arrays or pandas Series. When both are pandas Series the
        subtraction is aligned on their indexes by pandas.

    Returns
    -------
    None
        The result is only printed, formatted as ``RMSE: <value>``.
    """
    residuals = np.array(pred - real)
    mean_squared_error = np.mean(np.square(residuals))
    print("RMSE: %f" % np.sqrt(mean_squared_error))

In [8]:
# Hold-out evaluation of BiasedSVD: fit on the training frame, predict for
# the test (user, item) pairs and report RMSE against the held-out ratings.
n_factors = 50
model = BiasedSVD(n_factors)
model.fit(df_train)
pred = model.predict(df_test)
print_rmse(pred, rating_test)

RMSE: 0.838297


In [9]:
# Hold-out evaluation of BiasedMF with the 'cd' training method: fit on the
# training frame, predict for the test (user, item) pairs and report RMSE
# against the held-out ratings.
als_cd_params = dict(iterations=15, reg=0.05, damping=5, bias=True, method='cd')
model = BiasedMF(50, **als_cd_params)
model.fit(df_train)
pred = model.predict(df_test)
print_rmse(pred, rating_test)

RMSE: 0.796156


In [10]:
# Hold-out evaluation of BiasedMF with the 'lu' training method: fit on the
# training frame, predict for the test (user, item) pairs and report RMSE
# against the held-out ratings.
als_lu_params = dict(iterations=15, reg=0.05, damping=5, bias=True, method='lu')
model = BiasedMF(50, **als_lu_params)
model.fit(df_train)
pred = model.predict(df_test)
print_rmse(pred, rating_test)

RMSE: 0.796129
