In [1]:
import numpy as np, pandas as pd

# Load the complete ratings file ("::"-separated, no header row; a
# multi-character separator requires the python parsing engine) together with
# the pre-made train/test splits.
df_full = pd.read_csv("ratings.dat", sep="::", engine="python", header=None)
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

df_full.columns = ["UserId", "ItemId", "Rating", "Timestamp"]
df_full = df_full.drop(columns="Timestamp")

# Re-encode ids as consecutive integer codes. Only the codes are needed for
# the full data; indexing with [0] avoids binding `_`, which IPython reserves
# for the most recent cell output.
df_full["UserId"] = pd.factorize(df_full["UserId"])[0]
df_full["ItemId"] = pd.factorize(df_full["ItemId"])[0]

# For the training split the unique values are kept as well, so that the test
# split can be encoded with exactly the same mapping.
df_train["UserId"], users_train = pd.factorize(df_train["UserId"])
df_train["ItemId"], items_train = pd.factorize(df_train["ItemId"])

# Test ids are mapped through the training categories; any id that does not
# occur in the training split receives the code -1.
# NOTE(review): confirm how the model handles "-1" ids at prediction time.
df_test["UserId"] = pd.Categorical(df_test["UserId"], categories=users_train).codes
df_test["ItemId"] = pd.Categorical(df_test["ItemId"], categories=items_train).codes

In [2]:
import os
from pyreclab import SVD

# Export the re-encoded full dataset as a tab-separated file (header row kept,
# index column omitted) so it can be passed to pyreclab by file name.
df_full.to_csv("df_full_pyreclab.tsv", index=False, sep="\t")

In [3]:
%%time
# Fit a 50-factor SVD on the exported full dataset. The file is tab-separated
# with a header row, and its columns 0/1/2 hold user, item and rating.
model = SVD(
    factors=50,
    dataset="df_full_pyreclab.tsv",
    dlmchar=b"\t",
    header=True,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
model.train(maxiter=15, lamb=0.05, progress=False)

CPU times: user 1min 30s, sys: 430 ms, total: 1min 30s
Wall time: 1min 30s


In [4]:
# Delete the temporary TSV that was exported for pyreclab. A missing file is
# tolerated so that re-running this cell does not raise.
try:
    os.remove("df_full_pyreclab.tsv")
except FileNotFoundError:
    pass

In [5]:
# Refit the model on the training split only, going through a temporary
# tab-separated file (header row, no index column) that pyreclab reads by name.
df_train.to_csv("df_train_pyreclab.tsv", sep="\t", index=False)
try:
    model = SVD(
        factors=50,
        dataset="df_train_pyreclab.tsv",
        dlmchar=b"\t",
        header=True,
        usercol=0,
        itemcol=1,
        ratingcol=2,
    )
    model.train(maxiter=15, lamb=0.05, progress=False)
finally:
    # Clean up the temporary file even if construction or training fails.
    os.remove("df_train_pyreclab.tsv")

In [6]:
def predict_from_model(model, df):
    """Predict one value per row of ``df`` using ``model``.

    Parameters
    ----------
    model : object
        Any object exposing ``predict(user_id, item_id)``. Both ids are passed
        as strings.
    df : pandas.DataFrame
        Frame with ``UserId`` and ``ItemId`` columns.

    Returns
    -------
    numpy.ndarray
        One-dimensional float array holding the predictions in row order.
    """
    # Ids are converted to str before being handed to the model.
    # NOTE(review): presumably because the model read its ids from a text
    # file - confirm against the model's API.
    # Materialising the columns once avoids a per-row ``.iloc`` lookup.
    user_ids = df.UserId.astype(str).tolist()
    item_ids = df.ItemId.astype(str).tolist()
    return np.fromiter(
        (model.predict(user, item) for user, item in zip(user_ids, item_ids)),
        dtype=float,
        count=len(user_ids),
    )

def print_rmse(pred, real):
    """Print the root-mean-squared error between ``pred`` and ``real``.

    The difference ``pred - real`` is taken first, then squared, averaged and
    square-rooted. The result is written to stdout as ``RMSE: <value>`` and
    nothing is returned.
    """
    residuals = np.asarray(pred - real)
    mean_squared = np.square(residuals).mean()
    print("RMSE: %f" % np.sqrt(mean_squared))
    
# Score the test split with the current model and report the error against
# the observed ratings.
pred = predict_from_model(model, df_test)
print_rmse(pred, df_test["Rating"])

RMSE: 0.812566
