In [1]:
import numpy as np, pandas as pd

# Complete ratings file: "::"-separated with no header row, so column
# names are assigned by hand. The timestamp column is discarded.
df_full = pd.read_csv("ratings.dat", sep="::", engine="python", header=None)
df_full.columns = ["UserId", "ItemId", "Rating", "Timestamp"]
df_full = df_full.drop(columns="Timestamp")
# Replace the raw IDs with dense 0-based integer codes.
df_full["UserId"], _ = pd.factorize(df_full["UserId"])
df_full["ItemId"], _ = pd.factorize(df_full["ItemId"])

# Training split: same re-indexing, but the unique raw IDs are kept so the
# test split can be encoded with the very same codes.
df_train = pd.read_csv("train.csv")
df_train["UserId"], users_train = pd.factorize(df_train["UserId"])
df_train["ItemId"], items_train = pd.factorize(df_train["ItemId"])

# Test split: encode against the training vocabularies. pandas assigns the
# code -1 to any value that is not among the given categories.
df_test = pd.read_csv("test.csv")
df_test["UserId"] = pd.Categorical(df_test["UserId"], categories=users_train).codes
df_test["ItemId"] = pd.Categorical(df_test["ItemId"], categories=items_train).codes

In [2]:
from scipy.sparse import coo_matrix

# Users index the rows and items index the columns; the matrix shapes are
# inferred by scipy from the largest index present in each coordinate.
X_full = coo_matrix(
    (df_full["Rating"], (df_full["UserId"], df_full["ItemId"]))
)
X_train = coo_matrix(
    (df_train["Rating"], (df_train["UserId"], df_train["ItemId"]))
)

In [3]:
from cornac.data.dataset import Dataset
from cornac.models.mf.recom_mf import MF

def coo_to_cornac(X):
    """Wrap a COO sparse matrix in a cornac ``Dataset``.

    Parameters
    ----------
    X : COO-format sparse matrix
        Must expose ``shape``, ``row``, ``col`` and ``data``. In this
        notebook rows are users and columns are items.

    Returns
    -------
    Dataset
        Built from the matrix dimensions, identity index maps
        (``i -> i``) for both axes, the ``(row, col, data)`` triplet,
        and a fixed ``seed=1``.
    """
    n_rows = X.shape[0]
    n_cols = X.shape[1]
    # The indices are already dense integer codes, so each map is the identity.
    row_map = dict(zip(range(n_rows), range(n_rows)))
    col_map = dict(zip(range(n_cols), range(n_cols)))
    triplets = (X.row, X.col, X.data)
    return Dataset(n_rows, n_cols, row_map, col_map, triplets, seed=1)

# cornac datasets for the complete data and for the training split.
dt_full, dt_train = coo_to_cornac(X_full), coo_to_cornac(X_train)

In [4]:
%%time
# Fit on the complete ratings data; the %%time magic heading this cell
# reports how long the cell takes to run.
model = MF(
    k=50,
    max_iter=15,
    lambda_reg=0.05,
    use_bias=True,
    early_stop=False,
    verbose=False,
    seed=1,
)
model.fit(dt_full)

CPU times: user 13.9 s, sys: 7.68 ms, total: 13.9 s
Wall time: 13.9 s


<cornac.models.mf.recom_mf.MF at 0x7f1e395992d0>

In [5]:
# Same hyperparameters, fitted on the training split only. This rebinds
# `model`, so the instance fitted on the full data is no longer reachable
# by that name; this is the model evaluated on the test split later on.
model = MF(
    k=50,
    max_iter=15,
    lambda_reg=0.05,
    use_bias=True,
    early_stop=False,
    verbose=False,
    seed=1,
)
model.fit(dt_train)

<cornac.models.mf.recom_mf.MF at 0x7f1e2f947150>

In [6]:
from cmfrec import CMF

def cornac_to_cmf(model):
    """Build a cmfrec ``CMF`` object from a fitted cornac MF model.

    Parameters
    ----------
    model : fitted cornac model
        Must expose ``u_factors``, ``i_factors``, ``global_mean``,
        ``u_biases`` and ``i_biases``.

    Returns
    -------
    The object produced by ``CMF.from_model_matrices``, created with
    ``precompute=False``.
    """
    matrices = {
        "A": model.u_factors,
        "B": model.i_factors,
        "glob_mean": model.global_mean,
        "user_bias": model.u_biases,
        "item_bias": model.i_biases,
    }
    return CMF.from_model_matrices(precompute=False, **matrices)

def print_rmse(pred, real):
    """Print the root-mean-squared error between predictions and targets.

    Parameters
    ----------
    pred : array-like supporting ``-``
        Predicted values.
    real : array-like supporting ``-``
        Observed values; subtracted element-wise from ``pred``.

    Returns
    -------
    None
        The result is only printed, formatted as ``"RMSE: %f"``.
    """
    residuals = np.array(pred - real)
    mean_squared = np.mean(residuals ** 2)
    print("RMSE: %f" % np.sqrt(mean_squared))
    
# Score the train-split model on the held-out ratings through cmfrec's
# prediction routine, then report the error.
model_cmf = cornac_to_cmf(model)
pred = model_cmf.predict(user=df_test["UserId"], item=df_test["ItemId"])
print_rmse(pred, df_test["Rating"])

RMSE: 0.816548
