In [11]:
from data import data_creation as dc
import os
import numpy as np
import pandas as pd

from multimodalrec.viewership import CollaborativeFiltering

directory = os.path.dirname(os.path.realpath("__file__"))+'/data/ml-1m/ratings.dat'
all_movies_dir = os.path.dirname(os.path.realpath("__file__"))+'/data/all_movies.txt'
pickles_dir = os.path.dirname(os.path.realpath("__file__"))+'/data/pickles/'

In [2]:
data = dc.get_movielens_1M(directory=directory, all_movies_dir=all_movies_dir, pickles_dir=pickles_dir, min_positive_score=0)

In [3]:
# There is no movies exist both in training and test set
assert len(set(data['training'].Movie.unique().tolist()) - set(data['test'].Movie.unique().tolist())) == len(data['training'].Movie.unique().tolist())

In [4]:
CF = CollaborativeFiltering(data['training']) # 0 Threshold trim

The sparsity level of training dataset is 95.1%


In [25]:
user_latent, movie_latent, sigma = CF.compute_latent_factors(algorithm='SVD', k=256)

In [26]:
user_latent.shape

(6040, 256)

In [27]:
movie_latent.shape

(2814, 256)

In [28]:
all_user_predicted_ratings = np.dot(np.dot(user_latent, sigma), movie_latent.T) + CF.user_ratings_mean.reshape(-1, 1)

In [29]:
preds = pd.DataFrame(all_user_predicted_ratings, columns = CF.Ratings.columns)
preds.head()

Movie,1,2,3,6,7,8,9,10,11,12,...,3927,3928,3929,3930,3931,3932,3933,3934,3940,3942
0,4.713227,0.350277,0.243687,-0.162407,0.284001,-0.004636,-0.031741,-0.400758,0.172223,-0.001217,...,0.030711,0.031155,0.189989,0.02025,-0.011669,0.064763,0.010837,0.026753,0.022292,-0.004894
1,0.089044,-0.018035,0.129055,0.468278,0.339735,0.125887,0.131451,1.136423,0.070785,0.131048,...,-0.094842,0.03462,-0.110594,-0.057414,0.017177,-0.231932,0.059083,0.079986,0.010712,0.040609
2,0.707848,0.485784,-0.041892,-0.118159,0.027398,-0.022895,-0.051439,1.040665,-0.291188,0.189337,...,0.127651,0.027041,-0.159871,0.156089,-0.025927,-0.058249,-0.000636,-0.041094,-0.006965,-0.015796
3,-0.071408,0.176077,0.019421,-0.152679,-0.000881,-0.020237,-0.013045,-0.147421,0.083727,0.065879,...,0.014705,0.067798,-0.017181,0.026439,0.001986,0.0883,0.0124,0.004586,-0.016609,-0.020486
4,-0.574249,-0.584995,0.079,1.330697,0.091802,-0.019495,-0.073461,-0.001723,-0.250169,-0.142926,...,0.317748,0.523946,-0.008077,0.264701,0.04394,-0.01537,0.035427,0.122357,0.001162,0.032594


In [30]:
I,J = np.where((CF.Ratings.values >= 1)==True)
predicted_values = preds.values
Ratings_values = CF.Ratings.values

In [31]:
tot_abs_err = 0.
predictions = []
targets = []
for enum, (i,j) in enumerate(zip(I,J)):
    tot_abs_err += abs(Ratings_values[i,j] - predicted_values[i,j])
    predictions.append(predicted_values[i,j])
    targets.append(Ratings_values[i,j])
tot_abs_err/float(enum)

1.2445614388699124