-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #27 from guedes-joaofelipe/dev-guedesjoaofelipe
Including Non-negative Matrix Factorization and fixing RatingPredictionEvaluation
- Loading branch information
Showing
24 changed files
with
267 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,8 @@ Rating Prediction: | |
|
||
- SVD | ||
|
||
- Non-negative Matrix Factorization | ||
|
||
- SVD++ | ||
|
||
- ItemKNN | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +0,0 @@ | ||
__author__ = "Arthur Fortes" | ||
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+2.9 KB
caserec/evaluation/__pycache__/item_recomendation_functions.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file added
BIN
+245 Bytes
caserec/recommenders/rating_prediction/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file added
BIN
+5.67 KB
caserec/recommenders/rating_prediction/__pycache__/base_rating_prediction.cpython-37.pyc
Binary file not shown.
Binary file added
BIN
+6.02 KB
caserec/recommenders/rating_prediction/__pycache__/nnmf.cpython-37.pyc
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
# coding=utf-8 | ||
""" | ||
Non-negative Matrix Factorization | ||
[Rating Prediction] | ||
Literature: | ||
Badrul Sarwar , George Karypis , Joseph Konstan , John Riedl: | ||
Incremental Singular Value Decomposition Algorithms for Highly Scalable Recommender Systems | ||
Fifth International Conference on Computer and Information Science 2002. | ||
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.7894 | ||
""" | ||
|
||
# © 2018. Case Recommender (MIT License) | ||
|
||
import numpy as np | ||
from sklearn.decomposition import NMF | ||
|
||
from caserec.recommenders.rating_prediction.base_rating_prediction import BaseRatingPrediction | ||
from caserec.utils.extra_functions import timed | ||
|
||
__author__ = 'Joao Felipe Guedes <guedes.joaofelipe@poli.ufrj.br>' | ||
|
||
|
||
class NNMF(BaseRatingPrediction):
    """
    Non-negative Matrix Factorization recommender for rating prediction.

    The training matrix is factorized with ``sklearn.decomposition.NMF`` and the product of the two
    resulting factor matrices is kept as the source of the predicted scores.
    """

    def __init__(self, train_file=None, test_file=None, output_file=None, factors=10, sep='\t', output_sep='\t',
                 random_seed=None):
        """
        Non-negative Matrix Factorization for rating prediction

        The user-item matrix is approximated by the product of two non-negative matrices with ``factors``
        latent dimensions; the reconstructed matrix is then used to score (user, item) pairs.

        Usage::

            >> NNMF(train, test).compute()

        :param train_file: File which contains the train set. This file needs to have at least 3 columns
        (user item feedback_value).
        :type train_file: str

        :param test_file: File which contains the test set. This file needs to have at least 3 columns
        (user item feedback_value).
        :type test_file: str, default None

        :param output_file: File with dir to write the final predictions
        :type output_file: str, default None

        :param factors: Number of latent factors per user/item
        :type factors: int, default 10

        :param sep: Delimiter for input files
        :type sep: str, default '\t'

        :param output_sep: Delimiter for output file
        :type output_sep: str, default '\t'

        :param random_seed: Number of seed. Lock random numbers for reproducibility of experiments. When given,
        it seeds NumPy's global generator and is also used as the ``random_state`` of the NMF model; when
        omitted, the NMF model keeps its historical fixed ``random_state=0``.
        :type random_seed: int, default None

        """
        super(NNMF, self).__init__(train_file=train_file, test_file=test_file, output_file=output_file, sep=sep,
                                   output_sep=output_sep)

        self.recommender_name = 'NNMF'
        self.factors = factors
        # Kept so that fit() can forward the seed to the factorization model.
        self.random_seed = random_seed

        if random_seed is not None:
            np.random.seed(random_seed)

        # internal vars
        self.feedback_triples = None
        self.prediction_matrix = None

    def init_model(self):
        """
        Method to treat and initialize the model

        Builds the list of (user_id, item_id, feedback) triples from the train set and then calls
        ``create_matrix()`` (not defined in this class; presumably it builds ``self.matrix`` -- confirm
        against the base class).

        """

        feedback = self.train_set['feedback']

        # Map interaction with ids
        self.feedback_triples = [
            (self.user_to_user_id[user], self.item_to_item_id[item], feedback[user][item])
            for user in feedback
            for item in feedback[user]
        ]

        self.create_matrix()

    def fit(self):
        """
        This method performs Non-negative matrix factorization over the training data.

        The result, the product of the user-factor and item-factor matrices, is stored in
        ``self.prediction_matrix``.

        """

        # Honour the user-supplied seed; fall back to the previous hard-coded value (0) so that the
        # default behaviour stays deterministic and unchanged.
        random_state = 0 if self.random_seed is None else self.random_seed
        model = NMF(n_components=self.factors, init='random', random_state=random_state)

        user_factors = model.fit_transform(self.matrix)
        item_factors = model.components_

        self.prediction_matrix = np.dot(user_factors, item_factors)

    def predict_score(self, u, i, cond=True):
        """
        Method to predict a single score for a pair (user, item)

        :param u: User ID
        :type u: int

        :param i: Item ID
        :type i: int

        :param cond: Use max and min values of train set to limit score
        :type cond: bool, default True

        :return: Score generate for pair (user, item)
        :rtype: float

        """

        # NOTE(review): the train mean is added to the reconstructed value although fit() factorizes
        # self.matrix as-is; confirm whether create_matrix() mean-centres the ratings, otherwise this
        # offset may inflate the scores.
        rui = self.train_set["mean_value"] + self.prediction_matrix[u][i]

        if cond:
            if rui > self.train_set["max_value"]:
                rui = self.train_set["max_value"]
            elif rui < self.train_set["min_value"]:
                rui = self.train_set["min_value"]

        return rui

    def predict(self):
        """
        This method computes a final rating for unknown pairs (user, item)

        One (user, item, score) tuple is appended to ``self.predictions`` for every pair found in the test set.

        :raises NotImplementedError: when no test file was given, since only test-set pairs can be scored.

        """

        if self.test_file is None:
            # NotImplemented is a comparison sentinel, not an exception; raising it fails with a TypeError.
            raise NotImplementedError("NNMF.predict requires a test file")

        for user in self.test_set['users']:
            user_id = self.user_to_user_id[user]
            for item in self.test_set['feedback'][user]:
                score = self.predict_score(user_id, self.item_to_item_id[item], True)
                self.predictions.append((user, item, score))

    def compute(self, verbose=True, metrics=None, verbose_evaluation=True, as_table=False, table_sep='\t'):
        """
        Extends compute method from BaseRatingPrediction. Method to run recommender algorithm

        :param verbose: Print recommender and database information
        :type verbose: bool, default True

        :param metrics: List of evaluation measures
        :type metrics: list, default None

        :param verbose_evaluation: Print the evaluation results
        :type verbose_evaluation: bool, default True

        :param as_table: Print the evaluation results as table
        :type as_table: bool, default False

        :param table_sep: Delimiter for print results (only work with verbose=True and as_table=True)
        :type table_sep: str, default '\t'

        """

        super(NNMF, self).compute(verbose=verbose)

        if verbose:
            self.init_model()
            print("training_time:: %4f sec" % timed(self.fit))
            if self.extra_info_header is not None:
                print(self.extra_info_header)

            print("prediction_time:: %4f sec" % timed(self.predict))

            print('\n')

        else:
            # Execute all in silence without prints
            self.init_model()
            self.fit()
            self.predict()

        self.write_predictions()

        if self.test_file is not None:
            self.evaluate(metrics, verbose_evaluation, as_table=as_table, table_sep=table_sep)
|
||
|
||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
""" | ||
Running Precision and Recall metrics on rating-based algorithms | ||
""" | ||
|
||
from caserec.recommenders.rating_prediction.matrixfactorization import MatrixFactorization | ||
from caserec.recommenders.rating_prediction.nnmf import NNMF | ||
from caserec.utils.process_data import ReadFile | ||
from caserec.evaluation.rating_prediction import RatingPredictionEvaluation | ||
|
||
# Train / test folds of the MovieLens 100k dataset (paths relative to this example's directory)
tr = '../../datasets/ml-100k/folds/0/train.dat'
te = '../../datasets/ml-100k/folds/0/test.dat'

# File where the model's predictions are saved
predictions_output_filepath = './predictions_output.dat'

# Creating model and computing train / test sets
# Alternative rating-prediction model that can be swapped in:
# model = MatrixFactorization(tr, te, output_file=predictions_output_filepath)
model = NNMF(tr, te, output_file=predictions_output_filepath)

model.compute(verbose=False)

# Using ReadFile class to read predictions from file
reader = ReadFile(input_file=predictions_output_filepath)
predictions = reader.read()

# Creating evaluator with item-recommendation parameters
evaluator = RatingPredictionEvaluation(sep='\t', n_rank=[10], as_rank=True, metrics=['PREC'])

# Getting evaluation
item_rec_metrics = evaluator.evaluate(predictions['feedback'], model.test_set)

print('\nItem Recommendation Metrics:\n', item_rec_metrics)

# NOTE(review): compute() above already ran predict(); this second call looks redundant -- confirm
# whether it is needed before evaluation_results is read.
model.predict()

print('\nOriginal Rating Prediction Metrics:\n', model.evaluation_results)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters