Skip to content

Commit

Permalink
Merge pull request #27 from guedes-joaofelipe/dev-guedesjoaofelipe
Browse files Browse the repository at this point in the history
Including Non-negative Matrix Factorization and fixing RatingPredictionEvaluation
  • Loading branch information
arthurfortes committed Jan 22, 2019
2 parents 5606db9 + cfc0cf8 commit 9ba8abf
Show file tree
Hide file tree
Showing 24 changed files with 267 additions and 11 deletions.
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ $RECYCLE.BIN/
.DS_Store
.AppleDouble
.LSOverride
*.ini

# Thumbnails
._*
Expand All @@ -44,3 +45,12 @@ Temporary Items

.idea/*
.vscode/*
*.pyc
*.ipynb_checkpoints
.ipynb_checkpoints


# Ignoring wheel folders
build/
dist/
CaseRecommender.egg-info/
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ Rating Prediction:

- Matrix Factorization (with and without baseline)

- Non-negative Matrix Factorization

- SVD

- SVD++
Expand Down
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Rating Prediction:

- SVD

- Non-negative Matrix Factorization

- SVD++

- ItemKNN
Expand Down
1 change: 0 additions & 1 deletion caserec/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
__author__ = "Arthur Fortes"
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 2 additions & 2 deletions caserec/evaluation/base_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


class BaseEvaluation(object):
def __init__(self, sep='\t', metrics=None, all_but_one_eval=False, verbose=True, as_table=False, table_sep='\t'):
def __init__(self, sep='\t', metrics=None, all_but_one_eval=False, verbose=True, as_table=False, table_sep='\t', save_eval_file = None):
"""
Class to be base for evaluation strategies
Expand Down Expand Up @@ -143,7 +143,7 @@ def evaluate_folds(self, folds_dir, predictions_file_name, test_file_name, k_fol

return folds_results

def print_results(self, evaluation_results):
def print_results(self, evaluation_results, save_eval_file = None):
"""
Method to print the results
Expand Down
6 changes: 6 additions & 0 deletions caserec/evaluation/item_recommendation.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ def evaluate(self, predictions, test_set):

})

# if (self.save_eval_file is not None):
# # Saving evaluations to a file
# from caserec.utils.process_data import WriteFile

# WriteFile(output_file=save_eval_file, data=)

if self.verbose:
self.print_results(eval_results)

Expand Down
6 changes: 3 additions & 3 deletions caserec/evaluation/rating_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ def evaluate(self, predictions, test_set):
new_test_set['items_seen_by_user'] = new_test_set
new_test_set['users'] = test_set['users']

ItemRecommendationEvaluation(n_ranks=self.n_rank,
all_but_one_eval=self.all_but_one_eval).evaluate_recommender(
new_predict_set, new_test_set)
eval_results = ItemRecommendationEvaluation(n_ranks=self.n_rank,
all_but_one_eval=self.all_but_one_eval,
metrics=self.metrics).evaluate_recommender(new_predict_set, new_test_set)

return eval_results
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,15 @@ def read_files(self):
"""

self.train_set = ReadFile(self.train_file, sep=self.sep).read()
# Getting train_set as a dict_file = {'feedback': dict_feedback, 'users': list_users, 'items': list_items,
# 'sparsity': sparsity, 'number_interactions': number_interactions, 'users_viewed_item': users_viewed_item, 'items_unobserved': items_unobserved,
# 'items_seen_by_user': items_seen_by_user, 'mean_value': mean_value, 'max_value': max(list_feedback), 'min_value': min(list_feedback)}
self.train_set = ReadFile(self.train_file, sep=self.sep).read()

if self.test_file is not None:
self.test_set = ReadFile(self.test_file, sep=self.sep).read()

# Combining users/items from train and test set
self.users = sorted(set(list(self.train_set['users']) + list(self.test_set['users'])))
self.items = sorted(set(list(self.train_set['items']) + list(self.test_set['items'])))
else:
Expand All @@ -93,7 +98,7 @@ def read_files(self):

def create_matrix(self):
"""
Method to create a feedback matrix
Method to create a feedback matrix having users as rows and items as columns
"""

Expand Down
192 changes: 192 additions & 0 deletions caserec/recommenders/rating_prediction/nnmf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# coding=utf-8
"""
Non-negative Matrix Factorization
[Rating Prediction]
Literature:
Badrul Sarwar , George Karypis , Joseph Konstan , John Riedl:
Incremental Singular Value Decomposition Algorithms for Highly Scalable Recommender Systems
Fifth International Conference on Computer and Information Science 2002.
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.7894
"""

# © 2018. Case Recommender (MIT License)

import numpy as np
from sklearn.decomposition import NMF

from caserec.recommenders.rating_prediction.base_rating_prediction import BaseRatingPrediction
from caserec.utils.extra_functions import timed

__author__ = 'Joao Felipe Guedes <guedes.joaofelipe@poli.ufrj.br>'


class NNMF(BaseRatingPrediction):
    def __init__(self, train_file=None, test_file=None, output_file=None, factors=10, sep='\t', output_sep='\t',
                 random_seed=None):
        """
        Non-negative Matrix Factorization for rating prediction

        The user x item feedback matrix is approximated by the product of two non-negative matrices P (users x
        factors) and Q (factors x items); the product P.Q is used as the source of predicted scores.

        Usage::

            >> NNMF(train, test).compute()

        :param train_file: File which contains the train set. This file needs to have at least 3 columns
        (user item feedback_value).
        :type train_file: str

        :param test_file: File which contains the test set. This file needs to have at least 3 columns
        (user item feedback_value).
        :type test_file: str, default None

        :param output_file: File with dir to write the final predictions
        :type output_file: str, default None

        :param factors: Number of latent factors per user/item
        :type factors: int, default 10

        :param sep: Delimiter for input files
        :type sep: str, default '\t'

        :param output_sep: Delimiter for output file
        :type output_sep: str, default '\t'

        :param random_seed: Number of seed. Lock random numbers for reproducibility of experiments. It seeds numpy's
        global generator and is also used as the random state of the factorization; when None, the factorization
        falls back to a fixed state of 0.
        :type random_seed: int, default None

        """
        super(NNMF, self).__init__(train_file=train_file, test_file=test_file, output_file=output_file, sep=sep,
                                   output_sep=output_sep)

        self.recommender_name = 'NNMF'
        self.factors = factors
        # Kept so that fit() can hand the seed to the factorizer instead of a hard-coded constant
        self.random_seed = random_seed

        if random_seed is not None:
            np.random.seed(random_seed)

        # internal vars
        self.feedback_triples = None
        self.prediction_matrix = None

    def init_model(self):
        """
        Method to treat and initialize the model

        Builds the list of (user_id, item_id, feedback) triples from the train set and then calls create_matrix().

        """

        feedback = self.train_set['feedback']

        # Map interaction with ids
        self.feedback_triples = [
            (self.user_to_user_id[user], self.item_to_item_id[item], value)
            for user, user_feedback in feedback.items()
            for item, value in user_feedback.items()
        ]

        self.create_matrix()

    def fit(self):
        """
        This method performs Non-negative matrix factorization over the training data.

        The result is stored in self.prediction_matrix as the dense product of the two learned factor matrices.

        """

        # Use the seed given to the constructor; 0 preserves the previous behaviour when no seed was given
        random_state = 0 if self.random_seed is None else self.random_seed

        model = NMF(n_components=self.factors, init='random', random_state=random_state)

        user_factors = model.fit_transform(self.matrix)
        item_factors = model.components_

        self.prediction_matrix = np.dot(user_factors, item_factors)

    def predict_score(self, u, i, cond=True):
        """
        Method to predict a single score for a pair (user, item)

        :param u: User ID
        :type u: int

        :param i: Item ID
        :type i: int

        :param cond: Use max and min values of train set to limit score
        :type cond: bool, default True

        :return: Score generate for pair (user, item)
        :rtype: float

        """

        # NOTE(review): fit() factorizes self.matrix as it is. If create_matrix() stores raw (not mean-centred)
        # ratings, adding the train mean here inflates every score - confirm against create_matrix().
        rui = self.train_set["mean_value"] + self.prediction_matrix[u][i]

        if cond:
            if rui > self.train_set["max_value"]:
                rui = self.train_set["max_value"]
            elif rui < self.train_set["min_value"]:
                rui = self.train_set["min_value"]

        return rui

    def predict(self):
        """
        This method computes a final rating for unknown pairs (user, item)

        Appends one (user, item, score) tuple to self.predictions for every pair found in the test set.

        :raises NotImplementedError: if no test file was given

        """

        if self.test_file is None:
            # `raise NotImplemented` raises TypeError in Python 3; NotImplementedError is the intended exception
            raise NotImplementedError("NNMF.predict requires a test file")

        for user in self.test_set['users']:
            user_id = self.user_to_user_id[user]
            for item in self.test_set['feedback'][user]:
                self.predictions.append((user, item, self.predict_score(user_id, self.item_to_item_id[item], True)))

    def compute(self, verbose=True, metrics=None, verbose_evaluation=True, as_table=False, table_sep='\t'):
        """
        Extends compute method from BaseRatingPrediction. Method to run recommender algorithm

        :param verbose: Print recommender and database information
        :type verbose: bool, default True

        :param metrics: List of evaluation measures
        :type metrics: list, default None

        :param verbose_evaluation: Print the evaluation results
        :type verbose_evaluation: bool, default True

        :param as_table: Print the evaluation results as table
        :type as_table: bool, default False

        :param table_sep: Delimiter for print results (only work with verbose=True and as_table=True)
        :type table_sep: str, default '\t'

        """

        super(NNMF, self).compute(verbose=verbose)

        # Model initialization is needed in both modes, so it is done once up front
        self.init_model()

        if verbose:
            print("training_time:: %4f sec" % timed(self.fit))
            if self.extra_info_header is not None:
                print(self.extra_info_header)

            print("prediction_time:: %4f sec" % timed(self.predict))

            print('\n')

        else:
            # Execute all in silence without prints
            self.fit()
            self.predict()

        self.write_predictions()

        if self.test_file is not None:
            self.evaluate(metrics, verbose_evaluation, as_table=as_table, table_sep=table_sep)



Binary file added caserec/utils/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion caserec/utils/process_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def read(self):

list_feedback = []

dict_feedback = {}
dict_feedback = {} # To be filled as: {user_id: {item_id: feedback_value}}
items_unobserved = {}
items_seen_by_user = {}
users_viewed_item = {}
Expand Down
37 changes: 37 additions & 0 deletions examples/ranking_rating_based_algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
Running Precision and Recall metrics on rating-based algorithms
"""

from caserec.recommenders.rating_prediction.matrixfactorization import MatrixFactorization
from caserec.recommenders.rating_prediction.nnmf import NNMF
from caserec.utils.process_data import ReadFile
from caserec.evaluation.rating_prediction import RatingPredictionEvaluation

tr = '../../datasets/ml-100k/folds/0/train.dat'
te = '../../datasets/ml-100k/folds/0/test.dat'

# File where the model's predictions are saved
predictions_output_filepath = './predictions_output.dat'

# Creating model and computing train / test sets
# model = MatrixFactorization(tr, te, output_file=predictions_output_filepath)
model = NNMF(tr, te, output_file=predictions_output_filepath)

# compute() trains the model, predicts the test pairs, writes them to the output file and evaluates them
model.compute(verbose=False)

# Using ReadFile class to read predictions from file
reader = ReadFile(input_file=predictions_output_filepath)
predictions = reader.read()

# Creating evaluator with item-recommendation parameters
evaluator = RatingPredictionEvaluation(sep='\t', n_rank=[10], as_rank=True, metrics=['PREC'])

# Getting evaluation
item_rec_metrics = evaluator.evaluate(predictions['feedback'], model.test_set)

print('\nItem Recommendation Metrics:\n', item_rec_metrics)

# No second model.predict() call here: compute() has already predicted and evaluated, and predict() would only
# append the same predictions to the model a second time.
# NOTE(review): evaluation_results is presumably filled by the evaluation step inside compute() - confirm.
print('\nOriginal Rating Prediction Metrics:\n', model.evaluation_results)
7 changes: 5 additions & 2 deletions examples/rating_prediction_mf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

from caserec.recommenders.rating_prediction.svdplusplus import SVDPlusPlus
from caserec.recommenders.rating_prediction.nnmf import NNMF
from caserec.recommenders.rating_prediction.matrixfactorization import MatrixFactorization
from caserec.utils.cross_validation import CrossValidation

Expand All @@ -28,10 +29,12 @@
"""

# Cross Validation
recommender = MatrixFactorization()
# recommender = MatrixFactorization()

CrossValidation(input_file=db, recommender=recommender, dir_folds=folds_path, header=1, k_folds=5).compute()
# CrossValidation(input_file=db, recommender=recommender, dir_folds=folds_path, header=1, k_folds=5).compute()

# # Simple
# MatrixFactorization(tr, te).compute()
# SVDPlusPlus(tr, te).compute()

NNMF(tr, te, factors = 20).compute()

0 comments on commit 9ba8abf

Please sign in to comment.