-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
userknn itemknn userattributeknn itemattributeknn
- Loading branch information
1 parent
c3a6f53
commit 751570c
Showing
10 changed files
with
295 additions
and
263 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import numpy as np | ||
|
||
__author__ = 'Arthur Fortes' | ||
|
||
|
||
class BaseKNNRecommenders(object):
    """Common state and baseline (bias) estimation shared by the KNN recommenders.

    The constructor stores the train/test sets, builds the user x item rating
    matrix and the id -> index maps. ``train_baselines`` then estimates the
    baseline ``bui = mean + bu + bi`` for every (user, item) pair of the
    training set.

    ``train_set`` is read as a mapping with the keys ``'users'``, ``'items'``,
    ``'feedback'`` (user -> item -> rating), ``'du'`` (user -> items),
    ``'di'`` (item -> users) and ``'mean_rates'``. ``test_set`` is only stored.
    """

    def __init__(self, train_set, test_set):
        self.train = train_set
        self.test = test_set
        # Regularisation terms for the item and user biases.
        self.regBi = 10
        self.regBu = 15
        self.bu = dict()
        self.bi = dict()
        self.bui = dict()
        self.matrix = np.zeros((len(self.train['users']), len(self.train['items'])))
        self.map_items = dict()
        self.map_users = dict()

        # Position of every item / user inside ``self.matrix``.
        for item_id, item in enumerate(self.train['items']):
            self.map_items[item] = item_id

        for user_id, user in enumerate(self.train['users']):
            self.map_users[user] = user_id

        # Fill the dense matrix with the observed ratings; unseen pairs stay 0.
        for row, user in enumerate(self.train['users']):
            user_feedback = self.train['feedback'][user]
            for item in user_feedback:
                self.matrix[row][self.map_items[item]] = user_feedback[item]

    def train_baselines(self):
        """Alternate item/user bias updates 10 times, then build ``self.bui``."""
        # ``range`` instead of the Python 2 only ``xrange``: it works on both
        # major versions and the loop is far too small for the difference to matter.
        for _ in range(10):
            self.compute_bi()
            self.compute_bu()
        self.compute_bui()

    def compute_bi(self):
        """Recompute the item biases.

        bi = sum(rui - mean - bu) / (regBi + number of interactions)
        """
        mean = self.train['mean_rates']
        self.bi = dict()

        for item in self.train['items']:
            total = 0
            cont = 0
            for user in self.train['di'][item]:
                total = total + float(self.train['feedback'][user][item]) - mean - self.bu.get(user, 0)
                cont += 1
            # Regularise as soon as there is at least one interaction. The
            # previous ``cont > 1`` test left single-rating items with their
            # raw, unshrunk residual, which contradicts the formula above.
            if cont > 0:
                self.bi[item] = float(total) / float(self.regBi + cont)

    def compute_bu(self):
        """Recompute the user biases.

        bu = sum(rui - mean - bi) / (regBu + number of interactions)
        """
        mean = self.train['mean_rates']
        self.bu = dict()

        for user in self.train['users']:
            total = 0
            cont = 0
            for item in self.train['du'][user]:
                total = total + float(self.train['feedback'][user][item]) - mean - self.bi.get(item, 0)
                cont += 1
            # Same rule as in compute_bi: shrink whenever there is data.
            if cont > 0:
                self.bu[user] = float(total) / float(self.regBu + cont)

    def compute_bui(self):
        """Fill ``self.bui[user][item] = mean + bu + bi`` for every train pair.

        Users or items without a bias entry contribute a bias of 0.
        """
        mean = self.train['mean_rates']
        for user in self.train['users']:
            user_bias = self.bu.get(user, 0)
            row = self.bui.setdefault(user, {})
            for item in self.train['items']:
                row[item] = mean + user_bias + self.bi.get(item, 0)
        # Drop the intermediate biases, but keep the attributes alive so that
        # train_baselines() can be called again without an AttributeError.
        self.bu = dict()
        self.bi = dict()
118 changes: 46 additions & 72 deletions
118
recommenders/rating_prediction/base_rating_prediction.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,73 +1,47 @@ | ||
import numpy as np | ||
""" | ||
test | ||
""" | ||
from recommenders.rating_prediction.item_attribute_knn import ItemAttributeKNN | ||
from recommenders.rating_prediction.itemknn import ItemKNN | ||
from recommenders.rating_prediction.user_attribute_knn import UserAttributeKNN | ||
from recommenders.rating_prediction.userknn import UserKNN | ||
from utils.read_file import ReadFile | ||
|
||
__author__ = 'Arthur Fortes' | ||
|
||
|
||
class BaseRatingPrediction(object):
    """Load train/test rating files and estimate baseline predictions.

    ``ReadFile(path).rating_prediction()`` is unpacked into six values:
    feedback, users, items, du, di and the mean rating. From the training data
    the constructor builds the user x item matrix and the id -> index maps;
    ``train_baselines`` estimates ``bui = mean + bu + bi``.

    :param train_file: path of the training file handed to ``ReadFile``.
    :param test_file: path of the test file; when empty, no test data is read
        and every ``test_*`` attribute is ``None``.
    :param space_type: column separator, stored on the instance.
    """

    def __init__(self, train_file, test_file="", space_type="\t"):
        self.space_type = space_type
        self.train_file = train_file
        self.test_file = test_file
        self.train_feedback, self.train_users, self.train_items, self.train_du, \
            self.train_di, self.train_mean_rates = ReadFile(self.train_file).rating_prediction()
        if self.test_file:
            self.test_feedback, self.test_users, self.test_items, self.test_du, \
                self.test_di, self.test_mean_rates = ReadFile(self.test_file).rating_prediction()
        else:
            # The default test_file is an empty string; skip the read instead
            # of handing an empty path to ReadFile.
            self.test_feedback = self.test_users = self.test_items = None
            self.test_du = self.test_di = self.test_mean_rates = None

        # Regularisation terms for the item and user biases.
        self.regBi = 10
        self.regBu = 15
        self.bu = dict()
        self.bi = dict()
        self.bui = dict()
        self.matrix = np.zeros((len(self.train_users), len(self.train_items)))
        self.map_items = dict()
        self.map_users = dict()

        # Position of every item / user inside ``self.matrix``.
        for item_id, item in enumerate(self.train_items):
            self.map_items[item] = item_id

        for user_id, user in enumerate(self.train_users):
            self.map_users[user] = user_id

        # Fill the dense matrix with the observed ratings; unseen pairs stay 0.
        for row, user in enumerate(self.train_users):
            user_feedback = self.train_feedback[user]
            for item in user_feedback:
                self.matrix[row][self.map_items[item]] = user_feedback[item]

    def train_baselines(self):
        """Alternate item/user bias updates 10 times, then build ``self.bui``."""
        # ``range`` instead of the Python 2 only ``xrange``.
        for _ in range(10):
            self.compute_bi()
            self.compute_bu()
        self.compute_bui()

    def compute_bi(self):
        """Recompute the item biases.

        bi = sum(rui - mean - bu) / (regBi + number of interactions)
        """
        self.bi = dict()

        for item in self.train_items:
            total = 0
            cont = 0
            for user in self.train_di[item]:
                total = total + float(self.train_feedback[user][item]) - \
                    self.train_mean_rates - self.bu.get(user, 0)
                cont += 1
            # Regularise whenever there is at least one interaction; the old
            # ``cont > 1`` test left single-rating items unshrunk.
            if cont > 0:
                self.bi[item] = float(total) / float(self.regBi + cont)

    def compute_bu(self):
        """Recompute the user biases.

        bu = sum(rui - mean - bi) / (regBu + number of interactions)
        """
        self.bu = dict()

        for user in self.train_users:
            total = 0
            cont = 0
            for item in self.train_du[user]:
                total = total + float(self.train_feedback[user][item]) - \
                    self.train_mean_rates - self.bi.get(item, 0)
                cont += 1
            # Same rule as in compute_bi: shrink whenever there is data.
            if cont > 0:
                self.bu[user] = float(total) / float(self.regBu + cont)

    def compute_bui(self):
        """Fill ``self.bui[user][item] = mean + bu + bi`` for every train pair.

        Users or items without a bias entry contribute a bias of 0.
        """
        for user in self.train_users:
            user_bias = self.bu.get(user, 0)
            row = self.bui.setdefault(user, {})
            for item in self.train_items:
                row[item] = self.train_mean_rates + user_bias + self.bi.get(item, 0)
        # Drop the intermediate biases, but keep the attributes alive so that
        # train_baselines() can be called again without an AttributeError.
        self.bu = dict()
        self.bi = dict()
from utils.write_file import WriteFile | ||
|
||
|
||
class RatingPrediction(object):
    """Front-end that picks a KNN rating-prediction recommender by name.

    The constructor reads the train (and optional test) file, instantiates the
    recommender matching ``recommender`` (case-insensitive: ``userknn``,
    ``itemknn``, ``itemattributeknn`` or ``userattributeknn``) and, when the
    result is truthy, passes it to ``WriteFile`` together with
    ``prediction_file`` and ``space_type``.

    The two attribute-based recommenders need ``distance_matrix``; without it
    an error message is printed and nothing is instantiated. Note that
    ``self.predictions`` ends up holding the recommender object itself.
    """

    def __init__(self, train_file, recommender, test_file=None, prediction_file=None, similarity_metric="correlation",
                 neighbors=30, distance_matrix=None, space_type="\t"):
        self.recommender = str(recommender)
        self.predictions = list()
        self.train_set = ReadFile(train_file).rating_prediction()
        self.test_set = None if test_file is None else ReadFile(test_file).rating_prediction()

        algorithm = self.recommender.lower()
        knn_options = dict(similarity_metric=similarity_metric, neighbors=neighbors)

        if algorithm in ("userknn", "itemknn"):
            # Pure collaborative variants: no side information required.
            model = UserKNN if algorithm == "userknn" else ItemKNN
            self.predictions = model(self.train_set, self.test_set, **knn_options)
        elif algorithm in ("itemattributeknn", "userattributeknn"):
            # Attribute-based variants rely on a precomputed distance matrix.
            if distance_matrix is None:
                print("Error: Invalid Distance Matrix!")
            else:
                model = ItemAttributeKNN if algorithm == "itemattributeknn" else UserAttributeKNN
                self.predictions = model(self.train_set, self.test_set,
                                         distance_matrix_file=distance_matrix, **knn_options)
        else:
            print("Error: Invalid Recommender!")

        if not self.predictions:
            print("Error: No predictions!")
        else:
            WriteFile(prediction_file, self.predictions, space_type)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# coding=utf-8 | ||
from recommenders.rating_prediction.base_KNN_recommenders import BaseKNNRecommenders | ||
from utils.read_file import ReadFile | ||
|
||
__author__ = 'Arthur Fortes' | ||
|
||
''' | ||
Its philosophy is as follows: in order to determine the rating of User u on Movie m, we can find other movies that are | ||
similar to Movie m, and based on User u’s ratings on those similar movies we infer his rating on Movie m. | ||
More details: http://cs229.stanford.edu/proj2008/Wen-RecommendationSystemBasedOnCollaborativeFiltering.pdf | ||
''' | ||
|
||
|
||
class ItemAttributeKNN(BaseKNNRecommenders):
    """Item-based KNN rating prediction using a precomputed item matrix.

    For every (user, item_j) pair of the test set, the items the user rated in
    the training set are ranked by their value in the matrix read from
    ``distance_matrix_file``. The ``neighbors`` highest-ranked items are used
    to correct the baseline estimate ``bui[user][item_j]``.

    :param train_set: training data, forwarded to ``BaseKNNRecommenders``.
    :param test_set: test data (or ``None``), forwarded to ``BaseKNNRecommenders``.
    :param distance_matrix_file: file read through ``ReadFile(...).read_matrix()``.
    :param similarity_metric: stored on the instance; not used in this class.
    :param neighbors: number of neighbours (``k``) used per prediction.
    """

    def __init__(self, train_set, test_set, distance_matrix_file, similarity_metric="correlation", neighbors=30):
        BaseKNNRecommenders.__init__(self, train_set, test_set)
        self.k = neighbors
        self.distance_matrix_file = distance_matrix_file
        self.similarity_metric = similarity_metric
        self.predictions = list()

        self.di_matrix = ReadFile(self.distance_matrix_file).read_matrix()
        # The dense rating matrix built by the base class is not needed here.
        del self.matrix

        # methods
        self.train_baselines()
        self.predict()

    def predict(self):
        """Append ``(user, item, rating)`` tuples to ``self.predictions``.

        Does nothing when there is no test set. Pairs whose user or item is
        unknown to the training data (KeyError) are skipped. Ratings are
        clipped to the range [0.5, 5.0].
        """
        if self.test is None:
            return

        for user in self.test['users']:
            for item_j in self.test['feedback'][user]:
                try:
                    # Rank the items rated by the user by similarity to item_j.
                    candidates = list()
                    for item_i in self.train['feedback'][user]:
                        try:
                            sim = self.di_matrix[self.map_items[item_i]][self.map_items[item_j]]
                        except KeyError:
                            # One of the items has no index in the training map.
                            sim = 0
                        candidates.append((item_i, sim))
                    candidates = sorted(candidates, key=lambda pair: -pair[1])

                    weighted_sum = 0
                    sum_sim = 0
                    for item_i, sim in candidates[:self.k]:
                        weighted_sum += (self.train['feedback'][user][item_i] - self.bui[user][item_i]) * sim
                        sum_sim += sim

                    ruj = self.bui[user][item_j]
                    # With no neighbours, or similarities summing to zero, the
                    # correction term is undefined. Fall back to the baseline
                    # instead of raising an uncaught ZeroDivisionError.
                    if sum_sim != 0:
                        ruj = ruj + (weighted_sum / sum_sim)

                    if ruj > 5:
                        ruj = 5.0
                    if ruj < 0.5:
                        ruj = 0.5
                    self.predictions.append((user, item_j, ruj))
                except KeyError:
                    # Unknown user/item for the training data: no prediction.
                    pass
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.