Skip to content

Commit

Permalink
Change implementation:
Browse files Browse the repository at this point in the history
userknn
itemknn
userattributeknn
itemattributeknn
  • Loading branch information
arthurfortes committed Jun 3, 2016
1 parent c3a6f53 commit 751570c
Show file tree
Hide file tree
Showing 10 changed files with 295 additions and 263 deletions.
66 changes: 66 additions & 0 deletions recommenders/rating_prediction/base_KNN_recommenders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import numpy as np

__author__ = 'Arthur Fortes'


class BaseKNNRecommenders(object):
    """Shared state and baseline estimation for the KNN rating predictors.

    Builds a dense user x item rating matrix from the training set and
    estimates the baseline ``bui = mean + bu + bi`` for every (user, item)
    pair of the training set, where ``bu`` and ``bi`` are regularised user
    and item biases.

    ``train_set`` is read as a mapping with the keys ``'users'``, ``'items'``,
    ``'feedback'`` (``feedback[user][item]`` is a rating), ``'du'`` (iterable
    of items per user), ``'di'`` (iterable of users per item) and
    ``'mean_rates'``. ``test_set`` is only stored here (as ``self.test``);
    subclasses decide how to use it.
    """

    def __init__(self, train_set, test_set):
        self.train = train_set
        self.test = test_set
        # Regularisation terms added to the interaction count when averaging.
        self.regBi = 10
        self.regBu = 15
        self.bu = dict()
        self.bi = dict()
        self.bui = dict()

        users = self.train['users']
        items = self.train['items']
        # Position of every item / user inside the dense matrix.
        self.map_items = {item: col for col, item in enumerate(items)}
        self.map_users = {user: row for row, user in enumerate(users)}

        # Unrated cells stay at 0.
        self.matrix = np.zeros((len(users), len(items)))
        for row, user in enumerate(users):
            for item, rating in self.train['feedback'][user].items():
                self.matrix[row][self.map_items[item]] = rating

    def train_baselines(self, iterations=10):
        """Alternate item/user bias updates, then materialise ``self.bui``.

        :param iterations: number of alternating ``compute_bi`` /
            ``compute_bu`` rounds (defaults to the previous fixed value, 10).
        """
        # range() instead of xrange() so the loop runs on Python 2 and 3.
        for _ in range(iterations):
            self.compute_bi()
            self.compute_bu()
        self.compute_bui()

    def compute_bi(self):
        """Recompute item biases from the current user biases.

        bi = sum(rui - mean - bu) / (regBi + number of interactions)

        Items without any interaction get no entry in ``self.bi``.
        """
        self.bi = dict()
        mean = self.train['mean_rates']
        feedback = self.train['feedback']

        for item in self.train['items']:
            residual = 0
            count = 0
            for user in self.train['di'][item]:
                residual = residual + float(feedback[user][item]) - mean - self.bu.get(user, 0)
                count += 1
            # A single interaction is regularised as well; the previous
            # ``> 1`` test left such biases as the raw residual.
            if count > 0:
                self.bi[item] = float(residual) / float(self.regBi + count)

    def compute_bu(self):
        """Recompute user biases from the current item biases.

        bu = sum(rui - mean - bi) / (regBu + number of interactions)

        Users without any interaction get no entry in ``self.bu``.
        """
        self.bu = dict()
        mean = self.train['mean_rates']
        feedback = self.train['feedback']

        for user in self.train['users']:
            residual = 0
            count = 0
            for item in self.train['du'][user]:
                residual = residual + float(feedback[user][item]) - mean - self.bi.get(item, 0)
                count += 1
            if count > 0:
                self.bu[user] = float(residual) / float(self.regBu + count)

    def compute_bui(self):
        """Fill ``self.bui[user][item] = mean + bu + bi`` for all train pairs.

        A user or item without a bias entry contributes 0 instead of raising
        ``KeyError``.
        """
        mean = self.train['mean_rates']
        for user in self.train['users']:
            user_bias = self.bu.get(user, 0)
            row = self.bui.setdefault(user, {})
            for item in self.train['items']:
                row[item] = mean + user_bias + self.bi.get(item, 0)
        # Drop the bias tables (only bui is needed afterwards) but keep the
        # attributes so train_baselines() can be called again.
        self.bu = dict()
        self.bi = dict()
118 changes: 46 additions & 72 deletions recommenders/rating_prediction/base_rating_prediction.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,47 @@
import numpy as np
"""
Dispatch a rating-prediction run to the selected KNN recommender.
"""
from recommenders.rating_prediction.item_attribute_knn import ItemAttributeKNN
from recommenders.rating_prediction.itemknn import ItemKNN
from recommenders.rating_prediction.user_attribute_knn import UserAttributeKNN
from recommenders.rating_prediction.userknn import UserKNN
from utils.read_file import ReadFile

__author__ = 'Arthur Fortes'


class BaseRatingPrediction(object):
    """File-driven base class holding ratings and baseline estimates.

    Reads the train and test files through ``ReadFile(...).rating_prediction()``,
    builds a dense user x item rating matrix from the training feedback and
    estimates the baseline ``bui = mean + bu + bi`` for every training
    (user, item) pair.
    """

    def __init__(self, train_file, test_file="", space_type="\t"):
        self.space_type = space_type
        self.train_file = train_file
        self.test_file = test_file
        (self.train_feedback, self.train_users, self.train_items, self.train_du,
         self.train_di, self.train_mean_rates) = ReadFile(self.train_file).rating_prediction()
        # NOTE(review): the test file is read unconditionally, even with the
        # default test_file="" - confirm ReadFile tolerates an empty path.
        (self.test_feedback, self.test_users, self.test_items, self.test_du,
         self.test_di, self.test_mean_rates) = ReadFile(self.test_file).rating_prediction()

        # Regularisation terms added to the interaction count when averaging.
        self.regBi = 10
        self.regBu = 15
        self.bu = dict()
        self.bi = dict()
        self.bui = dict()

        # Position of every item / user inside the dense matrix.
        self.map_items = {item: col for col, item in enumerate(self.train_items)}
        self.map_users = {user: row for row, user in enumerate(self.train_users)}

        # Unrated cells stay at 0.
        self.matrix = np.zeros((len(self.train_users), len(self.train_items)))
        for row, user in enumerate(self.train_users):
            for item, rating in self.train_feedback[user].items():
                self.matrix[row][self.map_items[item]] = rating

    def train_baselines(self, iterations=10):
        """Alternate item/user bias updates, then materialise ``self.bui``.

        :param iterations: number of alternating ``compute_bi`` /
            ``compute_bu`` rounds (defaults to the previous fixed value, 10).
        """
        # range() instead of xrange() so the loop runs on Python 2 and 3.
        for _ in range(iterations):
            self.compute_bi()
            self.compute_bu()
        self.compute_bui()

    def compute_bi(self):
        """Recompute item biases from the current user biases.

        bi = sum(rui - mean - bu) / (regBi + number of interactions)

        Items without any interaction get no entry in ``self.bi``.
        """
        self.bi = dict()

        for item in self.train_items:
            residual = 0
            count = 0
            for user in self.train_di[item]:
                residual = residual + float(self.train_feedback[user][item]) - \
                    self.train_mean_rates - self.bu.get(user, 0)
                count += 1
            # A single interaction is regularised as well; the previous
            # ``> 1`` test left such biases as the raw residual.
            if count > 0:
                self.bi[item] = float(residual) / float(self.regBi + count)

    def compute_bu(self):
        """Recompute user biases from the current item biases.

        bu = sum(rui - mean - bi) / (regBu + number of interactions)

        Users without any interaction get no entry in ``self.bu``.
        """
        self.bu = dict()

        for user in self.train_users:
            residual = 0
            count = 0
            for item in self.train_du[user]:
                residual = residual + float(self.train_feedback[user][item]) - \
                    self.train_mean_rates - self.bi.get(item, 0)
                count += 1
            if count > 0:
                self.bu[user] = float(residual) / float(self.regBu + count)

    def compute_bui(self):
        """Fill ``self.bui[user][item] = mean + bu + bi`` for all train pairs.

        A user or item without a bias entry contributes 0 instead of raising
        ``KeyError``.
        """
        for user in self.train_users:
            user_bias = self.bu.get(user, 0)
            row = self.bui.setdefault(user, {})
            for item in self.train_items:
                row[item] = self.train_mean_rates + user_bias + self.bi.get(item, 0)
        # Drop the bias tables (only bui is needed afterwards) but keep the
        # attributes so train_baselines() can be called again.
        self.bu = dict()
        self.bi = dict()
from utils.write_file import WriteFile


class RatingPrediction(object):
    """Run one of the KNN rating predictors and optionally write its output.

    :param train_file: path handed to ``ReadFile(...).rating_prediction()``.
    :param recommender: recommender name, matched case-insensitively against
        ``"userknn"``, ``"itemknn"``, ``"itemattributeknn"`` and
        ``"userattributeknn"``.
    :param test_file: optional test file, read the same way as ``train_file``.
    :param prediction_file: optional output path; nothing is written when it
        is ``None``.
    :param similarity_metric: forwarded to the recommender.
    :param neighbors: forwarded to the recommender.
    :param distance_matrix: distance matrix file, required by the two
        attribute-based recommenders.
    :param space_type: forwarded to ``WriteFile``.

    After construction ``self.model`` holds the recommender instance (or
    ``None``) and ``self.predictions`` the recommender's prediction list.
    """

    def __init__(self, train_file, recommender, test_file=None, prediction_file=None, similarity_metric="correlation",
                 neighbors=30, distance_matrix=None, space_type="\t"):
        self.recommender = str(recommender)
        self.predictions = list()
        self.model = None
        self.train_set = ReadFile(train_file).rating_prediction()
        if test_file is not None:
            self.test_set = ReadFile(test_file).rating_prediction()
        else:
            self.test_set = None

        name = self.recommender.lower()
        if name == "userknn":
            self.model = UserKNN(self.train_set, self.test_set, similarity_metric=similarity_metric,
                                 neighbors=neighbors)
        elif name == "itemknn":
            self.model = ItemKNN(self.train_set, self.test_set, similarity_metric=similarity_metric,
                                 neighbors=neighbors)
        elif name == "itemattributeknn":
            if distance_matrix is not None:
                self.model = ItemAttributeKNN(self.train_set, self.test_set, similarity_metric=similarity_metric,
                                              neighbors=neighbors, distance_matrix_file=distance_matrix)
            else:
                print("Error: Invalid Distance Matrix!")
        elif name == "userattributeknn":
            if distance_matrix is not None:
                self.model = UserAttributeKNN(self.train_set, self.test_set, similarity_metric=similarity_metric,
                                              neighbors=neighbors, distance_matrix_file=distance_matrix)
            else:
                print("Error: Invalid Distance Matrix!")
        else:
            print("Error: Invalid Recommender!")

        if self.model is not None:
            # Keep the prediction list, not the recommender object: an
            # arbitrary object is always truthy, which made the
            # "No predictions" branch below unreachable.
            # NOTE(review): assumes every recommender exposes ``predictions``
            # like ItemKNN / ItemAttributeKNN do - confirm for the user-based ones.
            self.predictions = self.model.predictions

        if self.predictions:
            if prediction_file is not None:
                # Constructing WriteFile alone was all the previous code did;
                # the write itself is triggered explicitly here.
                WriteFile(prediction_file, self.predictions, space_type).write_prediction_file()
        else:
            print("Error: No predictions!")
Empty file.
58 changes: 58 additions & 0 deletions recommenders/rating_prediction/item_attribute_knn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# coding=utf-8
from recommenders.rating_prediction.base_KNN_recommenders import BaseKNNRecommenders
from utils.read_file import ReadFile

__author__ = 'Arthur Fortes'

'''
Its philosophy is as follows: in order to determine the rating of User u on Movie m, we can find other movies that are
similar to Movie m, and based on User u’s ratings on those similar movies we infer his rating on Movie m.
More details: http://cs229.stanford.edu/proj2008/Wen-RecommendationSystemBasedOnCollaborativeFiltering.pdf
'''


class ItemAttributeKNN(BaseKNNRecommenders):
    """Item-based KNN rating predictor using a precomputed item matrix.

    The matrix is loaded with ``ReadFile(distance_matrix_file).read_matrix()``
    and indexed by the item positions of ``self.map_items``; larger values are
    treated as "more similar" (neighbours are ranked in descending order).

    Training and prediction both run inside the constructor; results are
    collected in ``self.predictions`` as ``(user, item, rating)`` tuples.

    :param train_set: training data, passed to ``BaseKNNRecommenders``.
    :param test_set: test data with ``'users'`` and ``'feedback'`` entries, or
        ``None`` to skip prediction.
    :param distance_matrix_file: file holding the item matrix.
    :param similarity_metric: stored on the instance; not used by this class.
    :param neighbors: maximum number of neighbour items per prediction.
    """

    def __init__(self, train_set, test_set, distance_matrix_file, similarity_metric="correlation", neighbors=30):
        BaseKNNRecommenders.__init__(self, train_set, test_set)
        self.k = neighbors
        self.distance_matrix_file = distance_matrix_file
        self.similarity_metric = similarity_metric
        self.predictions = list()

        self.di_matrix = ReadFile(self.distance_matrix_file).read_matrix()
        # The dense rating matrix built by the base class is not needed here.
        del self.matrix

        # methods
        self.train_baselines()
        self.predict()

    def _similarity(self, item_i, item_j):
        """Return the matrix entry for two items, or 0 if either is unmapped."""
        try:
            return self.di_matrix[self.map_items[item_i]][self.map_items[item_j]]
        except KeyError:
            return 0

    def predict(self):
        """Predict a rating for every (user, item) pair of the test feedback.

        For each pair the ``k`` most similar items the user rated in training
        are combined as

            ruj = bui[user][j] + sum((rui - bui) * sim) / sum(sim)

        and the result is clamped to the range [0.5, 5]. When the similarities
        sum to zero the baseline alone is used, because the division is
        undefined. Pairs whose user or item has no training baseline are
        skipped.
        """
        if self.test is None:
            return

        train_feedback = self.train['feedback']
        for user in self.test['users']:
            for item_j in self.test['feedback'][user]:
                try:
                    rated = train_feedback[user]
                    baselines = self.bui[user]
                    baseline_j = baselines[item_j]

                    scored = [(item_i, self._similarity(item_i, item_j)) for item_i in rated]
                    scored = sorted(scored, key=lambda pair: -pair[1])

                    weighted = 0
                    sum_sim = 0
                    for item_i, sim in scored[:self.k]:
                        weighted += (rated[item_i] - baselines[item_i]) * sim
                        sum_sim += sim
                except KeyError:
                    # Unknown user or item: no baseline to predict from.
                    continue

                if sum_sim == 0:
                    ruj = baseline_j
                else:
                    ruj = baseline_j + (weighted / sum_sim)
                if ruj > 5:
                    ruj = 5.0
                if ruj < 0.5:
                    ruj = 0.5
                self.predictions.append((user, item_j, ruj))
61 changes: 0 additions & 61 deletions recommenders/rating_prediction/itemattributeknn.py

This file was deleted.

64 changes: 31 additions & 33 deletions recommenders/rating_prediction/itemknn.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# coding=utf-8
from scipy.spatial.distance import squareform, pdist
from recommenders.rating_prediction.base_rating_prediction import BaseRatingPrediction
from recommenders.rating_prediction.base_KNN_recommenders import BaseKNNRecommenders
import numpy as np
from utils.write_file import WriteFile

__author__ = 'Arthur Fortes'

Expand All @@ -16,11 +15,10 @@
'''


class ItemKNN(BaseRatingPrediction):
def __init__(self, train_file, output_file, test_file=None, space_type="\t", similarity_metric="correlation", k=30):
BaseRatingPrediction.__init__(self, train_file, test_file, space_type=space_type)
self.output_file = output_file
self.k = k
class ItemKNN(BaseKNNRecommenders):
def __init__(self, train_set, test_set, similarity_metric="correlation", neighbors=30):
BaseKNNRecommenders.__init__(self, train_set, test_set)
self.k = neighbors
self.similarity_metric = similarity_metric
self.predictions = list()

Expand All @@ -31,31 +29,31 @@ def __init__(self, train_file, output_file, test_file=None, space_type="\t", sim
# methods
self.train_baselines()
self.predict()
WriteFile(self.output_file, self.predictions, self.space_type).write_prediction_file()

def predict(self):
    """Predict a rating for every (user, item) pair of the test feedback.

    Reads ``self.test`` / ``self.train`` (the set-based attributes of the
    refactored base class); the stale loop over ``self.test_users`` /
    ``self.train_feedback`` is gone. Does nothing when ``self.test`` is None.

    For each pair the ``self.k`` most similar items the user rated in
    training are combined as

        ruj = bui[user][j] + sum((rui - bui) * sim) / sum(sim)

    where ``sim`` comes from ``self.di_matrix`` indexed by ``self.map_items``
    positions (0 if an item is unmapped). The result is clamped to the range
    [0.5, 5] and appended to ``self.predictions`` as ``(user, item, rating)``.
    When the similarities sum to zero the baseline alone is used, because the
    division is undefined. Pairs whose user or item has no training baseline
    are skipped.
    """
    if self.test is None:
        return

    train_feedback = self.train['feedback']
    for user in self.test['users']:
        for item_j in self.test['feedback'][user]:
            try:
                rated = train_feedback[user]
                baselines = self.bui[user]
                baseline_j = baselines[item_j]

                scored = list()
                for item_i in rated:
                    try:
                        sim = self.di_matrix[self.map_items[item_i]][self.map_items[item_j]]
                    except KeyError:
                        sim = 0
                    scored.append((item_i, sim))
                scored = sorted(scored, key=lambda pair: -pair[1])

                weighted = 0
                sum_sim = 0
                for item_i, sim in scored[:self.k]:
                    weighted += (rated[item_i] - baselines[item_i]) * sim
                    sum_sim += sim
            except KeyError:
                # Unknown user or item: no baseline to predict from.
                continue

            if sum_sim == 0:
                ruj = baseline_j
            else:
                ruj = baseline_j + (weighted / sum_sim)
            if ruj > 5:
                ruj = 5.0
            if ruj < 0.5:
                ruj = 0.5
            self.predictions.append((user, item_j, ruj))

0 comments on commit 751570c

Please sign in to comment.