In [56]:
from math import sqrt, log, log10, log2
from operator import itemgetter

import numpy as np
from scipy.linalg import svd
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from sklearn import metrics

In [76]:
class OCCF:
    """Popularity-based one-class collaborative filtering baseline with ranking metrics.

    Ratings of at least ``_LIKE_THRESHOLD`` count as positive feedback.  Every
    item gets a bias ``b_i`` (share of training users with positive feedback on
    it, minus the global positive density ``mu``).  For each user the candidate
    items are those without positive training feedback from that user, ranked by
    descending bias (ties keep ascending item-id order).

    The constructor builds the model, evaluates it on the test records and
    prints, one per line and rounded to four decimals: Pre@k, Rec@k, F1@k,
    NDCG@k, 1-call@k, MRR, MAP, ARP and AUC.

    Parameters
    ----------
    records_train, records_test : 2-D integer array-like
        One row per rating; column 0 is the 0-based user id, column 1 the
        0-based item id and column 2 the rating.  Further columns are ignored.
    k : int, optional
        Length of the recommendation list.  When omitted, the notebook-level
        global ``K`` is used (the behaviour of the original implementation).

    Attributes
    ----------
    n, m : number of users / items (largest id over both record sets, plus one).
    R, R_test : ``(n, m)`` 0/1 matrices of positive feedback in train / test.
    mu, b_i : global positive density and per-item bias.
    i_rec : ``(n, k)`` int32 array with each user's top-k item ids; rows of users
        with fewer than ``k`` candidates are right-padded with ``-1``.
    users_test : ids of users with at least one positive test record.
    pre_u, rec_u : per-user Pre@k / Rec@k (0 for users outside ``users_test``).
    pre, rec, f1_score, NDCG, one_call, MRR, MAP, ARP, AUC : metrics averaged
        over ``users_test`` (all 0 when there are no test users).

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    # Minimum rating that is treated as positive feedback.
    _LIKE_THRESHOLD = 4
    # Bias given to items without any positive training feedback; it is far
    # below every real bias (which lies in [-1, 1]) so such items rank last.
    _NO_FEEDBACK_BIAS = -10
    # Averaged metrics, in the order in which they are printed.
    _METRICS = ("pre", "rec", "f1_score", "NDCG", "one_call",
                "MRR", "MAP", "ARP", "AUC")

    def __init__(self, records_train, records_test, k=None):
        if k is None:
            # Backward compatibility: the notebook defines the cut-off as a global.
            k = K
        if k < 1:
            raise ValueError("k must be a positive integer")
        self.k = k

        records = np.vstack([records_train, records_test])
        # Plain ints avoid carrying the (possibly int32) dtype of the id columns.
        self.n = int(records[:, 0].max()) + 1
        self.m = int(records[:, 1].max()) + 1

        self.R = self._feedback_matrix(records_train)
        self.R_test = self._feedback_matrix(records_test)

        # Number of users with positive training feedback per item, and number
        # of positive test records per user.
        y_item = np.count_nonzero(self.R, axis=0)
        y_user_test = np.count_nonzero(self.R_test, axis=1)

        # Global density of positive feedback and per-item bias.
        self.mu = y_item.sum() / self.n / self.m
        self.b_i = np.where(y_item,
                            y_item / self.n - self.mu,
                            self._NO_FEEDBACK_BIAS)

        # The bias does not depend on the user, so one global ordering suffices.
        # A stable sort on the negated bias gives descending bias with ties in
        # ascending item-id order.
        self._item_order = np.argsort(-self.b_i, kind="stable")

        # Top-k list of every user; -1 marks unused slots of short lists.
        self.i_rec = np.full([self.n, k], -1, dtype=np.int32)
        for user in range(self.n):
            top_items = self._rank_candidates(user)[:k]
            self.i_rec[user, :len(top_items)] = top_items

        self.users_test = np.nonzero(y_user_test)[0]

        self._evaluate(y_user_test)
        for name in self._METRICS:
            print(round(getattr(self, name), 4))

    def _feedback_matrix(self, records):
        """Return the (n, m) 0/1 matrix of records rated >= ``_LIKE_THRESHOLD``."""
        records = np.asarray(records)
        liked = records[records[:, 2] >= self._LIKE_THRESHOLD]
        matrix = np.zeros([self.n, self.m])
        matrix[liked[:, 0], liked[:, 1]] = 1
        return matrix

    def _rank_candidates(self, user):
        """Return the ids of ``user``'s candidate items, best first."""
        order = self._item_order
        return order[self.R[user, order] == 0]

    def _evaluate(self, y_user_test):
        """Compute all metrics, averaged over the users in ``users_test``."""
        k = self.k
        for name in self._METRICS:
            setattr(self, name, 0)
        self.pre_u = np.zeros(self.n)
        self.rec_u = np.zeros(self.n)

        # Positional discounts 1 / log(rank + 1); the base of the logarithm
        # cancels in the DCG / normaliser ratio.
        log_recip = np.array([1 / log(i + 2) for i in range(k)])
        # Normaliser: DCG of a list whose k slots are all relevant.
        dcg_norm = log_recip.sum()

        for user in self.users_test:
            ranking = self._rank_candidates(user)
            relevance = self.R_test[user, ranking]   # 0/1 per rank position
            n_test = y_user_test[user]               # positive test records
            n_candidates = len(ranking)

            # --- top-k metrics -------------------------------------------
            top = relevance[:k]                      # may be shorter than k
            hits = top.sum()
            self.pre_u[user] = hits / k
            self.rec_u[user] = hits / n_test
            self.pre += self.pre_u[user]
            self.rec += self.rec_u[user]
            if hits > 0:
                # With zero hits precision and recall are both 0 and F1 is 0.
                self.f1_score += 2 / (1 / self.pre_u[user] + 1 / self.rec_u[user])
            self.NDCG += top.dot(log_recip[:len(top)]) / dcg_norm
            self.one_call += int(hits > 0)

            # --- full-ranking metrics ------------------------------------
            # 1-based ranks of the relevant candidates, ascending.
            positions = np.flatnonzero(relevance) + 1
            if positions.size:
                self.MRR += 1 / positions[0]
            for count, position in enumerate(positions, start=1):
                # MAP: precision at each relevant position.
                self.MAP += count / position / n_test
                # ARP: relative position of each relevant item.
                self.ARP += position / n_candidates / n_test

            # AUC: share of (relevant, irrelevant) candidate pairs in which the
            # relevant item is ranked first.  Undefined when either group is
            # empty; such users contribute 0.
            n_pos = positions.size
            n_neg = n_candidates - n_pos
            if n_pos and n_neg:
                positives_above = np.cumsum(relevance > 0)
                ordered_pairs = positives_above[relevance == 0].sum()
                self.AUC += ordered_pairs / n_pos / n_neg

        # Avoid dividing by zero when no user has positive test feedback.
        n_eval = max(len(self.users_test), 1)
        for name in self._METRICS:
            setattr(self, name, getattr(self, name) / n_eval)

    def get_pre(self, user):
        """Return the Pre@k of ``user`` as computed by the constructor.

        The value is 0 for users without positive test feedback.
        """
        return self.pre_u[user]
        

In [77]:
# Length of the recommendation list used by OCCF
K = 5

# Read the training and test parts of the u1 split as int32 tables
records_train = np.loadtxt('../data/ml-100k/u1.base', dtype=np.int32)
records_test = np.loadtxt('../data/ml-100k/u1.test', dtype=np.int32)

# Shift the first two columns (used by OCCF as user / item array indices)
# down by one so that they start at 0
records_train[:, :2] = records_train[:, :2] - 1
records_test[:, :2] = records_test[:, :2] - 1

In [78]:
# Build and evaluate the baseline. The constructor prints, one value per line:
# precision, recall, F1, NDCG, 1-call, MRR, MAP, ARP and AUC.
occf = OCCF(records_train=records_train, records_test=records_test)


0.2338
0.0571
0.0775
0.2532
0.5877
0.4657
0.1516
0.1327
0.8734




ValueError: could not broadcast input array from shape (2) into shape (3)