In [1]:
import numpy as np
import scipy as sy
import pandas as pd
import matplotlib.pyplot as plt
import time
import sys
import os
import ast
import json
import copy 
import itertools
from itertools import islice
from tqdm import *
from math import ceil
from collections import defaultdict
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import roc_auc_score, accuracy_score
from scipy.sparse import csr_matrix, dok_matrix
from ipynb.fs.full.Random_Sample_Mapper import *

# Read In Data

In [2]:
notebook_path = os.path.abspath("BPR_OPT_Binary_Model.ipynb")
users_items_file_path = os.path.join(os.path.dirname(notebook_path), "data/australian_users_items.json")
items_file_path = os.path.join(os.path.dirname(notebook_path), "data/items_meta_data.json")
users_meta_data_file_path = os.path.join(os.path.dirname(notebook_path), "data/users_meta_data.json")

In [3]:
users_items = []
with open(users_items_file_path, 'r') as data:
    for line in data:
        users_items.append(ast.literal_eval(line))

In [4]:
with open(items_file_path, 'r') as data:
    games_dict = json.load(data)

In [5]:
with open(users_meta_data_file_path, 'r') as file:
    users_meta_data = json.load(file)

# Process Data into Training and Testing Sets

Using default dict for efficient data retrieval for users-items playtime relationship

In [6]:
usersPerItem = defaultdict(set)
itemsPerUser = defaultdict(set)
playtimesPerItem = defaultdict(dict)
playtimesPerUser = defaultdict(dict)
itemNames = defaultdict(str)

In [7]:
for game in games_dict:
    if 'owners' in games_dict[game]:
        usersPerItem[game] = set(games_dict[game]['owners'].keys())

In [8]:
for user in users_items:
    u_id = user['user_id']
    items = [item['item_id'] for item in user['items']]
    itemsPerUser[u_id] = items
    playtimesPerUser[user['user_id']] = dict((item['item_id'], item['playtime_forever']) for item in user['items'])
    for item in user['items']:
        itemNames[item['item_id']] = item['item_name']
        playtimesPerItem[item['item_id']][user['user_id']] = item['playtime_forever']

## Scheduled Sampling with Map-Reduce Algorithm

In [9]:
nUsers = len(itemsPerUser)
nItems = len(usersPerItem)
users = list(itemsPerUser.keys())
items = list(usersPerItem.keys())

In [10]:
user_item_counts = dict((k, len(v)) for k, v in itemsPerUser.items())

In [11]:
datafile = 'data/sample_in.tsv'
mapout1 = 'data/sample_map1.tsv'
mapout2 = 'data/sample_map2.tsv'
outfile = 'data/sample_out.tsv'

f = open(datafile,'w')
for u, its in itemsPerUser.items():
    for i in its:
        print(default_formatter(u,i), file=f)
f.close()

In [12]:
# run two stages of mapreduce
mapper = Mapper(user_item_counts)
mapreduce(datafile, mapout1, mapper=mapper, reducer=reducer)
mapreduce(datafile, mapout2, mapper=indicator_mapper)  # map the data again
mapreduce([mapout1, mapout2], outfile, reducer=indicator_reducer)

In [13]:
def trim(u, i, j):
    return u[1:len(u)-1], i[2:len(i)-2], j[1:len(j)-2]

In [14]:
def create_data(filepath):
    f = open(filepath)
    samples = [map(str, line.strip().split()) for line in f]
    return [trim(u, i, j) for u, i, j in samples]

In [15]:
def create_random_batches(data, batch_size=1024):
    batches = []
    random.shuffle(data)
    num_batches = int(len(data)/batch_size)
    for i in range(num_batches):
        mini_batch = data[i*batch_size:(i+1)*batch_size]
        batches.append(mini_batch)
    return batches

In [16]:
data = create_data(outfile)

In [17]:
train_data, test_data = train_test_split(data, test_size=0.2)

# Utility Functions

In [18]:
def inner(x, y):
    return sum([a*b for a,b in zip(x,y)])

In [19]:
def binary_label(u, i, j):
    c = Counter(itemsPerUser[u])
    if c[i] >= c[j]:
        return 1
    else:
        return 0

In [20]:
def generate_outputs(sample):
    predictions = []
    labels = []
    for u, i, j in sample:
        predict = sigmoid(prediction(u, i, j))
        label = binary_label(u, i, j)
        predictions.append(predict)
        labels.append(label)
            
    return np.rint(predictions), labels

In [21]:
def accuracy(predictions, labels):
    differences = [1 if x == y else 0 for x, y in zip(predictions, labels)]
    return sum(differences) / len(differences)

In [22]:
train_labels = [binary_label(u, i, j) for u, i, j in train_data]

In [23]:
test_labels = [binary_label(u, i, j) for u, i, j in test_data]

In [24]:
mini_batches = create_random_batches(train_data)

## Sigmoid Function

\begin{equation*}
\sigma(x) = \frac{1}{1 + e^{-x}}
\end{equation*}

In [25]:
def sigmoid(x):
    #Numerically stable sigmoid function.
    #Taken from: https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    if x >= 0:
        z = np.exp(-x)
        return 1 / (1 + z)
    else:
        # if x is less than zero then z will be small, denom can't be
        # zero because it's 1+z.
        z = np.exp(x)
        return z / (1 + z)

# Simple (Biase Only) Latent Factor Model with Binary Classification

In [None]:
itemBiases = defaultdict(float)

In [None]:
def unpack(theta):
    global itemBiases
    itemBiases = dict(zip(items, theta))

## Prediction Function

\begin{equation*}
f(i, j) = \beta_i - \beta_j
\end{equation*}

\begin{equation*}
p(i >_u j) = \sigma(f(i, j))
\end{equation*}

In [None]:
def prediction(u, item_i, item_j):
    return itemBiases[item_i] - itemBiases[item_j]

\begin{equation*}
\text{Cost Function (arg min)}:= \sum_{u,i,j} -\ln(\sigma(\beta_i - \beta_j)) + \lambda \sum_i \beta_i^2 = \sum_{u,i,j} -ln\left( \frac{1}{1 + e^{\beta_j - \beta_i}} \right) + \lambda \sum_i \beta_i^2
\end{equation*}

## Cost Function

In [None]:
class Cost(object):
    def __init__(self, max_iter=20000):
        self.max_iter = max_iter
        self.nit = 0
        
    def __call__(self, theta, lamb):
        if self.nit >= self.max_iter:
            raise RuntimeError("max iteration reached")
        self.nit += 1
        unpack(theta)
        cost = 0.0
        predictions = []
        for u, i, j in train_data:
            x = prediction(u, i, j)
            predictions.append(sigmoid(x))
            cost += np.log(sigmoid(x))

        for i in itemBiases:
            cost -= lamb*itemBiases[i]**2

        print('iteration {0} Cost: {1}'.format(self.nit, -cost))
        print('iteration {0} Training Accuracy: {1}'.format(self.nit, accuracy(np.rint(predictions), train_labels[:500000])))
        print('-------------------------------------------------------------------')

        return -cost

In [None]:
cost = Cost()

\begin{equation*}
\frac{\partial }{\partial x} ln\sigma(x) = \frac{1}{1 + e^x} = \sigma(-x)
\end{equation*}

## Partial Derivatives

\begin{equation*}
\frac{\partial f}{\partial \beta_i} = -\frac{e^{\beta_j - \beta_i}}{1 + e^{\beta_j - \beta_i}} + 2 \lambda \beta_j = -\frac{1}{1 + e^{\beta_i - \beta_j}} + 2 \lambda \beta_j
\end{equation*}

\begin{equation*}
\frac{\partial f}{\partial \beta_j} = \frac{e^{\beta_j - \beta_i}}{1 + e^{\beta_j - \beta_i}} + 2 \lambda \beta_j = \frac{1}{1 + e^{\beta_i - \beta_j}} + 2 \lambda \beta_j
\end{equation*}

### [Important]: switch the sign of all partial derivatives to compute your gradient ascent

In [None]:
def derivative(theta, lamb):
    unpack(theta)
    dItemBiases = defaultdict(float)
    for u, i, j in train_data:
        x = prediction(u, i, j)
        dbase = 1 / (1 + np.exp(x)) # negative gradient descent for maximizing
        dItemBiases[i] += -dbase
        dItemBiases[j] += dbase
    for i in itemBiases:
        dItemBiases[i] += 2*lamb*itemBiases[i]
    dtheta = [dItemBiases[i] for i in items]
    return np.array(dtheta)

In [None]:
res, f, d = sy.optimize.fmin_l_bfgs_b(cost, [0.0]*nItems, derivative, args=[0.001])

iteration 1 Cost: 881087.1071146384
iteration 1 Training Accuracy: 0.0
-------------------------------------------------------------------
iteration 2 Cost: 870261.4208830316
iteration 2 Training Accuracy: 0.567466
-------------------------------------------------------------------
iteration 3 Cost: 850370.8177994409
iteration 3 Training Accuracy: 0.575028
-------------------------------------------------------------------
iteration 4 Cost: 848075.8628756106
iteration 4 Training Accuracy: 0.579118
-------------------------------------------------------------------
iteration 5 Cost: 846492.8089477586
iteration 5 Training Accuracy: 0.58264
-------------------------------------------------------------------
iteration 6 Cost: 845749.7782593477
iteration 6 Training Accuracy: 0.584818
-------------------------------------------------------------------
iteration 7 Cost: 845136.7970781651
iteration 7 Training Accuracy: 0.585248
------------------------------------------------------------------

iteration 58 Cost: 840919.034125573
iteration 58 Training Accuracy: 0.588972
-------------------------------------------------------------------
iteration 59 Cost: 840893.4818122418
iteration 59 Training Accuracy: 0.588934
-------------------------------------------------------------------
iteration 60 Cost: 840877.4096184314
iteration 60 Training Accuracy: 0.588734
-------------------------------------------------------------------
iteration 61 Cost: 840852.1598087591
iteration 61 Training Accuracy: 0.588778
-------------------------------------------------------------------
iteration 62 Cost: 840835.7631463446
iteration 62 Training Accuracy: 0.588744
-------------------------------------------------------------------
iteration 63 Cost: 840819.0969745023
iteration 63 Training Accuracy: 0.588784
-------------------------------------------------------------------
iteration 64 Cost: 840803.8761601396
iteration 64 Training Accuracy: 0.58864
------------------------------------------------

iteration 115 Cost: 840313.2397350784
iteration 115 Training Accuracy: 0.589104
-------------------------------------------------------------------
iteration 116 Cost: 840306.3733586288
iteration 116 Training Accuracy: 0.589192
-------------------------------------------------------------------
iteration 117 Cost: 840303.7652592479
iteration 117 Training Accuracy: 0.589218
-------------------------------------------------------------------
iteration 118 Cost: 840298.9875828519
iteration 118 Training Accuracy: 0.58921
-------------------------------------------------------------------
iteration 119 Cost: 840293.517240242
iteration 119 Training Accuracy: 0.588994
-------------------------------------------------------------------
iteration 120 Cost: 840285.3878695008
iteration 120 Training Accuracy: 0.588974
-------------------------------------------------------------------
iteration 121 Cost: 840280.3950922288
iteration 121 Training Accuracy: 0.589022
----------------------------------

iteration 171 Cost: 840095.0092199519
iteration 171 Training Accuracy: 0.589164
-------------------------------------------------------------------
iteration 172 Cost: 840093.0040961453
iteration 172 Training Accuracy: 0.589158
-------------------------------------------------------------------
iteration 173 Cost: 840089.1974416305
iteration 173 Training Accuracy: 0.58907
-------------------------------------------------------------------
iteration 174 Cost: 840087.764886249
iteration 174 Training Accuracy: 0.589084
-------------------------------------------------------------------
iteration 175 Cost: 840085.7097639256
iteration 175 Training Accuracy: 0.58909
-------------------------------------------------------------------
iteration 176 Cost: 840083.2408641488
iteration 176 Training Accuracy: 0.589046
-------------------------------------------------------------------
iteration 177 Cost: 840081.8898711638
iteration 177 Training Accuracy: 0.589098
-----------------------------------

iteration 227 Cost: 840026.8367475986
iteration 227 Training Accuracy: 0.589228
-------------------------------------------------------------------
iteration 228 Cost: 840026.3789162773
iteration 228 Training Accuracy: 0.589224
-------------------------------------------------------------------
iteration 229 Cost: 840025.9154739043
iteration 229 Training Accuracy: 0.589174
-------------------------------------------------------------------
iteration 230 Cost: 840025.2738612668
iteration 230 Training Accuracy: 0.589172
-------------------------------------------------------------------
iteration 231 Cost: 840024.9073388462
iteration 231 Training Accuracy: 0.589222
-------------------------------------------------------------------
iteration 232 Cost: 840024.5984547781
iteration 232 Training Accuracy: 0.589196
-------------------------------------------------------------------
iteration 233 Cost: 840024.1329702362
iteration 233 Training Accuracy: 0.58917
---------------------------------

iteration 283 Cost: 840011.6712656767
iteration 283 Training Accuracy: 0.58919
-------------------------------------------------------------------
iteration 284 Cost: 840011.5216297696
iteration 284 Training Accuracy: 0.589172
-------------------------------------------------------------------
iteration 285 Cost: 840011.3882955728
iteration 285 Training Accuracy: 0.589174
-------------------------------------------------------------------
iteration 286 Cost: 840011.2661723107
iteration 286 Training Accuracy: 0.589168
-------------------------------------------------------------------
iteration 287 Cost: 840011.1589454269
iteration 287 Training Accuracy: 0.589186
-------------------------------------------------------------------
iteration 288 Cost: 840011.0150325238
iteration 288 Training Accuracy: 0.589176
-------------------------------------------------------------------
iteration 289 Cost: 840010.883751741
iteration 289 Training Accuracy: 0.58919
-----------------------------------

iteration 339 Cost: 840007.2162709747
iteration 339 Training Accuracy: 0.589208
-------------------------------------------------------------------
iteration 340 Cost: 840007.1562234942
iteration 340 Training Accuracy: 0.589202
-------------------------------------------------------------------
iteration 341 Cost: 840007.0949396899
iteration 341 Training Accuracy: 0.589182
-------------------------------------------------------------------
iteration 342 Cost: 840006.9950149393
iteration 342 Training Accuracy: 0.589148
-------------------------------------------------------------------
iteration 343 Cost: 840007.16666642
iteration 343 Training Accuracy: 0.589138
-------------------------------------------------------------------
iteration 344 Cost: 840006.9457258992
iteration 344 Training Accuracy: 0.58915
-------------------------------------------------------------------
iteration 345 Cost: 840006.8437636556
iteration 345 Training Accuracy: 0.58914
------------------------------------

iteration 395 Cost: 840003.0113779628
iteration 395 Training Accuracy: 0.589176
-------------------------------------------------------------------
iteration 396 Cost: 840002.8986497502
iteration 396 Training Accuracy: 0.589208
-------------------------------------------------------------------
iteration 397 Cost: 840002.79302395
iteration 397 Training Accuracy: 0.589202
-------------------------------------------------------------------
iteration 398 Cost: 840002.7173749305
iteration 398 Training Accuracy: 0.5892
-------------------------------------------------------------------
iteration 399 Cost: 840002.6135034313
iteration 399 Training Accuracy: 0.589192
-------------------------------------------------------------------
iteration 400 Cost: 840002.5395778226
iteration 400 Training Accuracy: 0.589202
-------------------------------------------------------------------
iteration 401 Cost: 840002.413509608
iteration 401 Training Accuracy: 0.589252
-------------------------------------

iteration 451 Cost: 839998.1175208016
iteration 451 Training Accuracy: 0.589184
-------------------------------------------------------------------
iteration 452 Cost: 839998.0758058855
iteration 452 Training Accuracy: 0.589202
-------------------------------------------------------------------
iteration 453 Cost: 839998.0349010329
iteration 453 Training Accuracy: 0.589208
-------------------------------------------------------------------
iteration 454 Cost: 839997.9612166561
iteration 454 Training Accuracy: 0.589254
-------------------------------------------------------------------
iteration 455 Cost: 839998.1135431026
iteration 455 Training Accuracy: 0.58924
-------------------------------------------------------------------
iteration 456 Cost: 839997.9263754004
iteration 456 Training Accuracy: 0.589254
-------------------------------------------------------------------
iteration 457 Cost: 839997.8600480363
iteration 457 Training Accuracy: 0.58921
----------------------------------

iteration 507 Cost: 839996.721239212
iteration 507 Training Accuracy: 0.589154
-------------------------------------------------------------------
iteration 508 Cost: 839996.7119930045
iteration 508 Training Accuracy: 0.589164
-------------------------------------------------------------------
iteration 509 Cost: 839996.7038688922
iteration 509 Training Accuracy: 0.58918
-------------------------------------------------------------------
iteration 510 Cost: 839996.6968813569
iteration 510 Training Accuracy: 0.589168
-------------------------------------------------------------------
iteration 511 Cost: 839996.6862402176
iteration 511 Training Accuracy: 0.589156
-------------------------------------------------------------------
iteration 512 Cost: 839996.6792741091
iteration 512 Training Accuracy: 0.58915
-------------------------------------------------------------------
iteration 513 Cost: 839996.6727431025
iteration 513 Training Accuracy: 0.589148
-----------------------------------

iteration 563 Cost: 839996.3969821469
iteration 563 Training Accuracy: 0.589164
-------------------------------------------------------------------
iteration 564 Cost: 839996.391252431
iteration 564 Training Accuracy: 0.589168
-------------------------------------------------------------------
iteration 565 Cost: 839996.3896138205
iteration 565 Training Accuracy: 0.58919
-------------------------------------------------------------------


In [None]:
d

{'grad': array([ 0.00367678, -0.0169391 ,  0.01158269, ..., -0.00479811,
         0.        , -0.00378774]),
 'task': b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH',
 'funcalls': 565,
 'nit': 552,
 'warnflag': 0}

In [None]:
unpack(res)

In [None]:
predictions, labels = generate_outputs(test_data)

In [None]:
accuracy(predictions, labels)

0.5771808701453179

# Complete Latent Factor Model with Binary Classification

A simple non-biased latent factor model that is wrapped into a binary function (sigmoid function) as a base line model, using popularity as the item's sole feature

In [None]:
itemBiases = defaultdict(float)

For each user and item we now have a low dimensional descriptor (representing that user's preferences, and that item's properties), of dimension K.

In [None]:
userGamma = {}
itemGamma = {}

In [None]:
K = 10

In [None]:
for u in itemsPerUser:
    userGamma[u] = [random.random() * 0.1 - 0.05 for k in range(K)]

In [None]:
for i in usersPerItem:
    itemGamma[i] = [random.random() * 0.1 - 0.05 for k in range(K)]

We'll use another library in this example to perform gradient descent. This library requires that we pass it a "flat" parameter vector (theta) containing all of our parameters. This utility function just converts between a flat feature vector, and our model parameters, i.e., it "unpacks" theta into our offset and bias parameters.

In [None]:
def unpack(theta):
    global itemBiases
    global userGamma
    global itemGamma
    index = 0
    itemBiases = dict(zip(items, theta[0:index + nItems]))
    index += nItems
    for u in users:
        userGamma[u] = theta[index:index + K]
        index += K
    for i in items:
        itemGamma[i] = theta[index:index + K]
        index += K

## Prediction Function

\begin{equation*}
f(u, i, j) = \gamma_u \gamma_i + \beta_i - (\gamma_u \gamma_j + \beta_j)
\end{equation*}

\begin{equation*}
p(i >_u j) = \sigma(f(u, i, j))
\end{equation*}

In [None]:
def prediction(user, item_i, item_j):
    return inner(userGamma[user], itemGamma[item_i]) + itemBiases[item_i] - (inner(userGamma[user], itemGamma[item_j]) + itemBiases[item_j]) 

## Cost Function

\begin{equation*}
\text{Cost Function (arg min)}:= \sum_{u,i,j} -\ln(\sigma(\gamma_u \gamma_i + \beta_i - (\gamma_u \gamma_j + \beta_j))) + \lambda [ \sum_u \beta_u^2 + \sum_i \beta_i^2 + \sum_i ||\gamma_i||_2^2 + \sum_u ||\gamma_u||_2^2 ]
\end{equation*}

\begin{equation*}
\sum_{u,i,j} \ln(\sigma(\gamma_u \gamma_i + \beta_i - (\gamma_u \gamma_j + \beta_j))) = \sum_{u,i,j} ln\left( \frac{1}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}} \right)
\end{equation*}

In [None]:
def cost(theta, lamb):
    unpack(theta)
    cost = 0.0
    predictions = []
    for u, i, j in train_data:
        x = prediction(u, i, j)
        predictions.append(sigmoid(x))
        cost += np.log(sigmoid(x))
        
    for u in users:
        for k in range(K):
            cost -= lamb*userGamma[u][k]**2
    for i in items:
        cost -= lamb*itemBiases[i]**2
        for k in range(K):
            cost -= lamb*itemGamma[i][k]**2
        
    print('Current Cost: %s' % -cost)
    print('Current Training Accuracy: %s' % accuracy(predictions, train_labels))
    print('-------------------------------------------------------------------')
        
    return -cost

## Partial Derivatives

\begin{equation*}
\frac{\partial f}{\partial \gamma_{u,k}} = \frac{(\gamma_{j,k} - \gamma_{i,k}) \cdot e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}} + 2 \lambda \gamma_{i,k} 
\end{equation*}

\begin{equation*}
\frac{\partial f}{\partial \gamma_{i,k}} = -\frac{\gamma_{u,k} \cdot e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}} + 2 \lambda \gamma_{i,k}
\end{equation*}

\begin{equation*}
\frac{\partial f}{\partial \gamma_{j,k}} = \frac{\gamma_{u,k} \cdot e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}} + 2 \lambda \gamma_{j,k}
\end{equation*}

\begin{equation*}
\frac{\partial f}{\partial \beta_i} = -\frac{e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}} + 2 \lambda \beta_i
\end{equation*}

\begin{equation*}
\frac{\partial f}{\partial \beta_j} = \frac{e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}} + 2 \lambda \beta_j
\end{equation*}

In [None]:
def derivative(theta, lamb):
    unpack(theta)
    dItemBiases = defaultdict(float)
    dUserGamma = {}
    dItemGamma = {}
    for u in users:
        dUserGamma[u] = [0.0 for k in range(K)]
    for i in items:
        dItemGamma[i] = [0.0 for k in range(K)]
    for u, i, j in train_data:
        x = prediction(u, i ,j)
        dbase = 1 / (1 + np.exp(x))
        dItemBiases[i] += -dbase
        dItemBiases[j] += dbase
        for k in range(K):
            dUserGamma[u][k] += (itemGamma[j][k] - itemGamma[i][k]) * dbase
            dItemGamma_k = userGamma[u][k] * dbase
            dItemGamma[i][k] += -dItemGamma_k
            dItemGamma[j][k] += dItemGamma_k
    for u in userGamma:
        for k in range(K):
            dUserGamma[u][k] += 2*lamb*userGamma[u][k]
    for i in itemBiases:
        dItemBiases[i] += 2*lamb*itemBiases[i]
        for k in range(K):
            dItemGamma[i][k] += 2*lamb*itemGamma[i][k]
    dtheta = [dItemBiases[i] for i in items]
    for u in users:
        dtheta += dUserGamma[u]
    for i in items:
        dtheta += dItemGamma[i]
    return np.array(dtheta)

In [None]:
complete_res, complete_f, complete_d = sy.optimize.fmin_l_bfgs_b(cost, 
                                [0.0]*nItems + # Initialize beta
                                [random.random() * 0.1 - 0.05 for k in range(K*(nUsers + nItems))], # Gamma
                                derivative, args=[0.001])

Current Cost: 881091.0857679009
Current Training Accuracy: 0.0
-------------------------------------------------------------------
Current Cost: 870264.9239619058
Current Training Accuracy: 0.0
-------------------------------------------------------------------
Current Cost: 850367.2754800008
Current Training Accuracy: 0.0
-------------------------------------------------------------------
Current Cost: 848066.2511377814
Current Training Accuracy: 0.0
-------------------------------------------------------------------
Current Cost: 846469.3843631307
Current Training Accuracy: 0.0
-------------------------------------------------------------------
Current Cost: 845704.1224281399
Current Training Accuracy: 0.0
-------------------------------------------------------------------
Current Cost: 845070.0794451325
Current Training Accuracy: 0.0
-------------------------------------------------------------------
Current Cost: 844755.0600726622
Current Training Accuracy: 0.0
--------------------

Current Cost: 214585.6889601349
Current Training Accuracy: 0.04800336705634312
-------------------------------------------------------------------
Current Cost: 212615.76911122346
Current Training Accuracy: 0.04963812011265478
-------------------------------------------------------------------
Current Cost: 210446.8919273968
Current Training Accuracy: 0.05074814733231588
-------------------------------------------------------------------
Current Cost: 208161.6015739953
Current Training Accuracy: 0.05348270056799408
-------------------------------------------------------------------
Current Cost: 206322.78371357266
Current Training Accuracy: 0.05554462923045455
-------------------------------------------------------------------
Current Cost: 204617.7470889815
Current Training Accuracy: 0.05664285601900656
-------------------------------------------------------------------
Current Cost: 202859.9973816936
Current Training Accuracy: 0.05832087732271819
-------------------------------------

Current Cost: 157790.70242607562
Current Training Accuracy: 0.15056484730242145
-------------------------------------------------------------------
Current Cost: 157323.6199151246
Current Training Accuracy: 0.15336469625690324
-------------------------------------------------------------------
Current Cost: 156859.50273562822
Current Training Accuracy: 0.15471230548956055
-------------------------------------------------------------------
Current Cost: 156414.65805148546
Current Training Accuracy: 0.15572635586953443
-------------------------------------------------------------------
Current Cost: 155960.39530420417
Current Training Accuracy: 0.15830120993753638
-------------------------------------------------------------------
Current Cost: 155497.10646791809
Current Training Accuracy: 0.16033953773777868
-------------------------------------------------------------------
Current Cost: 155048.17027376746
Current Training Accuracy: 0.1619672105354249
----------------------------------

Current Cost: 140465.22484599275
Current Training Accuracy: 0.24099863114999134
-------------------------------------------------------------------
Current Cost: 140235.35087775375
Current Training Accuracy: 0.24310225466903723
-------------------------------------------------------------------
Current Cost: 140020.4471172846
Current Training Accuracy: 0.24516654341771954
-------------------------------------------------------------------
Current Cost: 139842.86850352093
Current Training Accuracy: 0.24548515505766477
-------------------------------------------------------------------
Current Cost: 139658.31452834595
Current Training Accuracy: 0.24599021350913353
-------------------------------------------------------------------
Current Cost: 139518.28004754838
Current Training Accuracy: 0.24686108532498388
-------------------------------------------------------------------
Current Cost: 139348.6047683453
Current Training Accuracy: 0.24810799754551033
----------------------------------

Current Cost: 131992.40724266198
Current Training Accuracy: 0.3064493289488176
-------------------------------------------------------------------
Current Cost: 131880.1064612784
Current Training Accuracy: 0.30666252340418837
-------------------------------------------------------------------
Current Cost: 131762.49339397732
Current Training Accuracy: 0.3068332363075664
-------------------------------------------------------------------
Current Cost: 131654.82704649473
Current Training Accuracy: 0.3078040184401403
-------------------------------------------------------------------
Current Cost: 131544.30730660938
Current Training Accuracy: 0.3094364114102302
-------------------------------------------------------------------
Current Cost: 131414.08862221168
Current Training Accuracy: 0.31036313859999687
-------------------------------------------------------------------
Current Cost: 131274.96213636955
Current Training Accuracy: 0.31167691993014146
-------------------------------------

Current Cost: 127002.52402127843
Current Training Accuracy: 0.3493423226395204
-------------------------------------------------------------------
Current Cost: 126935.14864846798
Current Training Accuracy: 0.34967430810138933
-------------------------------------------------------------------
Current Cost: 126849.11319683799
Current Training Accuracy: 0.3505947417278978
-------------------------------------------------------------------
Current Cost: 126778.9926917585
Current Training Accuracy: 0.3515254023947008
-------------------------------------------------------------------
Current Cost: 126710.28299200063
Current Training Accuracy: 0.35199977972528596
-------------------------------------------------------------------
Current Cost: 126641.82021967195
Current Training Accuracy: 0.35284075711566
-------------------------------------------------------------------
Current Cost: 126576.85716294058
Current Training Accuracy: 0.3531208206806489
----------------------------------------

Current Cost: 123790.65515251078
Current Training Accuracy: 0.37669021508252437
-------------------------------------------------------------------
Current Cost: 123731.30581171911
Current Training Accuracy: 0.37712919111978227
-------------------------------------------------------------------
Current Cost: 123674.637360792
Current Training Accuracy: 0.3776279560079928
-------------------------------------------------------------------
Current Cost: 123623.7454327947
Current Training Accuracy: 0.3781094135972434
-------------------------------------------------------------------
Current Cost: 123577.35951931725
Current Training Accuracy: 0.37851220164576677
-------------------------------------------------------------------
Current Cost: 123531.96353491879
Current Training Accuracy: 0.3787466368771339
-------------------------------------------------------------------
Current Cost: 123479.56057153417
Current Training Accuracy: 0.37933351165095897
--------------------------------------

Current Cost: 121394.51866008584
Current Training Accuracy: 0.39502965841685417
-------------------------------------------------------------------
Current Cost: 121352.81161621706
Current Training Accuracy: 0.39535928379250124
-------------------------------------------------------------------
Current Cost: 121316.84509441983
Current Training Accuracy: 0.39568340230029736
-------------------------------------------------------------------
Current Cost: 121278.63360562982
Current Training Accuracy: 0.3958580486807118
-------------------------------------------------------------------
Current Cost: 121236.92871717046
Current Training Accuracy: 0.39609799077992985
-------------------------------------------------------------------
Current Cost: 121200.23270285508
Current Training Accuracy: 0.396370187390846
-------------------------------------------------------------------
Current Cost: 121154.54968619415
Current Training Accuracy: 0.3967682552669258
------------------------------------

Current Cost: 119408.91944006097
Current Training Accuracy: 0.4097314221879573
-------------------------------------------------------------------
Current Cost: 119368.77215906442
Current Training Accuracy: 0.410013845839168
-------------------------------------------------------------------
Current Cost: 119343.42400649682
Current Training Accuracy: 0.4102899759271205
-------------------------------------------------------------------
Current Cost: 119307.52469464536
Current Training Accuracy: 0.4103788725081423
-------------------------------------------------------------------
Current Cost: 119273.35026946485
Current Training Accuracy: 0.4104921566467895
-------------------------------------------------------------------
Current Cost: 119248.66766722997
Current Training Accuracy: 0.41066129615935304
-------------------------------------------------------------------
Current Cost: 119219.06088848779
Current Training Accuracy: 0.4109075318218292
---------------------------------------

In [None]:
complete_d

In [None]:
unpack(complete_res)

In [None]:
predictions, labels = generate_outputs(test_data)

In [None]:
accuracy(predictions, labels)

In [None]:
predictions, labels = generate_outputs(train_data)

In [None]:
accuracy(predictions, labels)

In [None]:
def percision_user(user):
    """
    returns the predicted ratings for the specified user,
    this is mainly used in computing evaluation metric
    """
    user_pred = []
    copy_items = copy.deepcopy(itemsPerUser[user])
    for i in itemsPerUser[user]:
        copy_items.remove(i)
        predict = sigmoid(prediction(user, i, random.choice(copy_items)))
        copy_items.add(i)
        user_pred.append(predict)
        
    return user_pred

In [None]:
def percision_score():
    score = 0.0
    for user in users:
        score += sum(np.rint(percision_user))
    score =/ nUsers
    return score