In [2]:
import numpy as np
import scipy as sy
import matplotlib.pyplot as plt
import sys
import os
import ast
import json
import random
from collections import defaultdict
from collections import Counter
from sklearn.model_selection import train_test_split
from ipynb.fs.full.Random_Sample_Mapper import *

In [3]:
notebook_path = os.path.abspath("BPR_OPT_Binary_Model.ipynb")
users_items_file_path = os.path.join(os.path.dirname(notebook_path), "data/australian_users_items.json")
items_file_path = os.path.join(os.path.dirname(notebook_path), "data/items_meta_data.json")
users_meta_data_file_path = os.path.join(os.path.dirname(notebook_path), "data/users_meta_data.json")

In [4]:
users_items = []
with open(users_items_file_path, 'r') as data:
    for line in data:
        users_items.append(ast.literal_eval(line))

In [5]:
with open(items_file_path, 'r') as data:
    games_dict = json.load(data)

In [6]:
with open(users_meta_data_file_path, 'r') as file:
    users_meta_data = json.load(file)

In [7]:
usersPerItem = defaultdict(set)
itemsPerUser = defaultdict(set)
playtimesPerItem = defaultdict(dict)
playtimesPerUser = defaultdict(dict)
itemNames = defaultdict(str)

In [8]:
train_usersPerItem = defaultdict(set)
train_itemsPerUser = defaultdict(set)
test_usersPerItem = defaultdict(set)
test_itemsPerUser = defaultdict(set)

In [9]:
for game in games_dict:
    if 'owners' in games_dict[game]:
        usersPerItem[game] = set(games_dict[game]['owners'].keys())

In [10]:
for user in users_items:
    u_id = user['user_id']
    items = [item['item_id'] for item in user['items']]
    itemsPerUser[u_id] = items
    
    items_train, items_test = train_test_split(items, test_size = 0.995)
    train_itemsPerUser[u_id] = items_train
    test_itemsPerUser[u_id] = items_test

    playtimesPerUser[user['user_id']] = dict((item['item_id'], item['playtime_forever']) for item in user['items'])
    for item in user['items']:
        itemNames[item['item_id']] = item['item_name']
        playtimesPerItem[item['item_id']][user['user_id']] = item['playtime_forever']

In [11]:
for user in train_itemsPerUser:
    for item in train_itemsPerUser[user]:
        train_usersPerItem[item].add(user)
        
    for item in test_itemsPerUser[user]:
        test_usersPerItem[item].add(user)

In [12]:
train_nUsers = len(train_itemsPerUser)
train_nItems = len(train_usersPerItem)
train_users = list(train_itemsPerUser.keys())
train_items = list(train_usersPerItem.keys())

In [13]:
user_item_counts = dict((k, len(v)) for k, v in train_itemsPerUser.items())

In [14]:
datafile = 'data/sample_in.tsv'
mapout1 = 'data/sample_map1.tsv'
mapout2 = 'data/sample_map2.tsv'
outfile = 'data/sample_out.tsv'

f = open(datafile,'w')
for user, items in train_itemsPerUser.items():
    for item in items:
        print(default_formatter(user,item), file=f)
f.close()

In [15]:
# run two stages of mapreduce
mapper1 = Mapper(user_item_counts, oversampling=10)
mapreduce(datafile, mapout1, mapper=mapper1, reducer=reducer)
mapreduce(datafile, mapout2, mapper=indicator_mapper)  # map the data again
mapreduce([mapout1, mapout2], outfile, reducer=indicator_reducer)

In [16]:
class ExternalSchedule(object):

    def __init__(self, filepath):
        self.filepath = filepath

    def generate_samples(self, data):
        f = open(self.filepath)
        samples = [map(str, line.strip().split()) for line in f]
        random.shuffle(samples)  # important!
        for u, i, j in samples:
            yield u, i, j

In [17]:
sampler = ExternalSchedule(outfile)  # schedule is one-indexed

# Complete Latent Factor Model with Binary Classification

A simple non-biased latent factor model that is wrapped into a binary function (sigmoid function) as a base line model, using popularity as the item's sole feature

In [18]:
itemBiases = defaultdict(float)

For each user and item we now have a low dimensional descriptor (representing that user's preferences, and that item's properties), of dimension K.

In [19]:
userGamma = {}
itemGamma = {}

In [20]:
K = 2

In [21]:
for u in train_itemsPerUser:
    userGamma[u] = [random.random() * 0.1 - 0.05 for k in range(K)]

In [22]:
for i in train_usersPerItem:
    itemGamma[i] = [random.random() * 0.1 - 0.05 for k in range(K)]

We'll use another library in this example to perform gradient descent. This library requires that we pass it a "flat" parameter vector (theta) containing all of our parameters. This utility function just converts between a flat feature vector, and our model parameters, i.e., it "unpacks" theta into our offset and bias parameters.

In [23]:
def unpack(theta):
    global itemBiases
    global userGamma
    global itemGamma
    index = 0
    itemBiases = dict(zip(train_items, theta[0:index + train_nItems]))
    index += train_nItems
    for u in train_users:
        userGamma[u] = theta[index:index + K]
        index += K
    for i in train_items:
        itemGamma[i] = theta[index:index + K]
        index += K

In [24]:
def inner(x, y):
    return sum([a*b for a,b in zip(x,y)])

## Sigmoid Function

\begin{equation*}
\sigma(x) = \frac{1}{1 + e^{-x}}
\end{equation*}

In [25]:
def sigmoid(x):
    #Numerically stable sigmoid function.
    #Taken from: https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    if x >= 0:
        z = np.exp(-x)
        return 1 / (1 + z)
    else:
        # if x is less than zero then z will be small, denom can't be
        # zero because it's 1+z.
        z = np.exp(x)
        return z / (1 + z)

## Prediction Function

\begin{equation*}
f(u, i, j) = \gamma_u \gamma_i + \beta_i - (\gamma_u \gamma_j + \beta_j)
\end{equation*}

\begin{equation*}
p(i >_u j) = \sigma(f(u, i, j))
\end{equation*}

In [26]:
def prediction(user, item_i, item_j):
    return inner(userGamma[user], itemGamma[item_i]) + itemBiases[item_i] - (inner(userGamma[user], itemGamma[item_j]) + itemBiases[item_j]) 

## Cost Function

\begin{equation*}
\text{BPR-OPT} := \text{argmax} \ln(\sigma(\gamma_u \gamma_i + \beta_i - (\gamma_u \gamma_j + \beta_j)))
\end{equation*}

\begin{equation*}
\text{Cost Function}:= \sum_{u,i,j} -ln\left( 1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)} \right)
\end{equation*}

In [27]:
def trim(u, i, j):
    return u[1:len(u)-1], i[2:len(i)-2], j[1:len(j)-2]

In [28]:
def cost(theta):
    unpack(theta)
    cost = 0
    for u, i, j in sampler.generate_samples(data):
        u, i, j = trim(u, i ,j)
        x = prediction(u, i, j)

        cost += np.log(sigmoid(x))
        
    print(-cost)
        
    return -cost

## Gradient Ascent

\begin{equation*}
\frac{\partial f}{\partial \gamma_{u,k}} = \frac{(\gamma_{i,k} - \gamma_{j,k}) \cdot e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}
\end{equation*}

\begin{equation*}
\frac{\partial f}{\partial \gamma_{i,k}} = \frac{\gamma_{u,k} \cdot e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}
\end{equation*}

\begin{equation*}
\frac{\partial f}{\partial \gamma_{j,k}} = -\frac{\gamma_{u,k} \cdot e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}
\end{equation*}

\begin{equation*}
\frac{\partial f}{\partial \beta_i} = \frac{e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}
\end{equation*}

\begin{equation*}
\frac{\partial f}{\partial \beta_j} = -\frac{e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}{1 + e^{\gamma_u \gamma_j + \beta_j - (\gamma_u \gamma_i + \beta_i)}}
\end{equation*}

In [29]:
def derivative(theta):
    unpack(theta)
    dItemBiases = defaultdict(float)
    dUserGamma = {}
    dItemGamma = {}
    for u in train_users:
        dUserGamma[u] = [0.0 for k in range(K)]
    for i in train_items:
        dItemGamma[i] = [0.0 for k in range(K)]
    for u, i, j in sampler.generate_samples(data):
        u, i, j = trim(u, i ,j)
        x = prediction(u, i ,j)
        dbase = np.exp(-x) / (1 + np.exp(-x))
        dItemBiases[i] += dbase
        dItemBiases[j] += -dbase
        for k in range(K):
            dUserGamma[u][k] += (itemGamma[i][k] - itemGamma[j][k]) * dbase
            dItemGamma_k = userGamma[u][k] * dbase
            dItemGamma[i][k] += dItemGamma_k
            dItemGamma[j][k] += -dItemGamma_k
    dtheta = [dItemBiases[i] for i in train_items]
    for u in train_users:
        dtheta += dUserGamma[u]
    for i in train_items:
        dtheta += dItemGamma[i]
    return np.array(dtheta)

In [30]:
sy.optimize.fmin_l_bfgs_b(cost, [0.0]*train_nItems + # Initialize beta
                                [random.random() * 0.1 - 0.05 for k in range(K*(train_nUsers + train_nItems))], # Gamma
                             derivative)

20112.32888593956
20210.48345567925
20120.56756774348
20113.078795422894
20112.397641205458
20112.335193930237
20112.32946470475
20112.328939042058
20112.32889081179
20112.328886386567
20112.328885980463
20112.32888594329
20112.328885939984
20112.328885939518
20112.328885939583
20112.32888593951
20112.32888593943
20112.328885939405
20112.32888593957
20112.328885939503


(array([-9.42986008e-15, -4.73319383e-15,  4.72190682e-15, ...,
         1.25060359e-02,  3.34352364e-02,  1.97081035e-02]),
 20112.328885939503,
 {'grad': array([ 1.99856945,  1.00315556, -1.00076339, ..., -0.02134534,
          0.02569994,  0.07383677]),
  'task': b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH',
  'funcalls': 20,
  'nit': 1,
  'warnflag': 0})