In [1]:
import tqdm
import implicit
import lightfm
import pandas as pd 
import numpy as np
from implicit.bpr import BayesianPersonalizedRanking as BPR
from scipy import sparse
import orion_recommend
from orion_recommend.evaluate import metrics
import lightfm
import evall
from lightfm import cross_validation

In [294]:
# Functions
def metrics_user_rec(item, sim_users, k):
    count = 0
    for i, user_list in zip(item, sim_users):       
        for u in user_list:
            if ui_matrix[u,i] == 1:
                count = count + 1            
    p_at_k = round(count/(item.shape[0] * k), 4)

    RS = []
    ans = 0.0
    for i, user_list in zip(item, sim_users):           
        r=[]
        for u in user_list:
             r.append(ui_matrix[u][i])
        ans = ans + evall.ndcg_at_k(r, k, method=1)
        RS.append(r)
    G_at_k = ans/item.shape[0]
    M_at_k = evall.mean_average_precision(RS)
    return p_at_k, G_at_k, M_at_k

def metrics_item_rec(user, sim_items, k):
    count = 0
    for u, item_list in zip(user, sim_items):       
        for i in item_list:
            if ui_matrix[u,i] == 1:
                count = count + 1            
    p_at_k = round(count/(user.shape[0] * k), 4)

    RS = []
    ans = 0.0
    for u, item_list in zip(user, sim_items):           
        r=[]
        for i in item_list:
             r.append(ui_matrix[u][i])
        ans = ans + evall.ndcg_at_k(r, k, method=1)
        RS.append(r)
    G_at_k = ans/user.shape[0]
    M_at_k = evall.mean_average_precision(RS)
    return p_at_k, G_at_k, M_at_k


def test_model_item_user(model, test, k, hybrid):
    rec_dic = {}
    if hybrid ==False:
        with tqdm.tqdm(total=len(user_list)) as progress:
            for u in test_items:
                user_preds = model.predict(np.repeat(u,ui_matrixT.shape[1]),np.array([i for i in range(ui_matrixT.shape[1])]))
                top_k = np.argsort(user_preds)[-k:]
                rec_dic[u] = top_k
                progress.update(1)
    else:
        with tqdm.tqdm(total=len(user_list)) as progress:
            for u in test_items:
                user_preds = model.predict(np.repeat(u,ui_matrixT.shape[1]),np.array([i for i in range(ui_matrixT.shape[1])]),
                                           item_features = user_features, user_features = item_features)
                top_k = np.argsort(user_preds)[-k:]
                rec_dic[u] = top_k
                progress.update(1)

    targets =  np.array([i for i in rec_dic.keys()])
    recommended = np.array([i for i in rec_dic.values()])
    metrics = metrics_user_rec(targets, recommended, k)
    return metrics

def test_model(model, test, k, hybrid):
    rec_dic = {}
    if hybrid ==False:
        with tqdm.tqdm(total=len(user_list)) as progress:
            for u in test_users:
                user_preds = model.predict(np.repeat(u,ui_matrix.shape[1]),np.array([i for i in range(ui_matrix.shape[1])]))
                top_k = np.argsort(user_preds)[-k:]
                rec_dic[u] = top_k
                progress.update(1)
    else:
        with tqdm.tqdm(total=len(user_list)) as progress:
            for u in test_users:
                user_preds = model.predict(np.repeat(u,ui_matrix.shape[1]),np.array([i for i in range(ui_matrix.shape[1])]),
                                           item_features = item_features)
                top_k = np.argsort(user_preds)[-k:]
                rec_dic[u] = top_k
                progress.update(1)

    targets =  np.array([i for i in rec_dic.keys()])
    recommended = np.array([i for i in rec_dic.values()])
    metrics = metrics_item_rec(targets, recommended, k)
    return metrics

In [283]:
ui_matrix = np.load("fa_ui_matrix.npy")
ui_matrixT = ui_matrix.T

In [4]:
ua_matrix = np.load("new_fa_ua_matrix.npy")

In [6]:
train, test = cross_validation.random_train_test_split(sparse.coo_matrix(ui_matrix), test_percentage=0.2, random_state=None)

In [292]:
train_users = train.nonzero()[0]
test_users = test.nonzero()[0]
train_items = train.nonzero()[1]
test_items = test.nonzero()[1]

In [7]:
user_features = np.load("user_attributes_npc.npy")

In [9]:
user_features = sparse.coo_matrix(user_features)

In [11]:
item_features = np.load("new_fa_ia_matrix.npy")

In [12]:
item_features = sparse.coo_matrix(item_features)

### User-Item

In [21]:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k, recall_at_k


# Instantiate and train the model
model = LightFM(loss='warp')
model.fit(train, user_features = user_features,
          item_features=item_features, epochs=100)

mf1 = LightFM(loss='bpr')
mf1.fit(train, epochs=100)

In [244]:
metrics_bpr_20 = test_model(mf1,test,20)
pd.Series(metrics_bpr_20).to_csv("baselines_item-user/metrics_bpr_20.csv")

In [254]:
model.predict(np.array([1]), np.array([1]),user_features=user_features, item_features = item_features)

array([-608.4024], dtype=float32)

In [263]:
metrics = test_model(model,test,10, hybrid=True)
pd.Series(metrics).to_csv("baselines_item-user/metrics_hybrid_10_nousers.csv")

100%|██████████| 19041/19041 [01:32<00:00, 204.84it/s]


In [264]:
metrics

(0.003, 0.01169153339737001, 0.006635734931042895)

In [265]:
metrics = test_model(model,test,20, hybrid=True)
pd.Series(metrics).to_csv("baselines_item-user/metrics_hybrid_20_nousers.csv")

100%|██████████| 19041/19041 [01:33<00:00, 202.74it/s]


In [266]:
metrics

(0.0031, 0.021498005990391773, 0.01062182431756176)

### Item-User

In [281]:
# Instantiate and train the model
model = LightFM(loss='warp')
model.fit(train.T, user_features = item_features,
          item_features=user_features, epochs=100)

mf1 = LightFM(loss='bpr')
mf1.fit(train.T, epochs=100)

<lightfm.lightfm.LightFM at 0x7ff00faf1130>

In [287]:
user_features

<86297x60 sparse matrix of type '<class 'numpy.int64'>'
	with 258914 stored elements in COOrdinate format>

In [289]:
ui_matrixT.shape

(10456, 86297)

In [296]:
metrics_hybrid_20 = test_model_item_user(model,test,20, hybrid=True)
pd.Series(metrics_hybrid_20).to_csv("baselines_item-user/metrics_hybrid_20.csv")

In [298]:
metrics_hybrid_20

(0.0008, 0.0053514663088928315, 0.0030504014181894756)

In [299]:
metrics = test_model_item_user(model,test,10, hybrid=True)
pd.Series(metrics).to_csv("baselines_item-user/metrics_hybrid_10.csv")

 99%|█████████▉| 18884/19041 [06:51<00:03, 45.88it/s]


In [300]:
metrics

(0.0007, 0.0033925894740746477, 0.002422665190107051)

In [301]:
metrics = test_model_item_user(mf1,test,20, hybrid=False)
pd.Series(metrics).to_csv("baselines_item-user/metrics_bpr_20.csv")

 99%|█████████▉| 18884/19041 [06:45<00:03, 46.55it/s]


In [302]:
metrics

(0.2285, 0.42958979151601356, 0.27674562708594624)

In [303]:
metrics = test_model_item_user(mf1,test,10, hybrid=False)
pd.Series(metrics).to_csv("baselines_item-user/metrics_bpr_20.csv")

 99%|█████████▉| 18884/19041 [06:46<00:03, 46.45it/s]


In [304]:
metrics

(0.2402, 0.4603669129918061, 0.35823127156978213)