In [1]:
import os
# NOTE(review): this changes into the directory that os.getcwd() just returned,
# so the working directory is left where it already was. Kept as-is in case a
# later cell reads `path`.
path = os.getcwd()
os.chdir(path)

# Load (or reload, if already loaded) the IPython autoreload extension.
%reload_ext autoreload
# Mode 2: re-import edited external Python modules before each cell is executed.
%autoreload 2

import warnings
# Silences every warning for the rest of the session, including deprecation
# warnings from the libraries imported below; narrow the filter when debugging.
warnings.filterwarnings('ignore')


import sim, bpr, metrics
import numpy as np
import pandas as pd
from subprocess import call
from sklearn.utils import shuffle
from data_split import split_data
from WRMF.wrmf import *
from WRMF import wrmf_rec
from common.constants import (
    DEFAULT_USER_COL,
    DEFAULT_ITEM_COL,
    DEFAULT_TIMESTAMP_COL,
    DEFAULT_SPLIT_FLAG,
    DEFAULT_TEST_SIZE,
    DEFAULT_VAL_SIZE,
)
from common.enums import SimilarityMethod

In [2]:
# ---- notebook-wide configuration ----

# Directory (relative to the working directory) that caches downloaded datasets.
ds_dir = "datasets/"

# Rating-count cut-offs for movies and users.
# NOTE(review): not referenced by the cells visible here; confirm intended use.
new_users_rate_count = 10
new_movies_rate_count = 10

# Rating cut-off handed to metrics.auc_score in the evaluation cells.
min_rate_value = 3.0

# BPR hyper-parameters, expanded as keyword arguments into bpr.BPR.
# The values were picked at random, not tuned.
bpr_params = dict(
    reg=0.01,
    learning_rate=0.1,
    n_iters=160,
    n_factors=15,
    batch_size=100,
)

In [3]:
def load_ds(
    data_name,
    sub_url,
    base_url="https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_v2/",
):
    """Load a gzipped JSON-lines dataset, downloading it into ``ds_dir`` on first use.

    Parameters
    ----------
    data_name : str
        File stem; the file handled is ``<data_name>.json.gz``.
    sub_url : str
        Sub-directory below ``base_url`` that hosts the file.
    base_url : str, optional
        Root URL of the data repository. Defaults to the URL this notebook
        has always used, so existing two-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The file parsed with ``pd.read_json(..., lines=True)``.

    Raises
    ------
    RuntimeError
        If ``curl`` exits with a non-zero status. No file is left in the
        cache in that case, so the next call retries the download.
    """
    os.makedirs(ds_dir, exist_ok=True)
    gz_file_name = data_name + ".json.gz"
    file_path = os.path.join(ds_dir, gz_file_name)

    if not os.path.isfile(file_path):
        # URLs are always "/"-separated; build them with str.join rather than
        # the filesystem-oriented os.path.join.
        url_parts = (base_url.rstrip("/"), sub_url.strip("/"), gz_file_name)
        dl_path = "/".join(part for part in url_parts if part)

        # Download next to the final location and promote the file only after
        # curl reports success. Otherwise an interrupted or failed transfer
        # would be mistaken for a valid cached dataset on every later run.
        partial_path = file_path + ".part"
        try:
            exit_code = call(
                [
                    "curl",
                    "--fail",  # HTTP errors (>= 400) become a non-zero exit
                    "--location",  # follow redirects instead of saving them
                    "-o",
                    partial_path,
                    dl_path,
                ]
            )
            if exit_code != 0:
                raise RuntimeError(
                    "curl exited with status %d while downloading %s"
                    % (exit_code, dl_path)
                )
            os.replace(partial_path, file_path)
        finally:
            # Only present here when the download did not complete.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    return pd.read_json(file_path, lines=True)

In [4]:
# Fetch the Movies_and_TV_5 review file (downloaded on first use) and report
# its raw size.
reviews = load_ds("Movies_and_TV_5", "categoryFilesSmall")
print("review shape: ", reviews.shape)

# Map the Amazon field names onto the project's canonical column names.
# NOTE(review): DEFAULT_RATING_COL is not in the explicit `common.constants`
# import list of the first cell; presumably it arrives through
# `from WRMF.wrmf import *` - confirm and import it explicitly.
reviews = reviews.rename(
    columns=dict(
        reviewerID=DEFAULT_USER_COL,
        asin=DEFAULT_ITEM_COL,
        overall=DEFAULT_RATING_COL,
    )
)
reviews.head()

review shape:  (3410019, 12)


Unnamed: 0,rating,verified,reviewTime,userID,itemID,style,reviewerName,reviewText,summary,unixReviewTime,vote,image
0,5,True,"11 9, 2012",A2M1CU2IRZG0K9,0005089549,{'Format:': ' VHS Tape'},Terri,So sorry I didn't purchase this years ago when...,Amazing!,1352419200,,
1,5,True,"12 30, 2011",AFTUJYISOFHY6,0005089549,{'Format:': ' VHS Tape'},Melissa D. Abercrombie,Believe me when I tell you that you will recei...,Great Gospel VHS of the Cathedrals!,1325203200,,
2,5,True,"04 21, 2005",A3JVF9Y53BEOGC,000503860X,{'Format:': ' DVD'},Anthony Thompson,"I have seen X live many times, both in the ear...",A great document of a great band,1114041600,11.0,
3,5,True,"04 6, 2005",A12VPEOEZS1KTC,000503860X,{'Format:': ' DVD'},JadeRain,"I was so excited for this! Finally, a live co...",YES!! X LIVE!!,1112745600,5.0,
4,5,True,"12 3, 2010",ATLZNVLYKP9AZ,000503860X,{'Format:': ' DVD'},T. Fisher,X is one of the best punk bands ever. I don't ...,X have still got it,1291334400,5.0,


In [5]:
# Fetch the item metadata file (downloaded on first use).
metadata = load_ds("meta_Movies_and_TV", "metaFiles2")
# The label previously said "review shape", copied from the reviews cell,
# which mislabelled the size of the metadata frame.
print("metadata shape: ", metadata.shape)
metadata.head()

review shape:  (203766, 19)


Unnamed: 0,category,tech1,description,fit,title,also_buy,tech2,brand,feature,rank,also_view,main_cat,similar_item,date,price,asin,imageURL,imageURLHighRes,details
0,"[Movies & TV, Movies]",,[],,Understanding Seizures and Epilepsy,[],,,[],"886,503 in Movies & TV (",[],Movies & TV,,,,695009,[],[],
1,"[Movies & TV, Movies]",,[],,Spirit Led&mdash;Moving By Grace In The Holy S...,[],,,[],"342,688 in Movies & TV (",[],Movies & TV,,,,791156,[https://images-na.ssl-images-amazon.com/image...,[https://images-na.ssl-images-amazon.com/image...,
2,"[Movies & TV, Movies]",,[Disc 1: Flour Power (Scones; Shortcakes; Sout...,,My Fair Pastry (Good Eats Vol. 9),[],,Alton Brown,[],"370,026 in Movies & TV (",[],Movies & TV,,,,143529,[https://images-na.ssl-images-amazon.com/image...,[https://images-na.ssl-images-amazon.com/image...,
3,"[Movies & TV, Movies]",,[Barefoot Contessa Volume 2: On these three di...,,"Barefoot Contessa (with Ina Garten), Entertain...","[B002I5GNW4, B005WXPVMM, B009UY3W8O, B00N27ID1...",,Ina Garten,[],"342,914 in Movies & TV (","[B002I5GNW4, 0804187045, B009UY3W8O, 060960219...",Movies & TV,,,$74.95,143588,[],[],
4,"[Movies & TV, Movies]",,[Rise and Swine (Good Eats Vol. 7) includes bo...,,Rise and Swine (Good Eats Vol. 7),"[B000P1CKES, B000NR4CRM]",,Alton Brown,[],"351,684 in Movies & TV (",[B0015SVNXY],Movies & TV,,,,143502,[https://images-na.ssl-images-amazon.com/image...,[https://images-na.ssl-images-amazon.com/image...,


In [6]:
# Build the model inputs from the renamed review table.
# NOTE(review): this literal 3 sits next to `min_rate_value = 3.0` in the
# constants cell; confirm whether both are meant to be the same rating cut-off.
threshold = 3
# presumably X is the user-item interaction matrix and `ratings` the processed
# review table - verify against bpr.create_matrix.
X, ratings = bpr.create_matrix(
    reviews, DEFAULT_USER_COL, DEFAULT_ITEM_COL, DEFAULT_RATING_COL, threshold
)

In [7]:
# Split X into train and test parts; the seed is passed explicitly so the split
# can be repeated across runs.
# NOTE(review): 1234 is a bare literal; consider a named constant in the
# constants cell.
X_train, X_test = bpr.create_train_test(X, test_size=DEFAULT_TEST_SIZE, seed=1234)

In [8]:
# Re-create the `np.int` alias, which NumPy removed in release 1.24.
# NOTE(review): presumably `bpr` or one of its dependencies still references
# `np.int` - confirm, and fix it there so this global monkeypatch can go.
np.int = int

# Train BPR with the hyper-parameters from the constants cell.
# The recorded run took about 1h07 for 160 iterations, so re-running this cell
# is expensive.
bpr_original = bpr.BPR(**bpr_params)
bpr_original.fit(X_train)

BPR: 100%|██████████| 160/160 [1:07:24<00:00, 25.28s/it]


<bpr.BPR at 0x7ffaf89f3880>

In [10]:
# Score the fitted model on the training interactions. The result is formatted
# below as two values, labelled AUC and NDCG.
train_score = metrics.auc_score(bpr_original, X_train, min_rate_value)
print("Train AUC: %f, NDCG: %f" % train_score)

Train AUC: 0.965592, NDCG: 0.177685


In [11]:
# Same evaluation on the held-out interactions. The result is formatted below
# as two values, labelled AUC and NDCG.
test_score = metrics.auc_score(bpr_original, X_test, min_rate_value)
print("Test  AUC: %f, NDCG: %f" % test_score)

Test  AUC: 0.773447, NDCG: 0.101372
