In [114]:
import numpy as np
import sys
sys.path.append("../tests")
sys.path.append("../implicit")

from implicit.als import AlternatingLeastSquares
from implicit.approximate_als import (AnnoyAlternatingLeastSquares, FaissAlternatingLeastSquares,
                                      NMSLibAlternatingLeastSquares)
from implicit.bpr import BayesianPersonalizedRanking
from implicit.nearest_neighbours import (BM25Recommender, CosineRecommender,
                                         TFIDFRecommender, bm25_weight)
from implicit.evaluation import precision_at_k, train_test_split, mean_average_precision_at_k
from twitter import get_twitter, read_data
from implicit.datasets.lastfm import get_lastfm

from recommender_base import RandomRecommender
from recommender_base_test import TestRecommenderBaseMixin

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [117]:
# Registry mapping a short model name (the string passed to get_model) to the
# recommender class that implements it.
MODELS = {"als":  AlternatingLeastSquares,
          "nmslib_als": NMSLibAlternatingLeastSquares,
          "annoy_als": AnnoyAlternatingLeastSquares,
          "faiss_als": FaissAlternatingLeastSquares,
          "tfidf": TFIDFRecommender,
          "cosine": CosineRecommender,
          "bpr": BayesianPersonalizedRanking,
          "bm25": BM25Recommender}


def get_model(model_name):
    """Instantiate the recommender registered under ``model_name``.

    The class is looked up in ``MODELS`` and constructed with this notebook's
    default hyper-parameters:

    * any subclass of ``AlternatingLeastSquares``: ``factors=64``, ``dtype=np.float32``
    * ``"bm25"``: ``K1=100``, ``B=0.5``
    * ``"bpr"``: ``factors=63``
    * everything else: the class's own defaults

    Raises:
        ValueError: if ``model_name`` has no entry in ``MODELS``.
    """
    recommender_cls = MODELS.get(model_name)
    if not recommender_cls:
        raise ValueError("Unknown Model '%s'" % model_name)

    # The ALS check is class-based so every ALS-derived entry shares one set of
    # defaults; the remaining special cases are keyed on the name.
    if issubclass(recommender_cls, AlternatingLeastSquares):
        return recommender_cls(factors=64, dtype=np.float32)
    if model_name == "bm25":
        return recommender_cls(K1=100, B=0.5)
    if model_name == "bpr":
        return recommender_cls(factors=63)

    # No notebook-specific overrides for this model.
    return recommender_cls()

In [150]:
def evaluate_model(model_name="als", dataset='twitter'):
    """Train a model on one dataset and report precision@20 on a held-out split.

    This is a single train/test split (``train_test_split``), not
    cross-validation.

    Parameters
    ----------
    model_name : str
        Key understood by ``get_model`` (an entry of ``MODELS``).
    dataset : str
        Either ``'twitter'`` or ``'lastfm'``.

    Returns
    -------
    The value returned by ``precision_at_k`` (it is also printed).

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names, or (raised by
        ``get_model``) if ``model_name`` is unknown.
    """
    # Validate up front and compare by value: `is` tests object identity and
    # only happened to work for interned string literals.
    if dataset not in ('twitter', 'lastfm'):
        raise ValueError("Unknown dataset '%s'" % dataset)

    # Build the requested model before loading any data so that a bad model
    # name fails fast.
    model = get_model(model_name)

    if dataset == 'twitter':
        artists, users, plays = get_twitter()
    else:
        artists, users, plays = get_lastfm()

    # split data_set to train set and testing set
    train, test = train_test_split(plays)

    print(train.shape)
    print(test.shape)

    # The model has to be trained before it can be scored; this mirrors the
    # fit(train) / evaluate(train.T, test.T) pattern used by the other
    # evaluation cells in this notebook.
    model.fit(train)

    # evaluation
    p = precision_at_k(model, train.T.tocsr(), test.T.tocsr(), K=20, num_threads=4)
    print('precision@k = ', p)
    return p

In [124]:
# Benchmark every selected model on the twitter dataset using one shared
# 80/20 train/test split.
artists, users, ratings = get_twitter()

train, test = train_test_split(ratings, train_percentage=0.8)

# The approximate ALS variants ('nmslib_als', 'annoy_als', 'faiss_als') are
# registered in MODELS but are left out of this run.
model_names = ['als', 'tfidf', 'cosine', 'bpr', 'bm25']

# model name -> mean average precision at K=20
twitter_dict_mean_pk = dict()

for model_name in model_names:
    model = get_model(model_name)
    model.fit(train)
    p = mean_average_precision_at_k(model, train.T.tocsr(), test.T.tocsr(), K=20, num_threads=4)
    twitter_dict_mean_pk[model_name] = p
    # The metric computed here is MAP@K, so label it as such rather than as
    # precision@K.
    print('model: ', model_name, 'MAP@k', p)

100%|██████████| 15.0/15 [00:00<00:00, 68.73it/s]
 82%|████████▏ | 8561/10399 [00:01<00:00, 7254.06it/s]


model:  als p@k 0.023577722042460805


100%|██████████| 8137/8137 [00:00<00:00, 263195.23it/s]
 82%|████████▏ | 8561/10399 [00:00<00:00, 19272.74it/s]


model:  tfidf p@k 0.008636345218551238


100%|██████████| 8137/8137 [00:00<00:00, 247242.10it/s]
 82%|████████▏ | 8561/10399 [00:00<00:00, 18511.21it/s]


model:  cosine p@k 0.004331847179154864


100%|██████████| 100/100 [00:01<00:00, 78.30it/s, correct=88.38%, skipped=1.60%]
 82%|████████▏ | 8561/10399 [00:01<00:00, 7730.84it/s]


model:  bpr p@k 0.016054753842349914


100%|██████████| 8137/8137 [00:00<00:00, 267464.86it/s]
 82%|████████▏ | 8561/10399 [00:00<00:00, 19231.09it/s]


model:  bm25 p@k 0.00334655955008917


In [122]:
# NOTE(review): `twitter_dict_pk` is not assigned in any cell visible in this
# notebook, so this cell would raise NameError on a fresh kernel unless it is
# defined elsewhere. Presumably it was filled by an earlier version of the
# evaluation loop that used precision_at_k -- confirm and restore that cell.
twitter_dict_pk

{'als': 0.08183796657495997,
 'bm25': 0.01798546380322753,
 'bpr': 0.062086806553607356,
 'cosine': 0.014454071366977374,
 'tfidf': 0.034040980577341604}

In [125]:
# Show the per-model mean_average_precision_at_k (K=20) results collected by
# the twitter evaluation loop.
twitter_dict_mean_pk

{'als': 0.023577722042460805,
 'bm25': 0.00334655955008917,
 'bpr': 0.016054753842349914,
 'cosine': 0.004331847179154864,
 'tfidf': 0.008636345218551238}

In [180]:
# NOTE(review): `lastfm_dict` is not assigned in any cell visible in this
# notebook, so this cell would raise NameError on a fresh kernel unless it is
# defined elsewhere. Presumably it came from a run of the evaluation loop on
# get_lastfm() data -- confirm which metric it holds and restore that cell.
lastfm_dict

{'als': 0.19061238357916616,
 'bm25': 0.14145695212969467,
 'bpr': 0.0898032195250087,
 'cosine': 0.053626105676711264,
 'tfidf': 0.1770607415528869}

## Test RandomRecommender

In [106]:
# Load the twitter dataset; `plays` is what the next cell splits into
# train/test sets.
artists, users, plays = get_twitter()

In [115]:
# Baseline check: fit RandomRecommender on an 80% training split of `plays`
# and score it with the same metric call (MAP at K=20) used for the other
# models, so the resulting number can be compared against theirs.
model = RandomRecommender()
train, test = train_test_split(plays, train_percentage=0.8)
model.fit(train)
# Last expression of the cell, so the score is displayed as the cell output.
mean_average_precision_at_k(model, train.T.tocsr(), test.T.tocsr(), K=20, num_threads=4)

 83%|████████▎ | 8608/10399 [00:01<00:00, 7154.53it/s]


0.00032677574919684536