In [None]:
!pip install lightfm

In [67]:
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

# Fetch MovieLens 100k; with min_rating=5.0 only five-star
# ratings count as positive interactions.
data = fetch_movielens(min_rating=5.0)

# Pull out the training and held-out interaction sets in one step.
train, test = data['train'], data['test']

```python
model = LightFM(learning_rate=0.05, loss='bpr')
model.fit(train, epochs=10)

train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10, train_interactions=train).mean()

train_auc = auc_score(model, train).mean()
test_auc = auc_score(model, test, train_interactions=train).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))
```

In [88]:
# WARP loss; LightFM's other loss functions are logistic, BPR and k-OS WARP.
# Item and user feature matrices can be passed in as well (not used here).
# random_state fixes the seed so the metrics below are comparable between runs.
# NOTE(review): with num_threads > 1 small run-to-run differences may remain — confirm.
model = LightFM(learning_rate=0.05, loss='warp', random_state=42)

# fit_partial is the incremental-training entry point; it also accepts
# item_features / user_features.
model.fit_partial(train, epochs=30, num_threads=2)

# Precision@10: the test score excludes interactions already seen in training.
train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10, train_interactions=train).mean()

# AUC, averaged over users, with the same train-interaction exclusion for the test score.
train_auc = auc_score(model, train).mean()
test_auc = auc_score(model, test, train_interactions=train).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.35, test 0.06.
AUC: train 0.97, test 0.93.


In [101]:
import numpy as np

# source: https://github.com/amkurian/movie-recommendation-system/blob/master/recommender.py
def recommend(user_id, n_recommendations=10, recommender=None, dataset=None):
    """Return the best-scoring items the user has not already interacted with.

    Parameters
    ----------
    user_id : int
        Row index of the user in ``dataset['train']``.
    n_recommendations : int, optional
        Maximum number of item labels to return. Defaults to 10, the value
        that used to be hard-coded.
    recommender : optional
        Object exposing ``predict(user_id, item_ids)``. Defaults to the
        notebook-level ``model``.
    dataset : optional
        Mapping with a ``'train'`` interaction matrix and an ``'item_labels'``
        array. Defaults to the notebook-level ``data``.

    Returns
    -------
    list
        Item labels ordered from highest to lowest predicted score, with the
        user's known positives removed.

    Notes
    -----
    Prints the user's known positives as a side effect. Pass ``recommender``
    and ``dataset`` explicitly if the notebook-level ``model`` / ``data`` names
    have been rebound to other objects by later cells.
    """
    # Fall back to the notebook globals so existing calls such as
    # `recommend(7)` keep working unchanged.
    if recommender is None:
        recommender = model
    if dataset is None:
        dataset = data

    interactions = dataset['train']
    item_labels = dataset['item_labels']

    # Labels of the items this user already has a training interaction with.
    known_positives = set(item_labels[interactions.tocsr()[user_id].indices])

    # Score every item column for this user.
    n_items = interactions.shape[1]
    scores = recommender.predict(user_id, np.arange(n_items))

    # Sort labels by descending score and skip the ones already known.
    top_items = [label for label in item_labels[np.argsort(-scores)] if label not in known_positives]
    print('known positives:', known_positives)
    return top_items[:n_recommendations]
    
# Note kept from the original cell: LightFM also offers .predict_rank().
# (Written as a comment rather than a bare string so that the recommendation
# list stays the cell's final expression and is what Jupyter displays.)
recommend(7)

known positives: {'Glory (1989)', 'Butch Cassidy and the Sundance Kid (1969)', 'Empire Strikes Back, The (1980)', 'Terminator, The (1984)', 'Lawrence of Arabia (1962)', 'Aliens (1986)', 'Full Metal Jacket (1987)', 'Jurassic Park (1993)', 'Die Hard (1988)', 'Contact (1997)', 'Professional, The (1994)', 'Pulp Fiction (1994)', 'Star Trek: The Wrath of Khan (1982)', 'Raiders of the Lost Ark (1981)', 'Star Trek: First Contact (1996)', 'Star Trek III: The Search for Spock (1984)', 'Godfather, The (1972)', 'Alien (1979)'}


['Star Wars (1977)',
 'Return of the Jedi (1983)',
 'Braveheart (1995)',
 'Fugitive, The (1993)',
 'Shawshank Redemption, The (1994)',
 'Princess Bride, The (1987)',
 'Silence of the Lambs, The (1991)',
 'Terminator 2: Judgment Day (1991)',
 'Indiana Jones and the Last Crusade (1989)',
 'Forrest Gump (1994)']

### lightfm
- https://towardsdatascience.com/how-to-build-a-movie-recommender-system-in-python-using-lightfm-8fa49d7cbe3b
- https://towardsdatascience.com/solving-business-usecases-by-recommender-system-using-lightfm-4ba7b3ac8e62

### tensorrec
- https://hackernoon.com/tensorrec-a-recommendation-engine-framework-in-tensorflow-d85e4f0874e8
- https://gist.github.com/kayibal/16340660d1d85b9ea1872a5d9be0f383

### surprise
- https://medium.com/hacktive-devs/recommender-system-made-easy-with-scikit-surprise-569cbb689824
- https://realpython.com/build-recommendation-engine-collaborative-filtering/

In [None]:
# Uncomment to install Surprise into the kernel's environment:
# %pip install scikit-surprise

In [57]:
import pandas as pd
from surprise import Dataset
from surprise import Reader

# Tiny hand-made ratings table: users A-E rating items 1 and 2.
ratings_dict = dict(
    item=[1, 2, 1, 2, 1, 2, 1, 2, 1],
    user=list("AABBCCDDE"),
    rating=[1, 2, 2, 4, 2.5, 4, 4.5, 5, 3],
)
df = pd.DataFrame(ratings_dict)

# The reader carries the rating bounds used to interpret the values.
reader = Reader(rating_scale=(1, 5))

# Hand the columns to Surprise in (user, item, rating) order.
column_order = ["user", "item", "rating"]
data = Dataset.load_from_df(df[column_order], reader)

In [41]:
from surprise import Dataset

# Fetch Surprise's built-in movielens-100k ratings (downloaded if needed).
# NOTE(review): this rebinds `data`, replacing the toy dataset built from `df`
# in the previous cell — confirm which dataset the later cells should use.
# Alternative built-in dataset: 'ml-1m'.
data = Dataset.load_builtin(name='ml-100k')

In [58]:
from surprise import SVD, NMF, CoClustering, KNNBasic, KNNWithMeans
from surprise.model_selection import cross_validate

# Evaluate an NMF model (library-default hyperparameters) with 5-fold
# cross-validation. verbose=True prints the per-fold table; the returned
# dict of scores and timings is the cell's output.
algo = NMF()
cross_validate(algo=algo, data=data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    2.1309  2.1219  1.6636  0.2364  0.7927  1.3891  0.7546  
MAE (testset)     2.0714  2.1071  1.6357  0.2355  0.7927  1.3685  0.7384  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    


{'test_rmse': array([2.13091913, 2.12192154, 1.66364688, 0.23643617, 0.79272128]),
 'test_mae': array([2.07142857, 2.10714286, 1.63573323, 0.23548396, 0.79272128]),
 'fit_time': (0.0013074874877929688,
  0.0016717910766601562,
  0.0011878013610839844,
  0.0011448860168457031,
  0.0012860298156738281),
 'test_time': (4.935264587402344e-05,
  3.814697265625e-05,
  4.3392181396484375e-05,
  4.1484832763671875e-05,
  3.314018249511719e-05)}

In [59]:
# Build a trainset from every rating in `data` (no hold-out split) and fit on it.
trainingSet = data.build_full_trainset()
# NOTE(review): this rebinds the notebook-level name `model` to whatever
# algo.fit returns — presumably the fitted `algo` itself; confirm.
model = algo.fit(trainset=trainingSet)

In [64]:
# Estimated rating of user 'E' for item 2.
model.predict(uid='E', iid=2)

Prediction(uid='E', iid=2, r_ui=None, est=3.068125689180161, details={'was_impossible': False})