https://github.com/massquantity/LibRecommender

In [80]:
import implicit
from scipy.sparse import coo_matrix

import pandas as pd
import ml_metrics as metrics

In [81]:
user_hist_df = pd.read_csv('../data/raw/movies_dataset_10 months.csv')

user_hist_df['ts'] = pd.to_datetime(user_hist_df['ts'])
user_hist_df['month'] = user_hist_df['ts'].dt.month

test_1 = user_hist_df[user_hist_df['month'] == 6]
test_2 = user_hist_df[user_hist_df['month'] == 7]
user_hist_df = user_hist_df[user_hist_df['episode_id'] == 0]
train = user_hist_df[~user_hist_df['month'].isin([6, 7])]

In [82]:
correct_1 = test_1.drop_duplicates(['user_id', 'movie_id']).groupby('user_id')['movie_id'].apply(list).to_dict()
correct_2 = test_2.drop_duplicates(['user_id', 'movie_id']).groupby('user_id')['movie_id'].apply(list).to_dict()


In [83]:
train['event'] = 1
train = train[['user_id', 'movie_id', 'event']].drop_duplicates()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [84]:
user_encode = {u: i for i, u in enumerate(train['user_id'].unique())}
item_encode = {u: i for i, u in enumerate(train['movie_id'].unique())}

user_decode = {v: k for k, v in user_encode.items()}
item_decode = {v: k for k, v in item_encode.items()}

n_users, n_items = len(user_encode), len(item_encode)

train['user_id'] = train['user_id'].apply(lambda x: user_encode[x])
train['movie_id'] = train['movie_id'].apply(lambda x: item_encode[x])

In [85]:
item_user = coo_matrix(
    (train['event'], (train['user_id'], train['movie_id'])), shape=(n_users, n_items))

user_item = coo_matrix(
    (train['event'], (train['movie_id'], train['user_id'])), shape=(n_items, n_users))

In [86]:
user_item.shape

(4063, 2055)

In [53]:
# initialize a model
model = implicit.als.AlternatingLeastSquares(factors=5)

# train the model on a sparse matrix of item/user/confidence weights
model.fit(user_item, show_progress=False)

In [110]:
model = implicit.lmf.LogisticMatrixFactorization(factors = 5, random_state = 42)

model.fit(user_item, show_progress=False)

In [123]:
model = implicit.nearest_neighbours.TFIDFRecommender(K=10)

model.fit(user_item, show_progress=False)

In [88]:
movies_database = pd.read_csv('../data/raw/movies.csv')
useless_movies = movies_database[(movies_database['year'] < 2010) & 
                                 (~movies_database['id'].isin(list(item_encode.keys()))) &
                                 (movies_database['imdb_rating'] < 6)]['id'].values

useless_movies = [m for m in useless_movies if m in item_decode]

In [111]:
# als
res = model.recommend_all(item_user, N = 5, filter_items = useless_movies, show_progress=False)

In [112]:
recom = {}

for ind, rec in enumerate(res):
    recom[user_decode[ind]] = [item_decode[x] for x in rec]

In [113]:
reco = []
corr = []

num_to_recom = 5
for user in correct_1:
    reco.append(recom[user])
    corr.append(correct_1[user])

metrics.mapk(reco, corr, 5)

0.010527426160337552

In [52]:
# als
reco = []
corr = []

num_to_recom = 5
for user in correct_1:
    reco.append(recom[user])
    corr.append(correct_1[user])

metrics.mapk(reco, corr, 5)

0.02020464135021097

# inference

In [126]:
user_hist_df = pd.read_csv('../data/raw/movies_dataset_10 months.csv')
user_hist_df = user_hist_df[user_hist_df['episode_id'] == 0]

user_hist_df['event'] = 1
train = user_hist_df[['user_id', 'movie_id', 'event']].drop_duplicates()

In [127]:
user_encode = {u: i for i, u in enumerate(train['user_id'].unique())}
item_encode = {u: i for i, u in enumerate(train['movie_id'].unique())}

user_decode = {v: k for k, v in user_encode.items()}
item_decode = {v: k for k, v in item_encode.items()}

n_users, n_items = len(user_encode), len(item_encode)

train['user_id'] = train['user_id'].apply(lambda x: user_encode[x])
train['movie_id'] = train['movie_id'].apply(lambda x: item_encode[x])

item_user = coo_matrix(
    (train['event'], (train['user_id'], train['movie_id'])), shape=(n_users, n_items))

user_item = coo_matrix(
    (train['event'], (train['movie_id'], train['user_id'])), shape=(n_items, n_users))

In [128]:
# initialize a model
model = implicit.als.AlternatingLeastSquares(factors=5)

# train the model on a sparse matrix of item/user/confidence weights
model.fit(user_item, show_progress=False)

In [116]:
model = implicit.lmf.LogisticMatrixFactorization(factors = 5, random_state = 42)

model.fit(user_item, show_progress=False)

In [117]:
res = model.recommend_all(item_user, N = 5, filter_items = useless_movies, show_progress=False)

In [129]:
res = model.recommend_all(item_user, N = 5, show_progress=False)

In [130]:
recom = {}

for ind, rec in enumerate(res):
    recom[user_decode[ind]] = [item_decode[x] for x in rec]

In [131]:
import sys

PATH = '/Users/danil/Documents/github/sweet_RS/'
sys.path.append(str(PATH))

from src.utils import save_to_pickle

In [133]:
save_to_pickle(recom, '../data/processed/als_2.pickle')