https://github.com/massquantity/LibRecommender

In [80]:
import implicit
from scipy.sparse import coo_matrix

import pandas as pd
import ml_metrics as metrics

In [243]:
user_hist_df = pd.read_csv('../data/raw/movies_dataset_10 months.csv')

user_hist_df['ts'] = pd.to_datetime(user_hist_df['ts'])
user_hist_df['month'] = user_hist_df['ts'].dt.month

test_1 = user_hist_df[user_hist_df['month'] == 6]
test_2 = user_hist_df[user_hist_df['month'] == 7]
user_hist_df = user_hist_df[user_hist_df['episode_id'] == 0]
train = user_hist_df[~user_hist_df['month'].isin([6, 7])]

In [244]:
correct_1 = test_1.drop_duplicates(['user_id', 'movie_id']).groupby('user_id')['movie_id'].apply(list).to_dict()
correct_2 = test_2.drop_duplicates(['user_id', 'movie_id']).groupby('user_id')['movie_id'].apply(list).to_dict()


In [245]:
train['event'] = 1
train = train[['user_id', 'movie_id', 'event']].drop_duplicates()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [246]:
user_encode = {u: i for i, u in enumerate(train['user_id'].unique())}
item_encode = {u: i for i, u in enumerate(train['movie_id'].unique())}

user_decode = {v: k for k, v in user_encode.items()}
item_decode = {v: k for k, v in item_encode.items()}

n_users, n_items = len(user_encode), len(item_encode)

train['user_id'] = train['user_id'].apply(lambda x: user_encode[x])
train['movie_id'] = train['movie_id'].apply(lambda x: item_encode[x])

In [247]:
item_user = coo_matrix(
    (train['event'], (train['user_id'], train['movie_id'])), shape=(n_users, n_items))

user_item = coo_matrix(
    (train['event'], (train['movie_id'], train['user_id'])), shape=(n_items, n_users))

In [248]:
user_item.shape

(4063, 2055)

In [249]:
# initialize a model
model = implicit.als.AlternatingLeastSquares(factors=5, iterations= 15)

# train the model on a sparse matrix of item/user/confidence weights
model.fit(user_item, show_progress=False)

In [197]:
model = implicit.lmf.LogisticMatrixFactorization(factors = 5, learning_rate = 1, random_state = 42)

model.fit(user_item, show_progress=False)

In [253]:
model = implicit.bpr.BayesianPersonalizedRanking(factors = 10)

model.fit(user_item, show_progress=False)

In [257]:
model = implicit.nearest_neighbours.TFIDFRecommender(K=10)

model.fit(user_item, show_progress=False)

In [173]:
movies_database = pd.read_csv('../data/raw/movies.csv')
useless_movies = movies_database[(movies_database['year'] < 2010) & 
                                 (~movies_database['id'].isin(list(item_encode.keys()))) &
                                 (movies_database['imdb_rating'] < 6)]['id'].values

useless_movies = [m for m in useless_movies if m in item_decode]

In [254]:
# als
res = model.recommend_all(item_user, N = 5, filter_items = useless_movies, show_progress=False)

In [260]:
reco

[[11295, 19693, 8621, 16537, 10547],
 [3610, 19908, 16620, 9269, 10447],
 [11421, 3479, 4805, 8328, 17629],
 [17419, 8395, 8334, 14725, 19861],
 [16510, 2037, 13561, 16708, 18431],
 [4624, 3609, 11279, 9339, 16456],
 [18308, 18344, 19492, 17698, 18213],
 [18213, 18545, 18311, 3610, 18406],
 [14022, 14406, 14405, 4764, 19103],
 [16740, 18308, 18080, 17028, 16337],
 [9017, 16299, 8388, 16321, 16953],
 [18831, 10567, 10598, 10618, 13890],
 [16524, 16506, 4559, 8849, 9473],
 [18943, 15402, 18545, 16389, 15651],
 [13158, 2160, 18938, 8371, 11049],
 [10474, 13889, 6881, 7437, 7438],
 [2382, 2968, 9019, 9300, 2945],
 [18607, 16497, 18308, 18449, 13561],
 [4686, 3609, 18305, 18431, 16368],
 [17756, 10597, 15489, 10570, 8334],
 [7295, 17536, 10474, 6128, 7292],
 [2192, 18448, 18126, 51, 9125],
 [2954, 4004, 5, 16387, 17261],
 [8290, 13211, 17962, 9281, 11341],
 [16389, 8560, 4248, 15402, 16698],
 [19492, 15163, 16790, 17756, 18813],
 [18126, 19532, 19317, 18870, 18251],
 [3608, 17446, 16583, 10

In [262]:
# als
res = model.similar_items(19908)
res

[]

In [255]:
recom = {}

for ind, rec in enumerate(res):
    recom[user_decode[ind]] = [item_decode[x] for x in rec]

In [256]:
# als
reco = []
corr = []

num_to_recom = 5
for user in correct_1:
    try:
        reco.append(recom[user])
        corr.append(correct_1[user])
    except:
        print(user)
        pass

metrics.mapk(reco, corr, 5)

0.007202531645569619

In [188]:
# als
reco = []
corr = []

num_to_recom = 5
for user in correct_1:
    reco.append(recom[user])
    corr.append(correct_1[user])

metrics.mapk(reco, corr, 5)

0.020546413502109707

# inference

In [263]:
user_hist_df = pd.read_csv('../data/raw/movies_dataset_10 months.csv')
user_hist_df = user_hist_df[user_hist_df['episode_id'] == 0]

user_hist_df['event'] = 1
train = user_hist_df[['user_id', 'movie_id', 'event']].drop_duplicates()

In [264]:
user_encode = {u: i for i, u in enumerate(train['user_id'].unique())}
item_encode = {u: i for i, u in enumerate(train['movie_id'].unique())}

user_decode = {v: k for k, v in user_encode.items()}
item_decode = {v: k for k, v in item_encode.items()}

n_users, n_items = len(user_encode), len(item_encode)

train['user_id'] = train['user_id'].apply(lambda x: user_encode[x])
train['movie_id'] = train['movie_id'].apply(lambda x: item_encode[x])

item_user = coo_matrix(
    (train['event'], (train['user_id'], train['movie_id'])), shape=(n_users, n_items))

user_item = coo_matrix(
    (train['event'], (train['movie_id'], train['user_id'])), shape=(n_items, n_users))

In [265]:
# initialize a model
model = implicit.als.AlternatingLeastSquares(factors=5)

# train the model on a sparse matrix of item/user/confidence weights
model.fit(user_item, show_progress=False)

In [116]:
model = implicit.lmf.LogisticMatrixFactorization(factors = 5, random_state = 42)

model.fit(user_item, show_progress=False)

In [267]:
res = model.recommend_all(item_user, N = 10, filter_items = useless_movies, show_progress=False)

In [129]:
res = model.recommend_all(item_user, N = 5, show_progress=False)

In [268]:
recom = {}

for ind, rec in enumerate(res):
    recom[user_decode[ind]] = [item_decode[x] for x in rec]

In [269]:
import sys

PATH = '/Users/danil/Documents/github/sweet_RS/'
sys.path.append(str(PATH))

from src.utils import save_to_pickle

In [270]:
save_to_pickle(recom, '../data/processed/als_2_10.pickle')