In [1]:
import random
import pandas as pd
import numpy as np

import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
from sklearn.preprocessing import MinMaxScaler

#-------------------------
# LOAD AND PREP THE DATA
#-------------------------
 
# Load the raw ratings; rows with missing values and the unused timestamp
# column are discarded.
ratings_raw = (
    pd.read_csv("datasets/100k/ratings.csv")
    .dropna()
    .drop(columns="timestamp")
)
print(ratings_raw.shape)

# Load the movie catalogue; the genres column is not used below.
movies_raw = (
    pd.read_csv("datasets/100k/movies.csv")
    .dropna()
    .drop(columns="genres")
)
print(movies_raw.shape)

# Attach the movie title to every rating (left join on movieId).
data = ratings_raw.join(movies_raw.set_index("movieId"), on="movieId")
print(data.shape)
print("Users:", data.userId.nunique())
print("Movies:", data.movieId.nunique())

# Drop zero ratings BEFORE assigning the integer codes. The codes are used
# directly as row/column indices of the sparse matrix, whose shape is
# (len(users), len(movies)). If the codes were assigned first, a user or movie
# that only had zero ratings would leave a gap in the codes and the largest
# index could exceed the matrix shape.
data = (
    data.loc[data.rating != 0]
    .assign(
        # Contiguous 0..n-1 codes for users and movies.
        user_id=lambda df: df.userId.astype("category").cat.codes,
        movie_id=lambda df: df.movieId.astype("category").cat.codes,
    )
    .drop(columns=["userId", "movieId"])
)

# Lookup frame so we can get the movie titles back in readable form later.
# movie_id is stored as a string here.
item_lookup = (
    data[["movie_id", "title"]]
    .drop_duplicates()
    .assign(movie_id=lambda df: df.movie_id.astype(str))
)

# Sorted lists of all user codes and movie codes, plus the rating values in
# row order of `data`.
users = list(np.sort(data.user_id.unique()))
movies = list(np.sort(data.movie_id.unique()))
ratings = list(data.rating)

# Row (user) and column (movie) index of every rating.
rows = data.user_id.astype(int)
cols = data.movie_id.astype(int)

# Construct the sparse user x movie matrix holding the ratings.
# NOTE: csr_matrix sums the values of duplicate (row, col) pairs.
data_sparse = sparse.csr_matrix((ratings, (rows, cols)), shape=(len(users), len(movies)))
data_sparse.shape

(100836, 3)
(9742, 2)
(100836, 4)
Users: 610
Movies: 9724


(610, 9724)

In [2]:
import als_recommender as als

# Build the project-local ALS recommender. The training log below reports
# "iteration k of 10" and the fitted factor matrices have 10 columns, which
# matches iterations=10 and latent=10. alpha_val and regularizer are passed
# through as-is (presumably the confidence scaling and the regularisation
# strength; confirm in als_recommender).
model = als.ALSRecommender(
    iterations=10,
    latent=10,
    alpha_val=40,
    regularizer=0.1,
)
model

<als_recommender.ALSRecommender at 0x218df07ab48>

In [3]:
# Train the model on the sparse user x movie rating matrix built above.
# The call is the cell's last expression, so its return value (a pair of
# sparse matrices: 610x10 and 9724x10 in the recorded output, presumably the
# user and item factors; confirm in als_recommender) is displayed.
model.fit(data_sparse)

iteration 1 of 10
iteration 2 of 10
iteration 3 of 10
iteration 4 of 10
iteration 5 of 10
iteration 6 of 10
iteration 7 of 10
iteration 8 of 10
iteration 9 of 10
iteration 10 of 10


(<610x10 sparse matrix of type '<class 'numpy.float64'>'
 	with 6100 stored elements in Compressed Sparse Row format>,
 <9724x10 sparse matrix of type '<class 'numpy.float64'>'
 	with 97240 stored elements in Compressed Sparse Row format>)

In [4]:
# Show recommendations as a movie/score table, with titles resolved through
# item_lookup.
# NOTE(review): the two positional 10s are presumably the user index and the
# number of recommendations to return; confirm against the
# ALSRecommender.recommend signature and consider passing them by keyword.
model.recommend(10, data_sparse, item_lookup, 10)

Unnamed: 0,movie,score
0,"Specialist, The (1994)",1.0
1,Eraser (1996),0.968935
2,Showgirls (1995),0.967016
3,Judge Dredd (1995),0.963066
4,City Slickers II: The Legend of Curly's Gold (...,0.941959
5,Johnny Mnemonic (1995),0.931688
6,"Bronx Tale, A (1993)",0.929558
7,Bad Boys (1995),0.927899
8,Demolition Man (1993),0.927359
9,Congo (1995),0.920243


## Grid search over the ALS hyperparameters

In [5]:
import implicit
from implicit.evaluation import train_test_split, precision_at_k, mean_average_precision_at_k, AUC_at_k, ndcg_at_k
from sklearn.model_selection import GridSearchCV

# Search space for the grid search: one candidate list per hyperparameter,
# keyed by the same names used when constructing ALSRecommender above.
# A full grid over these values is 4 * 4 * 3 * 3 = 144 combinations.
param_grid = [
    dict(
        iterations=[5, 10, 20, 30],
        latent=[5, 10, 20, 30],
        alpha_val=[15, 25, 40],
        regularizer=[0.1, 0.01, 0.001],
    ),
]

In [None]:
# Report the best hyperparameter combination found by the grid search.
# NOTE(review): `als_gridmodel` is not defined in any cell visible here and
# this cell has no execution count. On a fresh kernel it would raise NameError
# unless the cell that builds and fits the grid search is restored above.
print("best parameters:\n")
print(als_gridmodel.best_params_)