In [1]:
import pandas as pd
import numpy as np
import pickle

from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF
from implicit.als import AlternatingLeastSquares

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df_cf = pd.read_parquet(r'/mnt/c/Projects/Recomender System/data/processed/ratings_for_cf.parquet')
print(df_cf.shape)
df_cf.head()

(24945870, 3)


Unnamed: 0,userId,movieId,rating
0,1,296,5.0
1,1,306,3.5
2,1,307,5.0
3,1,665,5.0
4,1,899,3.5


In [3]:
user_ids = df_cf["userId"].unique()
movie_ids = df_cf["movieId"].unique()
user_id_to_idx = {u:i for i,u in enumerate(user_ids)}
movie_id_to_idx = {m:i for i,m in enumerate(movie_ids)}

In [4]:
rows = df_cf["userId"].map(user_id_to_idx).astype("int32")
cols = df_cf["movieId"].map(movie_id_to_idx).astype("int32")
data = df_cf["rating"].astype("float32")
R = csr_matrix((data, (rows, cols)), shape=(len(user_ids), len(movie_ids)))

In [5]:
model = AlternatingLeastSquares(factors=64, regularization=0.05, iterations=20, random_state=42)
model.fit(R.T)

  check_blas_config()
100%|██████████| 20/20 [03:11<00:00,  9.60s/it]


In [22]:
idx_to_movie_id = {i:m for m,i in movie_id_to_idx.items()}

def _normalize_recs(recs):
    if isinstance(recs, tuple) and len(recs) == 2:
        item_ids, scores = recs
        return list(zip(np.asarray(item_ids).tolist(), np.asarray(scores).tolist()))
    return recs

def recommend_for_user(user_id, top_n=10):
    if user_id not in user_id_to_idx:
        return "User tidak ditemukan"

    uidx = user_id_to_idx[user_id]
    user_items = R[uidx]

    recs = model.recommend(
        uidx,
        user_items,
        N=top_n,
        filter_already_liked_items=True
    )

    rows_out = _normalize_recs(recs)

    out = []
    for i, s in rows_out:
        i = int(i)
        if i in idx_to_movie_id:
            movie_id = idx_to_movie_id[i]
        else:
            movie_id = i
        out.append((movie_id, float(s)))

    return pd.DataFrame(out, columns=["movieId", "score"])

In [30]:
df_movies = pd.read_csv(r"/mnt/c/Projects/Recomender System/data/raw/movies.csv")
def recommend_with_title(user_id, top_n=10):
    rec = recommend_for_user(user_id, top_n)
    if isinstance(rec, str):
        return rec
    
    out = rec.merge(
        df_movies[["movieId","title","genres"]],
        on="movieId",
        how="left"
    )

    out["title"] = out["title"].fillna("Unknown Title")
    out["genres"] = out["genres"].fillna("Unknown Genre")

    return out

In [31]:
recommend_with_title(1, 5)

Unnamed: 0,movieId,score,title,genres
0,159379,1.488745,Unknown Title,Unknown Genre
1,135973,1.484348,Unknown Title,Unknown Genre
2,144300,1.48083,As the Light Goes Out (2014),Action|Drama
3,136309,1.479399,Scooby-Doo! Moon Monster Madness (2015),Adventure|Animation|Children
4,147951,1.437021,Unknown Title,Unknown Genre


In [26]:
df_movies["movieId"].max(), df_movies["movieId"].nunique()

(np.int64(209171), 62423)

In [37]:
with open(r'/mnt/c/Projects/Recomender System/models/als_implicit.pkl', 'wb') as f:
    pickle.dump(model, f)