In [1]:
import numpy as np
import pandas as pd

In [4]:
# Read only the first 50,000 rows of the item metadata CSV.
df = pd.read_csv(
    "../data/item_metadata.csv",
    nrows=50_000,
)

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# One text per item: title, store and description joined by single spaces,
# with missing values replaced by empty strings before concatenation.
# The vectorizer is limited to 10000 features via ``max_features``.
tfidf = TfidfVectorizer(max_features=10000)
tfidf_mat = tfidf.fit_transform(
    df["title"].fillna("")
    + " "
    + df["store"].fillna("")
    + " "
    + df["description"].fillna("")
)

In [None]:
# Combine a user's training ratings with the TF-IDF item matrix to build a user profile
from scipy.sparse import vstack

# Assumes the ratings from train.csv have the columns: user_id, item_id, rating,
# and that each item_id is the row index of that item in tfidf_matrix.

def build_user_profile(user_id, train_df, tfidf_matrix):
    """Build a rating-weighted mean item vector for one user.

    Every item the user rated contributes its row of ``tfidf_matrix``
    multiplied by the rating; the contributions are summed and divided by
    the number of rated items.

    Parameters
    ----------
    user_id : scalar
        Value compared against ``train_df["user_id"]``.
    train_df : pandas.DataFrame
        Ratings table with ``user_id``, ``item_id`` and ``rating`` columns.
        ``item_id`` values are used directly as row indices into
        ``tfidf_matrix`` (assumed to be valid integer positions -- confirm
        against how the matrix was built).
    tfidf_matrix : scipy sparse matrix or ndarray, shape (n_items, n_features)
        Item feature matrix.

    Returns
    -------
    numpy.ndarray of shape (1, n_features)
        The user's profile as a plain 2-D float array. A user with no rows
        in ``train_df`` gets an all-zero profile.
    """
    user_rated = train_df[train_df["user_id"] == user_id]

    if user_rated.empty:
        # No ratings: a mean over zero rows is undefined, so return a
        # neutral all-zero profile of the right width instead.
        return np.zeros((1, tfidf_matrix.shape[1]))

    indices = user_rated["item_id"].to_numpy()
    ratings = user_rated["rating"].to_numpy(dtype=float)

    # Rows of the item matrix for the rated items.
    item_vectors = tfidf_matrix[indices]

    # sum_i rating_i * vector_i expressed as a matrix-vector product; this
    # works for sparse and dense inputs alike.
    weighted_sum = item_vectors.T @ ratings

    # Return a plain ndarray rather than ``np.matrix`` (what a reduction on
    # a scipy sparse matrix yields) so the result can be handed to
    # scikit-learn, whose input validation does not accept ``np.matrix``.
    profile = np.asarray(weighted_sum, dtype=float).reshape(1, -1) / len(ratings)
    return profile


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_for_user(user_id, train_df, tfidf_matrix, top_k=10):
    """Recommend the ``top_k`` unseen items most similar to a user's profile.

    The profile from ``build_user_profile`` is compared with every row of
    ``tfidf_matrix`` by cosine similarity. Rows whose index appears among
    the user's ``item_id`` values in ``train_df`` are dropped, and the
    highest-scoring remaining row indices are returned.

    Parameters
    ----------
    user_id : scalar
        Value compared against ``train_df["user_id"]``.
    train_df : pandas.DataFrame
        Ratings table with at least ``user_id`` and ``item_id`` columns
        (``build_user_profile`` additionally reads ``rating``).
    tfidf_matrix : matrix of shape (n_items, n_features)
        Item feature matrix; row indices are treated as item ids.
    top_k : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    list of int
        Row indices into ``tfidf_matrix``, best match first, at most
        ``top_k`` long.
    """
    profile = build_user_profile(user_id, train_df, tfidf_matrix)
    # The profile may arrive as ``np.matrix`` (the type scipy sparse matrix
    # reductions produce), which scikit-learn's input validation does not
    # accept; normalise it to a plain (1, n_features) ndarray first.
    profile = np.asarray(profile).reshape(1, -1)
    scores = cosine_similarity(profile, tfidf_matrix).ravel()

    # Items the user already rated must not be recommended again.
    seen = train_df.loc[train_df["user_id"] == user_id, "item_id"].to_numpy()

    # Rank all items by descending score, then mask out the seen ones in a
    # single vectorized step instead of a Python-level loop over all items.
    ranked = scores.argsort()[::-1]
    unseen = ranked[~np.isin(ranked, seen)]

    # ``tolist`` yields plain Python ints rather than numpy scalars.
    return unseen[:top_k].tolist()
