In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD

# Ratings file: tab-separated with no header row.
ratings = pd.read_csv(
    "u.data",
    sep='\t',
    header=None,
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
)

# Item file: pipe-separated, latin-1 encoded; only the first two fields
# (id and title) are read.
movies = pd.read_csv(
    "u.item",
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=['movie_id', 'title'],
)

# Hold out 30% of the individual rating rows; the fixed seed keeps the split
# reproducible across runs.
train, test = train_test_split(ratings, test_size=0.3, random_state=42)

# Reshape each split into a user x movie matrix. A (user, movie) pair that is
# absent from a split is stored as 0.
pivot_kwargs = dict(index='user_id', columns='movie_id', values='rating')
user_item_train = train.pivot(**pivot_kwargs).fillna(0)
user_item_test = test.pivot(**pivot_kwargs).fillna(0)



In [27]:

# Give both matrices the same sorted movie-id axis: every catalog movie plus
# every movie seen in either split.
#
# The previous per-column loop zero-filled a movie in BOTH matrices whenever it
# was missing from train, which erased genuine held-out ratings for movies that
# only occur in the test split. It also never added train-only movies to the
# test matrix, so the two frames ended up with different columns. Reindexing
# only fills columns that are actually missing and leaves existing ones intact.
all_movie_ids = (
    pd.Index(movies['movie_id'])
    .union(user_item_train.columns)
    .union(user_item_test.columns)
    .unique()
    .sort_values()
)

user_item_train = user_item_train.reindex(columns=all_movie_ids, fill_value=0)
user_item_test = user_item_test.reindex(columns=all_movie_ids, fill_value=0)

# Row i of user_sim corresponds to row i of user_item_train; row/column j of
# item_sim corresponds to column j of user_item_train.
user_sim = cosine_similarity(user_item_train)
item_sim = cosine_similarity(user_item_train.T)


In [28]:

def recommend_user(user_id, top_n=10):
    """Recommend unseen movies to a user with user-based collaborative filtering.

    Every movie is scored by the similarity-weighted average of all users'
    training ratings, using this user's row of ``user_sim`` as the weights.

    Args:
        user_id: Label of the user in ``user_item_train.index``.
        top_n: Maximum number of titles to return.

    Returns:
        NumPy array of movie titles ordered from best to worst score. Movies
        the user already rated in the training matrix are never returned, so
        fewer than ``top_n`` titles may come back.

    Raises:
        KeyError: If ``user_id`` is not in ``user_item_train.index``.
    """
    # Look the row up by label. ``user_id - 1`` is only the right position
    # when the ids in the index are exactly 1..N with no gaps.
    user_idx = user_item_train.index.get_loc(user_id)
    sim_scores = user_sim[user_idx]
    ratings_matrix = user_item_train.values
    user_ratings = ratings_matrix[user_idx]

    weight_total = np.abs(sim_scores).sum()
    if weight_total > 0:
        pred_ratings = sim_scores.dot(ratings_matrix) / weight_total
    else:
        # No similarity mass at all: avoid 0/0 and score everything equally.
        pred_ratings = np.zeros(ratings_matrix.shape[1])
    pred_ratings = pred_ratings.astype(float)

    # Mask already-rated movies with -inf rather than 0 so they can never tie
    # with (or outrank) an unrated movie.
    pred_ratings[user_ratings > 0] = -np.inf
    ranked = np.argsort(pred_ratings)[::-1]
    top_idx = ranked[np.isfinite(pred_ratings[ranked])][:top_n]
    top_ids = user_item_train.columns[top_idx]

    # Reindexing by the ranked ids keeps the score order; an ``isin`` filter
    # would return titles in catalog order instead.
    titles = movies.drop_duplicates(subset='movie_id').set_index('movie_id')['title']
    return titles.reindex(top_ids).dropna().values

def recommend_item(user_id, top_n=10):
    """Recommend unseen movies to a user with item-based collaborative filtering.

    Each movie's score is the dot product of its row of ``item_sim`` with the
    user's training ratings.

    Args:
        user_id: Label of the user in ``user_item_train.index``.
        top_n: Maximum number of titles to return.

    Returns:
        NumPy array of movie titles ordered from best to worst score. Movies
        the user already rated in the training matrix are never returned, so
        fewer than ``top_n`` titles may come back.

    Raises:
        KeyError: If ``user_id`` is not in ``user_item_train.index``.
    """
    user_ratings = user_item_train.loc[user_id].values
    pred_ratings = item_sim.dot(user_ratings).astype(float)

    # Mask already-rated movies with -inf rather than 0: a 0 ties with every
    # unrated movie whose score is 0, letting rated movies back into the list.
    pred_ratings[user_ratings > 0] = -np.inf
    ranked = np.argsort(pred_ratings)[::-1]
    top_idx = ranked[np.isfinite(pred_ratings[ranked])][:top_n]
    top_ids = user_item_train.columns[top_idx]

    # Reindexing by the ranked ids keeps the score order; an ``isin`` filter
    # would return titles in catalog order instead.
    titles = movies.drop_duplicates(subset='movie_id').set_index('movie_id')['title']
    return titles.reindex(top_ids).dropna().values


In [29]:
# Factorise the training matrix with a 20-component truncated SVD; the fixed
# seed makes the decomposition repeatable.
svd = TruncatedSVD(n_components=20, random_state=42)

# fit_transform yields one row per row of user_item_train; components_ has one
# column per column of user_item_train, so its transpose has one row per movie.
user_features = svd.fit_transform(user_item_train)
svd_matrix = user_features
item_features = svd.components_.T

def recommend_svd(user_id, top_n=10):
    """Recommend unseen movies to a user from the truncated-SVD factors.

    Scores are the dot product of the user's latent vector in
    ``user_features`` with every movie's latent vector in ``item_features``.

    Args:
        user_id: Label of the user in ``user_item_train.index``.
        top_n: Maximum number of titles to return.

    Returns:
        NumPy array of movie titles ordered from best to worst score. Movies
        the user already rated in the training matrix are never returned, so
        fewer than ``top_n`` titles may come back.

    Raises:
        KeyError: If ``user_id`` is not in ``user_item_train.index``.
    """
    # Look the row up by label. ``user_id - 1`` is only the right position
    # when the ids in the index are exactly 1..N with no gaps.
    user_idx = user_item_train.index.get_loc(user_id)
    pred_ratings = user_features[user_idx].dot(item_features.T).astype(float)
    user_ratings = user_item_train.loc[user_id].values

    # Reconstructed scores can be negative, so masking rated movies with 0
    # would rank them above negatively scored unrated movies. Use -inf.
    pred_ratings[user_ratings > 0] = -np.inf
    ranked = np.argsort(pred_ratings)[::-1]
    top_idx = ranked[np.isfinite(pred_ratings[ranked])][:top_n]
    top_ids = user_item_train.columns[top_idx]

    # Reindexing by the ranked ids keeps the score order; an ``isin`` filter
    # would return titles in catalog order instead.
    titles = movies.drop_duplicates(subset='movie_id').set_index('movie_id')['title']
    return titles.reindex(top_ids).dropna().values


In [30]:
def precision_at_k(recommend_func, k):
    """Mean precision@k of a recommender over the held-out test ratings.

    For every user in ``user_item_test`` with at least one positive held-out
    rating, asks ``recommend_func`` for ``k`` titles, maps them back to movie
    ids, and records the fraction ``hits / k``.

    Args:
        recommend_func: Callable ``f(user_id, top_n=...)`` returning an
            iterable of movie titles.
        k: Number of recommendations requested per user; also the denominator
            of each user's precision.

    Returns:
        Mean of the per-user precisions, or ``0.0`` when no user has any
        positive held-out rating.
    """
    # Build the title -> id lookup once instead of scanning ``movies`` for
    # every recommended title of every user. Keeping the first row per title
    # matches the old ``.values[0]`` choice for repeated titles.
    title_to_id = movies.drop_duplicates(subset='title').set_index('title')['movie_id']
    train_ids = user_item_train.columns

    precisions = []
    for user_id in user_item_test.index:
        held_out = user_item_test.loc[user_id]
        true_items = set(held_out[held_out > 0].index)
        if not true_items:
            continue
        recommended = recommend_func(user_id, top_n=k)
        # A title with no catalog match becomes NaN and is dropped: it simply
        # counts as a miss instead of raising IndexError.
        matched_ids = title_to_id.reindex(recommended).dropna().values
        recommended_ids = set(train_ids[train_ids.isin(matched_ids)])
        precisions.append(len(true_items & recommended_ids) / k)
    if len(precisions) == 0:
        return 0.0
    return np.mean(precisions)

In [31]:
# Cut-offs at which precision is reported.
Ks = [10, 20, 50]

# Report label -> recommender; dict order fixes both evaluation and row order.
evaluated_models = {
    'User-based CF': recommend_user,
    'Item-based CF': recommend_item,
    'SVD': recommend_svd,
}

# One inner dict per model mapping each k to its mean precision@k.
results = {
    label: {k: precision_at_k(recommender, k) for k in Ks}
    for label, recommender in evaluated_models.items()
}

# Models as rows, cut-offs as columns.
df_results = pd.DataFrame(results).transpose()
df_results.index.name = "Model"
df_results.columns.name = "K"

print("=== Precision@K Results ===\n")
print(df_results)

=== Precision@K Results ===

K                    10        20        50
Model                                      
User-based CF  0.344327  0.281548  0.197794
Item-based CF  0.375716  0.303977  0.213404
SVD            0.413256  0.345387  0.245599
