In [8]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# -------------------------------
# Step 1: Load the ratings data
# -------------------------------
# Column names are supplied explicitly, so the first line of the file is
# treated as data rather than as a header row.
columns = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv('u.data', names=columns, sep='\t')

# Utility matrix: one row per user_id, one column per item_id.
# (user, item) pairs with no rating are left as NaN.
utility_matrix = pd.pivot_table(
    ratings,
    values='rating',
    index='user_id',
    columns='item_id',
)

# -------------------------------
# Step 2: Center ratings by user mean
# -------------------------------
# Average rating of each user, taken over the items that user rated
# (NaN cells are skipped by the mean).
user_means = utility_matrix.mean(axis='columns')

# Subtract every user's own average from that user's row; unrated cells stay NaN.
centered_matrix = utility_matrix.subtract(user_means, axis='index')

# After centering, 0 means "no deviation from the user's average", so filling
# the unrated cells with 0 keeps them from contributing to the similarity.
filled_centered = centered_matrix.fillna(value=0)

# Pairwise cosine similarity between user rows; row/column order follows
# utility_matrix.index.
user_sim = cosine_similarity(filled_centered)

# -------------------------------
# Step 3: Find top 10 similar users to user 1
# -------------------------------
# Positional row of user_id = 1 in the utility matrix (and therefore in user_sim).
user1_index = utility_matrix.index.get_loc(1)

# (row position, similarity to user 1) pairs, most similar first.
similarities = sorted(
    enumerate(user_sim[user1_index]),
    key=lambda pair: pair[1],
    reverse=True,
)

# Drop user 1 by position instead of assuming it is the first entry of the
# sorted list: a user whose centered ratings are all zero has self-similarity 0,
# and rounding can put another user's score fractionally higher, so slicing
# [1:11] could discard the best neighbour and keep user 1 itself.
top_10_users = [i for i, _ in similarities if i != user1_index][:10]

# Translate row positions back to the actual user_ids.
similar_user_ids = [utility_matrix.index[i] for i in top_10_users]

# -------------------------------
# Step 4: Predict rating for item 508
# -------------------------------
item_id = 508

# Ratings the selected neighbours gave to this item; NaN where a neighbour
# did not rate it.
ratings_508 = utility_matrix[item_id].loc[similar_user_ids]

# Unweighted average over the neighbours who did rate the item
# (the result is NaN when none of them did).
expected_rating = ratings_508.mean(skipna=True)

# -------------------------------
# Output result
# -------------------------------
print("----- Problem 1 -----")
print("Top 10 similar users to user 1:", similar_user_ids)
print(f"Expected rating of user 1 for item 508: {expected_rating:.2f}")


----- Problem 1 -----
Top 10 similar users to user 1: [773, 868, 592, 880, 429, 276, 916, 222, 457, 8]
Expected rating of user 1 for item 508: 4.20
