# Task 4: Recommendation System using User-Based Collaborative Filtering

In [6]:
# 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error

In [7]:
# 2. Create Dataset
# Each record is one observed rating: (user, item, rating).
rating_records = [
    ('Alice', 'Laptop', 5),
    ('Alice', 'Phone', 3),
    ('Alice', 'Tablet', 4),
    ('Bob', 'Laptop', 4),
    ('Bob', 'Tablet', 2),
    ('Bob', 'Monitor', 5),
    ('Carol', 'Phone', 2),
    ('Carol', 'Monitor', 4),
    ('Dave', 'Laptop', 5),
    ('Dave', 'Phone', 3),
]
column_names = ('User', 'Item', 'Rating')

# Transpose the records into the column-oriented dict the DataFrame is built from.
data = {
    name: [record[position] for record in rating_records]
    for position, name in enumerate(column_names)
}

# Long-format ratings table: one row per (User, Item) pair.
df = pd.DataFrame(data)
print("Dataset:\n", df)

Dataset:
     User     Item  Rating
0  Alice   Laptop       5
1  Alice    Phone       3
2  Alice   Tablet       4
3    Bob   Laptop       4
4    Bob   Tablet       2
5    Bob  Monitor       5
6  Carol    Phone       2
7  Carol  Monitor       4
8   Dave   Laptop       5
9   Dave    Phone       3


In [8]:
# 3. Create User-Item Matrix
# Reshape the long ratings table to wide form: one row per user, one column per item.
ratings_wide = pd.pivot_table(df, values='Rating', index='User', columns='Item')

# (User, Item) pairs with no rating come out of the pivot as NaN; encode them as 0
# so the matrix is fully numeric for the similarity step.
user_item_matrix = ratings_wide.fillna(0)
print("\nUser-Item Matrix:\n", user_item_matrix)



User-Item Matrix:
 Item   Laptop  Monitor  Phone  Tablet
User                                 
Alice     5.0      0.0    3.0     4.0
Bob       4.0      5.0    0.0     2.0
Carol     0.0      4.0    2.0     0.0
Dave      5.0      0.0    3.0     0.0


In [9]:
# 4. Compute User Similarity Matrix (Cosine Similarity)
# Each row of the user-item matrix is treated as that user's rating vector.
user_labels = user_item_matrix.index
pairwise_cosine = cosine_similarity(user_item_matrix)

# Re-attach the user labels on both axes so similarities can be looked up by name.
user_similarity = pd.DataFrame(pairwise_cosine, index=user_labels, columns=user_labels)
print("\nUser Similarity Matrix:\n", user_similarity)


User Similarity Matrix:
 User      Alice       Bob     Carol      Dave
User                                         
Alice  1.000000  0.590292  0.189737  0.824621
Bob    0.590292  1.000000  0.666667  0.511310
Carol  0.189737  0.666667  1.000000  0.230089
Dave   0.824621  0.511310  0.230089  1.000000


In [10]:
# 5. Predict Ratings
def predict_user_based_ratings(ratings_matrix, similarity_matrix):
    """Predict every (user, item) rating from the ratings of the *other* users.

    For a user ``u`` and item ``i`` the prediction is the similarity-weighted
    average of the ratings given to ``i`` by every other user who rated it:

        sum_o sim(u, o) * rating(o, i) / sum_o |sim(u, o)|

    where ``o`` ranges over users other than ``u`` whose rating for ``i`` is > 0.
    If that weight total is zero (no neighbour rated the item), the prediction is 0.

    Parameters
    ----------
    ratings_matrix : pandas.DataFrame
        Users on the index, items on the columns; a value > 0 is a rating,
        anything else is treated as "not rated".
    similarity_matrix : pandas.DataFrame
        User-by-user similarities, labelled by user on both axes.

    Returns
    -------
    pandas.DataFrame
        Float frame with the same index and columns as ``ratings_matrix``.
    """
    users = ratings_matrix.index
    ratings = ratings_matrix.to_numpy(dtype=float)
    has_rating = ratings > 0

    # Align similarities to the rating matrix's user order by label, and work on a
    # copy so the caller's similarity frame is never modified.
    neighbor_weights = similarity_matrix.loc[users, users].to_numpy(dtype=float).copy()
    # A user must not contribute to their own prediction: zero the self-similarity.
    np.fill_diagonal(neighbor_weights, 0.0)

    # Only actual ratings (> 0) take part in the weighted sum.
    weighted_sum = neighbor_weights @ np.where(has_rating, ratings, 0.0)
    # Total absolute weight of the neighbours who rated each item.
    weight_total = np.abs(neighbor_weights) @ has_rating.astype(float)

    # Divide only where there is weight; cells with no weight keep the 0 fallback.
    predictions = np.divide(
        weighted_sum,
        weight_total,
        out=np.zeros_like(weighted_sum),
        where=weight_total != 0,
    )
    return pd.DataFrame(predictions, index=users, columns=ratings_matrix.columns)


predicted_ratings = predict_user_based_ratings(user_item_matrix, user_similarity)
print("\nPredicted Ratings:\n", predicted_ratings)



Predicted Ratings:
 Item     Laptop   Monitor     Phone    Tablet
User                                         
Alice  4.582807  4.756757  2.812949  2.000000
Bob    5.000000  4.000000  2.622983  4.000000
Carol  4.386405  5.000000  3.000000  2.443101
Dave   4.617263  4.689655  2.781846  3.234526


In [11]:
# 6. Recommend Top Items for Each User
top_n = 2  # number of recommendations to show per user
print("\nTop Recommendations:")
for user, user_predictions in predicted_ratings.iterrows():
    # Rank every item by predicted rating, best first.
    ranked_items = user_predictions.sort_values(ascending=False)
    # A 0 in the user-item matrix marks an item this user has not rated yet;
    # the mask is matched to the ranked items by item label.
    not_yet_rated = user_item_matrix.loc[user].eq(0)
    recommendations = ranked_items[not_yet_rated]
    print(f"\n{user}:")
    print(recommendations.head(top_n))



Top Recommendations:

Alice:
Item
Monitor    4.756757
Name: Alice, dtype: float64

Bob:
Item
Phone    2.622983
Name: Bob, dtype: float64

Carol:
Item
Laptop    4.386405
Tablet    2.443101
Name: Carol, dtype: float64

Dave:
Item
Monitor    4.689655
Tablet     3.234526
Name: Dave, dtype: float64


In [12]:
# 7. Compute RMSE on Known Ratings
# Actual ratings, in the row order of the original dataset.
original_ratings = df['Rating'].tolist()

# Predicted rating for each of those same (User, Item) pairs, in the same order.
pred_ratings = [
    predicted_ratings.loc[rating_user, rating_item]
    for rating_user, rating_item in zip(df['User'], df['Item'])
]

# RMSE is the square root of the mean squared error between actual and predicted.
mse = mean_squared_error(original_ratings, pred_ratings)
rmse = np.sqrt(mse)
print(f"\nRMSE on Known Ratings: {rmse:.3f}")



RMSE on Known Ratings: 1.114
