In [1]:
import pandas as pd
# Ratings file: tab-separated. Column names are supplied here, so the first
# line of the file is read as data rather than as a header.
ratings = pd.read_csv(
    'u.data',
    sep='\t',
    names=['user_id', 'item_id', 'rating', 'timestamp'],
)

# Item file: pipe-separated, latin-1 encoded. The columns after IMDb_URL are
# presumably one indicator column per genre (not verified here).
movies = pd.read_csv(
    'u.item',
    sep='|',
    encoding='latin-1',
    names=[
        'item_id', 'title', 'release_date', 'video_release_date', 'IMDb_URL',
        'unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy',
        'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
        'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
    ],
)

# Attach the movie title to every rating (inner join on item_id).
df = ratings.merge(movies[['item_id', 'title']], on='item_id')

In [2]:
# Preview the first rows of the merged ratings/title frame.
df.head()

Unnamed: 0,user_id,item_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


# **Data Cleaning**

In [3]:
# Count missing values in each column of the merged frame.
df.isnull().sum()

Unnamed: 0,0
user_id,0
item_id,0
rating,0
timestamp,0
title,0


In [4]:
# Count rows that exactly duplicate an earlier row (all columns compared).
df.duplicated().sum()

np.int64(0)

In [5]:
# Users as rows, titles as columns, ratings as cells. Cells with no rating
# stay NaN; repeated (user_id, title) pairs are averaged.
user_item_matrix = df.pivot_table(
    values='rating',
    index='user_id',
    columns='title',
    aggfunc='mean',
)


In [6]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# cosine_similarity cannot take NaN, so unrated cells are treated as 0.
user_item_filled = user_item_matrix.fillna(0)

# Pairwise user-to-user cosine similarity, labelled with the user ids on
# both axes so it can be indexed by user_id.
user_similarity = cosine_similarity(user_item_filled)
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_filled.index,
    columns=user_item_filled.index,
)


In [7]:
def get_top_n_recommendations(user_id, n=5):
    """Recommend up to ``n`` titles that ``user_id`` has not rated yet.

    Each title's score is the similarity-weighted sum of the other users'
    ratings, divided by the sum of *all* of the target user's similarities.
    A neighbour who did not rate a title adds nothing to the numerator but
    still counts in the denominator, so the score favours widely rated
    titles and is not a predicted rating on the original rating scale.

    Reads the module-level ``user_similarity_df`` and ``user_item_matrix``.

    Args:
        user_id: Label of the target user in both frames.
        n: Maximum number of titles to return.

    Returns:
        pandas.Series of scores indexed by title, sorted in descending
        order. Titles that no other user rated are left out. The Series is
        empty when the similarities to all other users sum to zero.

    Raises:
        KeyError: If ``user_id`` is not present in ``user_similarity_df``.
    """
    if user_id not in user_similarity_df.columns:
        raise KeyError(f"Unknown user_id: {user_id!r}")

    # Similarity of the target user to every other user (self excluded).
    similar_users = user_similarity_df[user_id].drop(user_id)

    sum_similarities = similar_users.sum()
    if sum_similarities == 0:
        # Nothing to weight by; dividing would only produce inf/NaN scores.
        return pd.Series(dtype='float64')

    # One vectorised pass instead of a Python loop over every neighbour:
    # scale each neighbour's row by its similarity, then add up per title.
    # min_count=1 keeps titles nobody rated as NaN rather than 0.
    neighbour_ratings = user_item_matrix.loc[similar_users.index]
    weighted_ratings = neighbour_ratings.mul(similar_users, axis=0).sum(min_count=1)

    # Normalize by sum of similarities
    weighted_ratings /= sum_similarities

    watched_movies = user_item_matrix.loc[user_id].dropna().index
    recommendations = weighted_ratings.drop(watched_movies, errors='ignore')

    # A NaN score means no neighbour rated the title, so it cannot be ranked.
    recommendations = recommendations.dropna()

    return recommendations.sort_values(ascending=False).head(n)


In [8]:
# Demo: top-5 user-based recommendations for a single user.
user_id = 196
recommended_movies = get_top_n_recommendations(user_id=user_id, n=5)
print(f"Recommended movies for user {user_id}:\n", recommended_movies)


Recommended movies for user 196:
 title
Star Wars (1977)                  2.950404
Fargo (1996)                      2.675448
Return of the Jedi (1983)         2.341793
Raiders of the Lost Ark (1981)    2.178680
Godfather, The (1972)             2.124019
dtype: float64


In [9]:
def precision_at_k(user_id, k=5, relevance_threshold=4, holdout_fraction=0.2,
                   random_state=42):
    """Hold-out precision@k of ``get_top_n_recommendations`` for one user.

    The recommender never returns a title the user has already rated, so
    comparing its output with the user's own rated titles always gives 0.
    Instead, a sample of the user's liked titles is hidden from
    ``user_item_matrix``, recommendations are generated, and precision is
    the share of the ``k`` slots filled by hidden titles. The matrix is
    restored before returning, even if the recommender raises.

    Limitation: ``user_similarity_df`` is not recomputed, so the
    similarities still reflect the hidden ratings.

    Args:
        user_id: Label of the user in ``user_item_matrix``.
        k: Number of recommendations to evaluate; must be positive.
        relevance_threshold: Minimum rating that counts as "liked"
            (default 4 presumably suits a 1-5 scale; adjust otherwise).
        holdout_fraction: Share of liked titles to hide. At least one and
            at most all liked titles are hidden.
        random_state: Seed for the hold-out sample, for reproducibility.

    Returns:
        float in [0, 1]: hidden liked titles recommended, divided by ``k``.
        0.0 when the user has no liked titles.

    Raises:
        ValueError: If ``k`` is not positive.
        KeyError: If ``user_id`` is not in ``user_item_matrix``.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")

    user_ratings = user_item_matrix.loc[user_id].dropna()
    liked = user_ratings[user_ratings >= relevance_threshold]
    if liked.empty:
        return 0.0

    # Clamp the hold-out size to [1, number of liked titles].
    n_holdout = int(round(len(liked) * holdout_fraction))
    n_holdout = min(max(n_holdout, 1), len(liked))
    held_out = liked.sample(n=n_holdout, random_state=random_state)
    held_out_titles = held_out.index

    # Hide the held-out ratings so the recommender treats them as unseen.
    user_item_matrix.loc[user_id, held_out_titles] = float('nan')
    try:
        recommended = get_top_n_recommendations(user_id, n=k).index
    finally:
        # Always put the original ratings back.
        user_item_matrix.loc[user_id, held_out_titles] = held_out.to_numpy()

    true_positives = len(set(recommended) & set(held_out_titles))
    return true_positives / k


In [17]:
# Demo: precision@5 of the user-based recommender for a single user.
user_id = 196
evaluate = precision_at_k(user_id=user_id, k=5)
print(f"Precision at 5 for user {user_id}: {evaluate}")


Precision at 5 for user 196: 0.0


# **Bonus**

In [12]:
from sklearn.decomposition import TruncatedSVD

# TruncatedSVD uses a randomized solver by default, so the seed is fixed to
# make the factors (and the recommendations built on them) reproducible.
svd = TruncatedSVD(n_components=20, random_state=42)
latent_matrix = svd.fit_transform(user_item_filled)

# Project the 20 latent factors back onto the title columns; the result is
# a low-rank approximation of the zero-filled rating matrix.
new_matrix = np.dot(latent_matrix, svd.components_)
new_df = pd.DataFrame(new_matrix, index=user_item_filled.index, columns=user_item_filled.columns)

def get_svd_recommendations(user_id, n=5):
    """Return the ``n`` highest reconstructed scores among unrated titles.

    Takes the user's row from the module-level ``new_df``, removes every
    title the user has a rating for in ``user_item_matrix``, and returns
    the remaining scores sorted in descending order.

    Args:
        user_id: Row label of the user in ``new_df`` and ``user_item_matrix``.
        n: Number of titles to return.

    Returns:
        pandas.Series of scores indexed by title, highest first.
    """
    already_rated = user_item_matrix.loc[user_id].dropna().index
    predicted_scores = new_df.loc[user_id]
    # errors='ignore' tolerates rated titles that are absent from new_df.
    unseen_scores = predicted_scores.drop(already_rated, errors='ignore')
    ranked = unseen_scores.sort_values(ascending=False)
    return ranked.head(n)


In [13]:
# Demo: top-5 SVD-based recommendations for a single user.
user_id = 196
recommended_movies = get_svd_recommendations(user_id=user_id, n=5)
print(f"Recommended movies for user {user_id}:\n", recommended_movies)


Recommended movies for user 196:
 title
When Harry Met Sally... (1989)    1.653425
Fargo (1996)                      1.475086
Sleepless in Seattle (1993)       1.437616
Young Frankenstein (1974)         1.381451
Dave (1993)                       1.324838
Name: 196, dtype: float64
