In [25]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
from scipy.sparse.linalg import svds

# Sample dataset
data = {
    'user_id': [1,1,1,2,2,3,3,3,4,4,5,5,5,6,6],
    'movie_id': [101,102,103,101,103,102,103,104,101,102,102,103,104,101,104],
    'rating': [5,4,3,4,5,2,4,4,3,4,5,3,5,4,5]
}
df = pd.DataFrame(data)

# Create user-movie matrix
user_movie_matrix = df.pivot_table(index='user_id', columns='movie_id', values='rating').fillna(0)

# Normalize ratings using MinMaxScaler
scaler = MinMaxScaler()
normalized_ratings = scaler.fit_transform(user_movie_matrix)
normalized_df = pd.DataFrame(normalized_ratings,
                             index=user_movie_matrix.index,
                             columns=user_movie_matrix.columns)

# Build KNN model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(normalized_df)

# Function to get recommendations using KNN
def get_movie_recommendations(user_id, n_recommendations=3):
    distances, indices = model_knn.kneighbors(
        normalized_df.loc[user_id].values.reshape(1, -1),
        n_neighbors=n_recommendations+1
    )
    similar_users = normalized_df.iloc[indices.flatten()[1:]]
    recommended_scores = similar_users.mean(axis=0).sort_values(ascending=False)

    user_rated = df[df['user_id'] == user_id]['movie_id']
    recommended_scores = recommended_scores[~recommended_scores.index.isin(user_rated)]
    return recommended_scores.head(n_recommendations)

# Get KNN recommendations for user 1
print("Top recommendations using KNN for user 1:")
recommendations = get_movie_recommendations(1)
for movie_id, score in recommendations.items():
    print(f"Movie {movie_id} (recommendation score: {score:.2f})")

# ------- SVD-based Recommendations -------

# Create user-movie matrix again for SVD
user_movie_matrix = df.pivot_table(index='user_id', columns='movie_id', values='rating').fillna(0)
matrix = user_movie_matrix.values

# Normalize by user mean
user_ratings_mean = np.mean(matrix, axis=1)
matrix_normalized = matrix - user_ratings_mean.reshape(-1, 1)

# Perform SVD
u, sigma, vt = svds(matrix_normalized, k=2)
sigma = np.diag(sigma)

# Predict ratings
predicted_ratings = np.dot(np.dot(u, sigma), vt) + user_ratings_mean.reshape(-1, 1)
predicted_df = pd.DataFrame(predicted_ratings,
                            index=user_movie_matrix.index,
                            columns=user_movie_matrix.columns)

# Function to get recommendations using SVD
def get_svd_recommendations(user_id, n_recommendations=3):
    user_ratings = predicted_df.loc[user_id]
    user_rated = df[df['user_id'] == user_id]['movie_id']
    recommendations = user_ratings[~user_ratings.index.isin(user_rated)]
    return recommendations.sort_values(ascending=False).head(n_recommendations)

# Get SVD recommendations for user 1
print("\nTop SVD-based recommendations for user 1:")
recommendations = get_svd_recommendations(1)
for movie_id, rating in recommendations.items():
    print(f"Movie {movie_id} (predicted rating: {rating:.2f})")


Top recommendations using KNN for user 1:
Movie 104 (recommendation score: 0.33)

Top SVD-based recommendations for user 1:
Movie 104 (predicted rating: -0.06)
