In [2]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import matplotlib.pyplot as plt

In [3]:

def load_data():
    ratings = pd.read_csv('ml-100k/ml-100k/u.data', sep='\t', names=['userId', 'movieId', 'rating', 'timestamp'])
    movies = pd.read_csv('ml-100k/ml-100k/u.item', sep='|', names=['movieId', 'title'], usecols=[0, 1], encoding='latin-1')
    return ratings, movies

In [4]:
def preprocess_data(ratings):
    user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    return user_item_matrix

In [5]:
def train_model(ratings):
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
    trainset, testset = train_test_split(data, test_size=0.2)
    
    algo = SVD()
    algo.fit(trainset)
    predictions = algo.test(testset)
    return predictions

In [6]:
def get_recommendations(predictions, n = 10):
    top_n = {}
    for uid, iid, true_r, est, _ in predictions:
        if uid not in top_n:
            top_n[uid] = []
        top_n[uid].append((iid, est))
    
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    
    return top_n

In [11]:
def plot_rating_distribution(ratings):
    plt.hist(ratings['rating'], bins=5, edgecolor='black')
    plt.title('Distribution of Ratings')
    plt.xlabel('Rating')
    plt.ylabel('Frequency')
    plt.show()

In [12]:
user_id = int(input("Enter userId: "))
ratings, movies = load_data()
# print(ratings.head())
# print(movies.head())
user_item_matrix = preprocess_data(ratings)
# print(user_item_matrix.head())
# plot_rating_distribution(ratings)
predictions = train_model(ratings)
# print("RMSE:", accuracy.rmse(predictions))
top_n = get_recommendations(predictions)
if user_id in top_n:
    recommended_movies = top_n[user_id]
    recommended_movie_ids = [movie[0] for movie in recommended_movies]
    recommended_titles = movies[movies['movieId'].isin(recommended_movie_ids)]
    print(f"Recommended Movies for User {user_id}:")
    print(recommended_titles)
else:
    print(f"No recommendations available for User {user_id}.")


Enter userId:  10


Recommended Movies for User 10:
     movieId                                              title
58        59                           Three Colors: Red (1994)
97        98                   Silence of the Lambs, The (1991)
126      127                              Godfather, The (1972)
169      170                             Cinema Paradiso (1988)
181      182                                  GoodFellas (1990)
198      199               Bridge on the River Kwai, The (1957)
473      474  Dr. Strangelove or: How I Learned to Stop Worr...
479      480                          North by Northwest (1959)
518      519           Treasure of the Sierra Madre, The (1948)
655      656                                           M (1931)
