<a href="https://colab.research.google.com/github/fahadshakeel23/DataScience/blob/main/Collaborative_filtering_movie.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Step 1: Import Libraries & Load Dataset

In [7]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity


In [3]:
ratings_url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.data"

columns = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv(ratings_url, sep='\t', names=columns)
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
movies_url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.item"

movies = pd.read_csv(movies_url, sep='|', encoding = 'latin-1', header=None, usecols=[0, 1], names=['movie_id', 'title'])
movies.head()

Unnamed: 0,movie_id,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


Step 2: Prepare User-Movie Ratings Matrix

In [5]:
user_movie_matrix = ratings.pivot_table(index='user_id', columns='movie_id', values='rating').fillna(0)
user_movie_matrix.shape

(943, 1682)

Step 3: Compute User Similarities

In [8]:
user_similarity = cosine_similarity(user_movie_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_movie_matrix.index, columns=user_movie_matrix.index)
user_similarity_df.head()

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.166931,0.04746,0.064358,0.378475,0.430239,0.440367,0.319072,0.078138,0.376544,...,0.369527,0.119482,0.274876,0.189705,0.197326,0.118095,0.314072,0.148617,0.179508,0.398175
2,0.166931,1.0,0.110591,0.178121,0.072979,0.245843,0.107328,0.103344,0.161048,0.159862,...,0.156986,0.307942,0.358789,0.424046,0.319889,0.228583,0.22679,0.161485,0.172268,0.105798
3,0.04746,0.110591,1.0,0.344151,0.021245,0.072415,0.066137,0.08306,0.06104,0.065151,...,0.031875,0.042753,0.163829,0.069038,0.124245,0.026271,0.16189,0.101243,0.133416,0.026556
4,0.064358,0.178121,0.344151,1.0,0.031804,0.068044,0.09123,0.18806,0.101284,0.060859,...,0.052107,0.036784,0.133115,0.193471,0.146058,0.030138,0.196858,0.152041,0.170086,0.058752
5,0.378475,0.072979,0.021245,0.031804,1.0,0.237286,0.3736,0.24893,0.056847,0.201427,...,0.338794,0.08058,0.094924,0.079779,0.148607,0.071459,0.239955,0.139595,0.152497,0.313941


Step 4: Recommendation Function

In [11]:
def recommend_movies(user_id, num_recommendations=5):
    #FInd movies rated by the user
    user_ratings = user_movie_matrix.loc[user_id]

    #get similar users scores
    sim_scores = user_similarity_df[user_id]

    #Weighted reccomendation scores for unrated movies
    not_rated = user_ratings[user_ratings == 0].index
    scores = user_movie_matrix.loc[:, not_rated].T.dot(sim_scores) / sim_scores.sum()

    # top recommendations
    top_movie_ids = scores.sort_values(ascending=False).index[:num_recommendations]
    return movies[movies['movie_id'].isin(top_movie_ids)][['movie_id', 'title']]

#Example: recommendations for user 1
recommend_movies(1)



Unnamed: 0,movie_id,title
285,286,"English Patient, The (1996)"
287,288,Scream (1996)
317,318,Schindler's List (1993)
356,357,One Flew Over the Cuckoo's Nest (1975)
422,423,E.T. the Extra-Terrestrial (1982)
