<a href="https://colab.research.google.com/github/avulahemalatha-12/Movie-Recommendation-system/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd

# Load the dataset
ratings = pd.read_csv('/content/ratings.csv')
movies = pd.read_csv('/content/movies.csv')

print(ratings.head())
print(movies.head())

   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


In [7]:
# Create a user-item matrix
user_movie_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

print(user_movie_matrix.head())

movieId  1       2       3       4       5       6       7       8       \
userId                                                                    
1           4.0     0.0     4.0     0.0     0.0     4.0     0.0     0.0   
2           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
5           4.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

movieId  9       10      ...  193565  193567  193571  193573  193579  193581  \
userId                   ...                                                   
1           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
2           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0

In [8]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Calculate cosine similarity between users
user_similarity = cosine_similarity(user_movie_matrix)

# Convert to DataFrame for easier handling
user_similarity_df = pd.DataFrame(user_similarity, index=user_movie_matrix.index, columns=user_movie_matrix.index)

print(user_similarity_df.head())

userId       1         2         3         4         5         6         7    \
userId                                                                         
1       1.000000  0.027283  0.059720  0.194395  0.129080  0.128152  0.158744   
2       0.027283  1.000000  0.000000  0.003726  0.016614  0.025333  0.027585   
3       0.059720  0.000000  1.000000  0.002251  0.005020  0.003936  0.000000   
4       0.194395  0.003726  0.002251  1.000000  0.128659  0.088491  0.115120   
5       0.129080  0.016614  0.005020  0.128659  1.000000  0.300349  0.108342   

userId       8         9         10   ...       601       602       603  \
userId                                ...                                 
1       0.136968  0.064263  0.016875  ...  0.080554  0.164455  0.221486   
2       0.027257  0.000000  0.067445  ...  0.202671  0.016866  0.011997   
3       0.004941  0.000000  0.000000  ...  0.005048  0.004892  0.024992   
4       0.062969  0.011361  0.031163  ...  0.085938  0.128273  0

In [9]:
# Provide Ratings for the new user
new_user_ratings = {
    1: 5,
    2: 3,
    3: 4
}

# Create a DataFrame for the new user
new_user_df = pd.DataFrame(new_user_ratings.items(), columns=['movieId', 'rating'])
new_user_df['userId'] = 0

# Append the new user ratings to the original ratings DataFrame
all_ratings = pd.concat([ratings, new_user_df[['userId', 'movieId', 'rating']]], ignore_index=True)

# Create a new user-item matrix including the new user
new_user_movie_matrix = all_ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Calculate cosine similarity for the new user
new_user_similarity = cosine_similarity(new_user_movie_matrix)

# Get the similarity scores for the new user
new_user_similarities = new_user_similarity[0]

# Get the indices of the most similar users
similar_users_indices = np.argsort(new_user_similarities)[::-1][1:]
similar_users_indices = similar_users_indices[similar_users_indices < user_movie_matrix.shape[0]]

# Get the ratings of the most similar users using the original user_movie_matrix and the filtered indices
similar_users_ratings = user_movie_matrix.iloc[similar_users_indices]

# Calculate weighted ratings for each movie
weighted_ratings = similar_users_ratings.T.dot(new_user_similarities[similar_users_indices])
recommendations = weighted_ratings / np.array([np.abs(new_user_similarities[similar_users_indices]).sum()])

# Get the top 5 movie recommendations
top_recommendations = recommendations.sort_values(ascending=False).head(5)

# Get movie titles for the recommended movie IDs
recommended_movie_ids = top_recommendations.index
recommended_movies = movies[movies['movieId'].isin(recommended_movie_ids)]

print("Recommended Movies for the new user:")
print(recommended_movies[['movieId', 'title']])

Recommended Movies for the new user:
     movieId                                      title
224      260  Star Wars: Episode IV - A New Hope (1977)
257      296                        Pulp Fiction (1994)
277      318           Shawshank Redemption, The (1994)
314      356                        Forrest Gump (1994)
510      593           Silence of the Lambs, The (1991)
