In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the two input tables from CSV files in the working directory.
mov = pd.read_csv("movies.csv")    # movies table
rat = pd.read_csv("ratings.csv")   # ratings table

In [None]:
# Preview the first rows of the movies table.
mov.head()

In [None]:
# Preview the first rows of the ratings table.
rat.head()

# Exploratory Data Analysis


In [None]:
# Summarize the movies table: column dtypes and non-null counts.
mov.info()

In [None]:
# Summarize the ratings table: column dtypes and non-null counts.
rat.info()

**No null values in either the movies or the ratings file.**

In [None]:
# Show movies whose title repeats an earlier row (duplicated() flags every
# occurrence after the first).
mov[mov['title'].duplicated()]

In [None]:
# Remove the rows for two specific movieIds from `mov`, in place.
# NOTE(review): presumably these are the duplicate-title rows surfaced by the
# duplicated() check -- confirm against the data.
duplicate_movie_ids = [26982, 64997]
mov.drop(index=mov.index[mov['movieId'].isin(duplicate_movie_ids)], inplace=True)


In [None]:
# Count the distinct values in each column of the movies table.
mov.nunique()

In [None]:
# Left-join the ratings onto the movies (every movie row is kept, rated or
# not) and discard the timestamp column.
fd = mov.merge(rat, on='movieId', how='left').drop(columns='timestamp')
fd

In [None]:
# Discard rows that are exact duplicates of an earlier row.
fd = fd.drop_duplicates()
fd

In [None]:
# Summarize the merged table: column dtypes and non-null counts.
fd.info()

In [None]:
# Descriptive statistics for the numeric columns of the merged table.
fd.describe()

## Highest-rated movies (top 10 by sum of ratings)


In [None]:
# Peek at the first two rows of the merged table.
fd.head(2)

In [None]:
# Sum of all ratings received by each title, largest total first.
# (The former bare `high_rate_movie.head(10)` statement was removed: it was
# not the last expression of the cell, so its result was silently discarded.)
high_rate_movie = (
    fd.groupby('title')['rating']
    .sum()
    .sort_values(ascending=False)
)

# The ten titles with the largest rating sums; displayed as the cell output.
Top10 = high_rate_movie.nlargest(10)
Top10

In [None]:
# Bar chart of the ten titles with the largest rating sums (Top10).
plt.figure(figsize=(30,10))
plt.bar(Top10.index,Top10.values)
plt.title('Top 10 highly rated movies',fontsize=20)
plt.xlabel('Movie Names',fontsize=20)
plt.xticks(fontsize=15,rotation=90)  # vertical labels so long titles do not overlap
plt.yticks(fontsize=15)
plt.ylabel('Ratings Sum',fontsize=20)
# Actually call show(); the bare name `plt.show` never invoked the function
# and only echoed its repr as the cell output.
plt.show()

Number of ratings for each movie

In [None]:
# Number of ratings recorded for each title (count() skips missing ratings),
# ordered from most-rated to least-rated.
no_rat = (
    fd.groupby('title')['rating']
    .count()
    .sort_values(ascending=False)
)

# The ten most frequently rated titles; displayed as the cell output.
Top10_rat = no_rat.nlargest(10)
Top10_rat

In [None]:
# Bar chart of the ten titles with the most ratings (Top10_rat).
plt.figure(figsize=(30,10))
plt.bar(Top10_rat.index,Top10_rat.values)
plt.title('Top 10 most rated movies',fontsize=20)
plt.xlabel('Movie Names',fontsize=20)
plt.xticks(fontsize=15,rotation=90)  # vertical labels so long titles do not overlap
plt.yticks(fontsize=15)
plt.ylabel('Number of Ratings',fontsize=20)
# Actually call show(); the bare name `plt.show` never invoked the function
# and only echoed its repr as the cell output.
plt.show()

# Creation of Recommendation System


Popularity Based Recommendation System at Genre Level

In [None]:
# Re-check the merged table's columns and dtypes before building the recommenders.
fd.info()

In [None]:
def popularity_recommender(genre, min_reviews, num_recommendations, data=None):
    """Recommend the most popular movies of one genre.

    Popularity is the mean rating of a title, considering only titles that
    received at least ``min_reviews`` ratings.

    The previous implementation compared ``min_reviews`` against the rating
    *value* of individual rows and returned raw per-user rating rows, so the
    review threshold was never applied and the same title could occupy
    several of the returned slots.

    Parameters
    ----------
    genre : str
        Genre string to select. It is matched exactly against the full
        ``genres`` value of a row (e.g. ``'Drama|Romance'``).
    min_reviews : int
        Minimum number of ratings a title must have to be eligible.
    num_recommendations : int
        Maximum number of titles to return.
    data : pandas.DataFrame, optional
        Table with ``title``, ``genres`` and ``rating`` columns, one row per
        rating. Defaults to the notebook-level ``fd`` frame.

    Returns
    -------
    pandas.DataFrame or str
        One row per recommended title with columns ``title``, ``rating``
        (mean rating) and ``num_reviews``, best first. If no title
        qualifies, the explanatory message string is returned instead.
    """
    ratings = fd if data is None else data

    # Rows without a rating (movies nobody rated) carry no popularity signal.
    genre_ratings = ratings.loc[ratings['genres'] == genre].dropna(subset=['rating'])

    # Collapse the per-user rows into one row per title.
    title_stats = (
        genre_ratings.groupby('title')['rating']
        .agg(rating='mean', num_reviews='count')
        .reset_index()
    )
    eligible = title_stats[title_stats['num_reviews'] >= min_reviews]

    if eligible.empty:
        return "No movies found for the given genre and minimum review threshold."

    # Break ties by review count, then title, so the output is deterministic.
    ranked = eligible.sort_values(
        by=['rating', 'num_reviews', 'title'],
        ascending=[False, False, True],
    )
    top_movies = ranked.head(num_recommendations).reset_index(drop=True)

    return top_movies[['title', 'rating', 'num_reviews']]


In [None]:
# Number of rows in the merged table for each distinct genres string.
fd['genres'].value_counts()

In [None]:
# Example query against popularity_recommender.
genre = 'Drama|Romance'
min_reviews = 5
num_recommendations = 3

Top = popularity_recommender(
    genre=genre,
    min_reviews=min_reviews,
    num_recommendations=num_recommendations,
)
Top

In [None]:
# Preview the first rows of the merged table.
fd.head()

Collaborative-filtering recommender system that recommends the top N movies for a target user "u", based on that user's "K" most similar users

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors


# User x title rating matrix: one row per userId, one column per title.
# Cells with no rating are filled with 0.
user_movie_matrix = (
    fd.pivot_table(values='rating', index='userId', columns='title')
    .fillna(0)
)

# Brute-force nearest-neighbour index over the user rows, using cosine
# distance (fit() returns the fitted estimator itself).
knn_model = NearestNeighbors(algorithm='brute', metric='cosine').fit(user_movie_matrix)

def collaborative_recommender(user_id, num_recommendations, k):
    """Recommend unseen movies to a user from the tastes of similar users.

    The ``k`` nearest users (per the fitted ``knn_model``) are looked up for
    ``user_id``. Every title the target user has not rated but at least one
    neighbour has is scored by the sum of the neighbours' ratings, and the
    best-scoring titles are returned.

    The previous implementation took ``list(set(candidates))[:N]``, i.e. an
    arbitrary, hash-order-dependent subset rather than a ranked top N, and
    failed when ``k + 1`` exceeded the number of users in the matrix.

    Parameters
    ----------
    user_id
        Label of the target user in ``user_movie_matrix.index``.
    num_recommendations : int
        Maximum number of titles to return.
    k : int
        Number of similar users to consult.

    Returns
    -------
    pandas.DataFrame
        Single column ``title`` with the recommended titles, best first.
        Empty when the neighbours rated nothing the user has not seen.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in ``user_movie_matrix``.
    """
    target_pos = user_movie_matrix.index.get_loc(user_id)
    target_ratings = user_movie_matrix.iloc[target_pos]

    # Ask for one extra neighbour because the target user is normally its own
    # nearest neighbour; never ask for more rows than the model was fitted on.
    n_neighbors = min(k + 1, len(user_movie_matrix))
    _, neighbor_positions = knn_model.kneighbors(
        target_ratings.values.reshape(1, -1), n_neighbors=n_neighbors
    )

    # Drop the target user itself and keep at most k genuine neighbours.
    neighbor_positions = [
        pos for pos in neighbor_positions.flatten() if pos != target_pos
    ][:k]

    # Only titles the target user has not rated (stored as 0) are candidates.
    unseen_titles = target_ratings.index[~(target_ratings > 0)]
    neighbor_ratings = user_movie_matrix.iloc[neighbor_positions].loc[:, unseen_titles]

    # Score = total rating given by the neighbours; unrated cells contribute 0,
    # so a positive score means at least one neighbour rated the title.
    scores = neighbor_ratings.sum(axis=0)
    scores = scores[scores > 0]

    # Sort by score, then title, so ties resolve deterministically.
    ranked = (
        scores.rename('score')
        .rename_axis('title')
        .reset_index()
        .sort_values(by=['score', 'title'], ascending=[False, True])
    )

    return ranked.head(num_recommendations)[['title']].reset_index(drop=True)




In [None]:
# Example query against collaborative_recommender.
user_id = 1
num_recommendations = 5
k = 100

Recommendations = collaborative_recommender(
    user_id=user_id,
    num_recommendations=num_recommendations,
    k=k,
)
Recommendations