# Objective

1.   Find Out the list of most popular and liked genre
2.   Create a model that finds the best suited movie for  one user in every genre
3.   Find what Genre Movies have received the best and worst ratings based on User ratings.



  The following dataset has two files with movie and rating databases with ID, Rating, Genre, and Movie name.

The primary analysis shows that the collaborative analysis of the genre as per user ratings and hence we need to use recommendation style using SVD

In [None]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse.linalg import svds

In [None]:
# Import Datasets

movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

In [15]:
movies.info() # Getting the information of Movies dataset

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27278 entries, 0 to 27277
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  27278 non-null  int64 
 1   title    27278 non-null  object
 2   genres   27278 non-null  object
dtypes: int64(1), object(2)
memory usage: 639.5+ KB


In [20]:
ratings.info() # Getting the information of Ratings dataset

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 4 columns):
 #   Column     Non-Null Count    Dtype  
---  ------     --------------    -----  
 0   userId     1048575 non-null  int64  
 1   movieId    1048575 non-null  int64  
 2   rating     1048575 non-null  float64
 3   timestamp  1048575 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 32.0 MB


In [24]:
ratings.isnull().sum() # Check for null values

Unnamed: 0,0
userId,0
movieId,0
rating,0
timestamp,0


In [25]:
movies.isnull().sum() #Check for null values

Unnamed: 0,0
movieId,0
title,0
genres,0


In [26]:
movies.duplicated().sum() # Check for duplicate values

0

In [27]:
ratings.duplicated().sum() # Check for duplicate values

0

In [28]:
# Merging two datasets
df = pd.merge(movies, ratings, on='movieId')

In [30]:
user_movie_matrix = df.pivot_table(index='userId', columns='movieId', values='rating').fillna(0) #Create a user-movie matrix

In [31]:
matrix = user_movie_matrix.values # Conversion of matrix to numpy array

In [32]:
# Normalize by sutracting mean
user_ratings_mean = np.mean(matrix, axis=1)
matrix_norm = matrix - user_ratings_mean.reshape(-1, 1)

In [33]:
# Perform SVD
U, sigma, Vt = svds(matrix_norm, k=50) # Number of singular values and vectors to keep
sigma = np.diag(sigma)

In [34]:
# Reconstruct the matrix
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
preds_df = pd.DataFrame(all_user_predicted_ratings, columns=user_movie_matrix.columns)

In [35]:
def recommend_movies(predictions_df, user_id, movies_df, original_ratings_df, num_recommendations=5):
    # Get and sort the user's predictions
    user_row_number = user_id - 1  # UserID starts at 1, not 0
    sorted_user_predictions = predictions_df.iloc[user_row_number].sort_values(ascending=False)

    # Get the user's data and merge in the movie information
    user_data = original_ratings_df[original_ratings_df.userId == user_id]
    user_full = user_data.merge(movies_df, how='left', on='movieId').sort_values(['rating'], ascending=False)

    # Recommend the highest predicted rating movies that the user hasn't seen yet
    recommendations = (movies_df[~movies_df['movieId'].isin(user_full['movieId'])].
                       merge(pd.DataFrame(sorted_user_predictions).reset_index(), how='left',
                             on='movieId').
                       rename(columns={user_row_number: 'Predictions'}).
                       sort_values('Predictions', ascending=False).
                       iloc[:num_recommendations, :-1])

    return user_full, recommendations

# Example usage for user with userId 1
already_rated, predictions = recommend_movies(preds_df, 1, movies, ratings, 10)
print(predictions)


      movieId                                              title  \
1141     1197                         Princess Bride, The (1987)   
2404     2571                                 Matrix, The (1999)   
1149     1210  Star Wars: Episode VI - Return of the Jedi (1983)   
6610     6874                           Kill Bill: Vol. 1 (2003)   
1076     1127                                  Abyss, The (1989)   
1255     1339             Dracula (Bram Stoker's Dracula) (1992)   
3593     3793                                       X-Men (2000)   
1146     1206                         Clockwork Orange, A (1971)   
8018     8874                           Shaun of the Dead (2004)   
4664     4886                              Monsters, Inc. (2001)   

                                           genres  
1141      Action|Adventure|Comedy|Fantasy|Romance  
2404                       Action|Sci-Fi|Thriller  
1149                      Action|Adventure|Sci-Fi  
6610                        Action|Crime|Th

In [37]:
# Find the most popular and liked genres
genre_ratings = data.groupby('genres')['rating'].mean().sort_values(ascending=False)
print("Most popular and liked genres:\n", genre_ratings.head())

# Find the best and worst rated genres
best_genres = genre_ratings.head()
worst_genres = genre_ratings.tail()
print("Best rated genres:\n", best_genres)
print("Worst rated genres:\n", worst_genres)


Most popular and liked genres:
 genres
Adventure|Comedy|Crime|Drama|Fantasy|Mystery|Sci-Fi|Thriller    5.0
Animation|Crime|Mystery                                         5.0
Comedy|Drama|Musical|Romance|War                                5.0
Comedy|Crime|Western                                            5.0
Crime|Film-Noir|Horror|Romance|Thriller                         5.0
Name: rating, dtype: float64
Best rated genres:
 genres
Adventure|Comedy|Crime|Drama|Fantasy|Mystery|Sci-Fi|Thriller    5.0
Animation|Crime|Mystery                                         5.0
Comedy|Drama|Musical|Romance|War                                5.0
Comedy|Crime|Western                                            5.0
Crime|Film-Noir|Horror|Romance|Thriller                         5.0
Name: rating, dtype: float64
Worst rated genres:
 genres
Documentary|Romance|War                    0.5
Crime|Horror|Sci-Fi                        0.5
Adventure|Drama|Horror                     0.5
Action|Adventure|Horror|Sci