### Import required Libraries

In [66]:
import os
from google.colab import drive

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

### Dataset

In [67]:
# Mount Google Drive into the Colab filesystem; the dataset folder used below lives under /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [91]:
# Folder on the mounted Drive that holds the ml-latest-small CSV files read below
# (ratings.csv, movies.csv, links.csv). Adjust this if the dataset is stored elsewhere.
dataset_path = "/content/drive/My Drive/Machine Learning Datasets/ml-latest-small"

In [114]:
# The precomputed matrix loads are disabled (commented out) in this cell:
# URM = pd.read_csv(dataset_path + '/UserRatingMatrix.csv', index_col = 'userId')
# ICM = pd.read_csv(dataset_path + '/ItemContentMatrix.csv', index_col = 'movieId')

# One row per rating event; kept with the default RangeIndex.
ratings = pd.read_csv(dataset_path + '/ratings.csv')

# movies.csv and links.csv are both read with movieId as the index so they can
# later be aligned on it.
movies, links = (
    pd.read_csv(dataset_path + csv_name, index_col='movieId')
    for csv_name in ('/movies.csv', '/links.csv')
)

In [115]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB


In [116]:
# Number of distinct userId values in the ratings table (NaN, if present, counts once).
ratings['userId'].nunique(dropna=False)

610

In [117]:
links.head()

Unnamed: 0_level_0,imdbId,tmdbId
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,114709,862.0
2,113497,8844.0
3,113228,15602.0
4,114885,31357.0
5,113041,11862.0


In [118]:
links.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9742 entries, 1 to 193609
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   imdbId  9742 non-null   int64  
 1   tmdbId  9734 non-null   float64
dtypes: float64(1), int64(1)
memory usage: 228.3 KB


# <center>Non Personalized Recommenders</center>
<p align = "justify">Non-personalized recommenders are useful when a new user has just started using the platform supported by our recommendation system, because there is no rating history yet to personalize from. Such a user could simply be shown items at random. But can we do better?
</p>
Yes, we can! Here are a few ways to do so.

    -> A simple way is to recommend the most popular items, regardless of their ratings: the user is recommended the items that were rated the most times.
    -> A better approach is to recommend the best-rated items. The average rating of every movie is calculated, using the non-zero ratings only.
    -> A further refinement adds a support term to the denominator, score = sum(ratings) / (count(ratings) + support), which gives more weight to movies rated by more users, so a movie with only a handful of high ratings cannot top the list.


In [119]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [120]:
def best_rated_with_support(ratings_df, support_factor):
    """Rank movies by a support-damped average rating.

    For every movie the score is ``sum(ratings) / (count(ratings) + support_factor)``.
    Adding ``support_factor`` to the denominator pulls the score of sparsely
    rated movies down, so movies rated by more users are favoured.

    Args:
        ratings_df: DataFrame with at least a ``movieId`` and a ``rating`` column.
        support_factor: Number added to each movie's rating count before dividing.

    Returns:
        A Series named ``rating``, indexed by ``movieId`` and sorted from the
        highest score to the lowest.
    """
    movie_ratings = ratings_df.groupby('movieId')['rating']
    rating_totals = movie_ratings.sum()
    rating_counts = movie_ratings.count()

    # Both operands are named 'rating', so the quotient keeps that name.
    damped_means = rating_totals / (rating_counts + support_factor)
    return damped_means.sort_values(ascending=False)

In [121]:
# Value added to every movie's rating count when averaging; larger values
# penalise sparsely rated movies more strongly.
SUPPORT_FACTOR = 10
best_rated_movies = best_rated_with_support(ratings, SUPPORT_FACTOR)

In [122]:
best_rated_movies.head()

movieId
318     4.293578
2959    4.085526
858     4.076733
260     4.068966
296     4.064669
Name: rating, dtype: float64

In [124]:
# Compare row counts before joining: the score series only has an entry for movies that
# appear in ratings.csv, so it can be shorter than the links/movies tables.
len(best_rated_movies), len(links), len(movies)

(9724, 9742, 9742)

In [125]:
# Align the scores with the link ids and movie metadata on the shared movieId index,
# rank by score, and drop incomplete rows.
# NOTE(review): dropna() removes a row if *any* column is missing. That discards movies
# with no rating, but also any rated movie whose tmdbId is missing; confirm this is intended.
# NOTE(review): this rebinds best_rated_movies from the score Series to the joined table,
# so re-running this cell without re-running the scoring cell joins the metadata twice.
best_rated_movies = (
    pd.concat([best_rated_movies, links, movies], axis=1)
    .sort_values('rating', ascending=False)
    .dropna()
)

In [126]:
best_rated_movies.head()

Unnamed: 0_level_0,rating,imdbId,tmdbId,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
318,4.293578,111161,278.0,"Shawshank Redemption, The (1994)",Crime|Drama
2959,4.085526,137523,550.0,Fight Club (1999),Action|Crime|Drama|Thriller
858,4.076733,68646,238.0,"Godfather, The (1972)",Crime|Drama
260,4.068966,76759,11.0,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
296,4.064669,110912,680.0,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller


### Conclusion
    -> When a new user starts using the platform that our recommender supports, we can recommend the top 20 movies ranked by the support-adjusted average rating computed above.

In [127]:
# Write the ranked table to Drive, in the same folder as the source CSVs; to_csv's default
# index=True keeps the movieId index as the first column of the file.
best_rated_movies.to_csv(dataset_path + '/BestRatedMovies.csv')