# Movie Recommender System 

This notebook contains several approaches to creating recommender systems for movie recommendation. Due to lack of compute power, we were highly strained in terms of use of SOTA techniques, such as DL NCF. The following approaches contains one final approach and several for potential future use.

## Pearson correlation - user-to-user collaborative filtering (USED)

In [2]:
import pandas as pd
import numpy as np
 
data_ratings = pd.read_csv("../data/demo_datasets/ratings.csv")
data_movies = pd.read_csv("../data/demo_datasets/movies.csv")

In [75]:
new_user = data_ratings.userId.unique().shape[0] # id of the new user

In [76]:
picked_userid = 54 # some user id

In [3]:
# Create user-item matrix
matrix = data_ratings.pivot_table(index='userId', columns='movieId', values='rating')
matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


In [78]:
# Normalize user-item matrix
matrix_norm = matrix.subtract(matrix.mean(axis=1), axis = 'rows')
matrix_norm.tail()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
606,-1.157399,,,,,,-1.157399,,,,...,,,,,,,,,,
607,0.213904,,,,,,,,,,...,,,,,,,,,,
608,-0.634176,-1.134176,-1.134176,,,,,,,0.865824,...,,,,,,,,,,
609,-0.27027,,,,,,,,,0.72973,...,,,,,,,,,,
610,1.311444,,,,,1.311444,,,,,...,,,,,,,,,,


In [79]:
# calcualte user similarity matrix using Pearson correlation
user_similarity = matrix_norm.T.corr()
user_similarity.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,,0.079819,0.207983,0.268749,-0.291636,-0.118773,0.469668,0.918559,-0.037987,...,0.09157371,-5.222581e-18,-0.061503,-0.407556,-0.164871,0.066378,0.174557,0.26807,-0.175412,-0.032086
2,,1.0,,,,,-0.991241,,,0.037796,...,-0.3873468,,-1.0,,,0.583333,,-0.125,,0.623288
3,0.079819,,1.0,,,,,,,,...,,,0.4332,,,-0.791334,-0.333333,-0.395092,,0.569562
4,0.207983,,,1.0,-0.336525,0.148498,0.542861,0.117851,,0.485794,...,-0.2221127,0.3966413,0.09009,-0.080296,0.400124,0.144603,0.116518,-0.170501,-0.27735,-0.043786
5,0.268749,,,-0.336525,1.0,0.043166,0.158114,0.028347,,-0.777714,...,-4.5324670000000006e-17,0.1533034,0.234743,0.067791,-0.364156,0.244321,0.23108,-0.020546,0.384111,0.040582


In [80]:
# Remove picked user ID from the candidate list
user_similarity.drop(index=picked_userid, inplace=True)

In [81]:
# Number of similar users
n = 10 
# User similarity threashold
user_similarity_threshold = 0.3
# Get top n similar users
similar_users = user_similarity[user_similarity[picked_userid]>user_similarity_threshold][picked_userid].sort_values(ascending=False)[:n]
# Print out top n similar users
print(f'The similar users for user {picked_userid} are', similar_users)

The similar users for user 54 are userId
475    1.0
272    1.0
439    1.0
60     1.0
549    1.0
558    1.0
422    1.0
515    1.0
97     1.0
20     1.0
Name: 54, dtype: float64


  similar_users = user_similarity[user_similarity[picked_userid]>user_similarity_threshold][picked_userid].sort_values(ascending=False)[:n]


In [82]:
# Remove movies that have been watched
picked_userid_watched = matrix_norm[matrix_norm.index == picked_userid].dropna(axis=1, how='all')

# Movies that similar users watched. Remove movies that none of the similar users have watched
similar_user_movies = matrix_norm[matrix_norm.index.isin(similar_users.index)].dropna(axis=1, how='all')

# Remove the watched movie from the movie list
similar_user_movies.drop(picked_userid_watched.columns,axis=1, inplace=True, errors='ignore')

similar_user_movies

movieId,2,8,13,19,34,48,60,94,107,158,...,158238,158813,166528,168250,168252,174055,175569,176371,177765,179819
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20,-0.590909,-2.590909,0.409091,,0.409091,1.409091,,,-0.090909,-0.590909,...,,,,,,,,,,
60,,,,,,-0.727273,-0.727273,,,,...,,,,,,,,,,
97,,,,,,,,,,,...,,,,,,,,,,
272,,,,,,,,,,,...,0.370968,,,,,,,,1.370968,
422,,,,,,,,,,,...,,,,,,,,,,
439,,,,,,,,,,,...,,,,,,,,,,
475,0.090323,,,-0.409677,,,,,,,...,,,-0.409677,,-0.909677,,,,,
515,,,,,,,,,,,...,,,,-0.346154,0.153846,-0.846154,-0.846154,0.153846,,-0.846154
549,,,,,,,,,,,...,,-2.666667,,,,,,,,
558,,,,,,,,-0.928571,,,...,,,,,,,,,,


In [83]:
# A dictionary to store item scores
item_score = {}# Loop through items

for i in similar_user_movies.columns:
  # Get the ratings for movie i
  movie_rating = similar_user_movies[i]
  
  # Create a variable to store the score
  total = 0
  
  # Create a variable to store the number of scores
  count = 0
  
  # Loop through similar users
  for u in similar_users.index:
    # If the movie has rating
    if pd.isna(movie_rating[u]) == False:
      # Score is the sum of user similarity score multiply by the movie rating
      score = similar_users[u] * movie_rating[u]
      
      # Add the score to the total score for the movie so far
      total += score
      
      # Add 1 to the count
      count +=1
  
  # Get the average score for the item
  item_score[i] = total / count

# Convert dictionary to pandas dataframe
item_score = pd.DataFrame(item_score.items(), columns=['movie', 'movie_score'])
    
# Sort the movies by score
ranked_item_score = item_score.sort_values(by='movie_score', ascending=False)

# Select top m movies
m = 10
ranked_item_score.head(m)

Unnamed: 0,movie,movie_score
87,1272,1.677419
78,1213,1.677419
32,750,1.677419
185,2728,1.677419
49,922,1.677419
97,1387,1.677419
60,1025,1.409091
406,5991,1.409091
31,720,1.409091
158,2300,1.409091


In [84]:
###### Step 9: Predict Scores (Optional)# Average rating for the picked user
avg_rating = matrix[matrix.index == picked_userid].T.mean()[picked_userid]# Print the average movie rating for user 1
print(f'The average movie rating for user {picked_userid} is {avg_rating:.2f}')# Calcuate the predicted rating
ranked_item_score['predicted_rating'] = ranked_item_score['movie_score'] + avg_rating# Take a look at the data
ranked_item_score.head(m)

The average movie rating for user 54 is 3.03


Unnamed: 0,movie,movie_score,predicted_rating
87,1272,1.677419,4.707722
78,1213,1.677419,4.707722
32,750,1.677419,4.707722
185,2728,1.677419,4.707722
49,922,1.677419,4.707722
97,1387,1.677419,4.707722
60,1025,1.409091,4.439394
406,5991,1.409091,4.439394
31,720,1.409091,4.439394
158,2300,1.409091,4.439394


## OTHER APPROACHES (R&D - not actually used)

## Deep learning Neural Collaborative Filtering

In [57]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl

  from .autonotebook import tqdm as notebook_tqdm


In [58]:
class MovieLensTrainDataset(Dataset):
    """MovieLens PyTorch Dataset for Training
    
    Args:
        ratings (pd.DataFrame): Dataframe containing the movie ratings
        all_movies (list): List containing all movieIds
    
    """

    def __init__(self, ratings, all_movies):
        self.users, self.items, self.labels = self.get_dataset(ratings, all_movies)

    def __len__(self):
        return len(self.users)
  
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]

    def get_dataset(self, ratings, all_movies):
        users, items, labels = [], [], []
        user_item_set = set(zip(ratings['userId'], ratings['movieId']))

        num_negatives = 4
        for u, i in user_item_set:
            users.append(u)
            items.append(i)
            labels.append(1)
            for _ in range(num_negatives):
                negative_item = np.random.choice(all_movies)
                while (u, negative_item) in user_item_set:
                    negative_item = np.random.choice(all_movies)
                users.append(u)
                items.append(negative_item)
                labels.append(0)

        return torch.tensor(users), torch.tensor(items), torch.tensor(labels)

class NCFMovieRecommender(pl.LightningModule):
    """ Neural Collaborative Filtering (NCF)
    
        Args:
            num_users (int): Number of unique users
            num_items (int): Number of unique items
            ratings (pd.DataFrame): Dataframe containing the movie ratings for training
            all_movies (list): List containing all movieIds (train + test)
    """
    
    def __init__(self, num_users, num_items, ratings, all_movies):
        super().__init__()
        self.user_embedding = nn.Embedding(num_embeddings=num_users, embedding_dim=8)
        self.item_embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=8)
        self.fc1 = nn.Linear(in_features=16, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.output = nn.Linear(in_features=32, out_features=1)
        self.ratings = ratings
        self.all_movies = all_movies
        self.save_hyperparameters(logger=False)
        
    def forward(self, user_input, item_input):
        
        # Pass through embedding layers
        user_embedded = self.user_embedding(user_input)
        item_embedded = self.item_embedding(item_input)

        # Concat the two embedding layers
        vector = torch.cat([user_embedded, item_embedded], dim=-1)

        # Pass through dense layer
        vector = nn.ReLU()(self.fc1(vector))
        vector = nn.ReLU()(self.fc2(vector))

        # Output layer
        pred = nn.Sigmoid()(self.output(vector))

        return pred
    
    def training_step(self, batch, batch_idx):
        user_input, item_input, labels = batch
        predicted_labels = self(user_input, item_input)
        loss = nn.BCELoss()(predicted_labels, labels.view(-1, 1).float())
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())

    def train_dataloader(self):
        return DataLoader(MovieLensTrainDataset(self.ratings, self.all_movies),
                          batch_size=512, num_workers=2)

In [59]:
# data
ratings = pd.read_csv('./data/small/ratings.csv')
movies = pd.read_csv('./data/small/movies.csv')

In [60]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [61]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [62]:
## sampling the data
# rand_userIds = np.random.choice(ratings['userId'].unique(), 
#                                 size=int(len(ratings['userId'].unique())*0.1), 
#                                 replace=False)

# ratings = ratings.loc[ratings['userId'].isin(rand_userIds)]

# print('There are {} rows of data from {} users'.format(len(ratings), len(rand_userIds)))

In [63]:
#How many rating did the users give
ratings[['userId', 'movieId']].groupby('userId').count().sort_values(by='movieId')

Unnamed: 0_level_0,movieId
userId,Unnamed: 1_level_1
442,20
406,20
147,20
194,20
569,20
...,...
274,1346
448,1864
474,2108
599,2478


The minimum number of reviews given by a user is 20. In order to split dataset into test/train, we will take the latest 20% of reviews of each individual user as a test set, and the rest as a training set.

In [64]:
# conerting to datetime type
ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], utc=True)
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,1970-01-01 00:00:00.964982703+00:00
1,1,3,4.0,1970-01-01 00:00:00.964981247+00:00
2,1,6,4.0,1970-01-01 00:00:00.964982224+00:00
3,1,47,5.0,1970-01-01 00:00:00.964983815+00:00
4,1,50,5.0,1970-01-01 00:00:00.964982931+00:00
...,...,...,...,...
100831,610,166534,4.0,1970-01-01 00:00:01.493848402+00:00
100832,610,168248,5.0,1970-01-01 00:00:01.493850091+00:00
100833,610,168250,5.0,1970-01-01 00:00:01.494273047+00:00
100834,610,168252,5.0,1970-01-01 00:00:01.493846352+00:00


In [65]:
ratings['rank'] = ratings.groupby(['userId'])['timestamp'] \
                                .rank(method='first', ascending=False, pct=True)

In [66]:
train_ratings = ratings[ratings['rank'] < 0.8]
test_ratings = ratings[ratings['rank'] >= 0.8]


Now we no longer need timestamps and ranks, so we will drop them

In [None]:
train_ratings.drop(['timestamp', 'rank'], axis=1, inplace=True)
test_ratings.drop(['timestamp', 'rank'], axis=1, inplace=True)

### Converting dataset into implicit feedback 
Our dataset contains information about viewed films by users. However, we would also like to know, which movies the user did not watch. It is a good indicator of movies, that are not interesting to the user. So, we have to create records that indicate movies not watched by every individual.

In [68]:
## setting every record as poistive

train_ratings.loc[:, 'rating'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_ratings.loc[:, 'rating'] = 1
  train_ratings.loc[:, 'rating'] = 1


In [69]:
from tqdm import tqdm
# generating negative observations (i.e. not watched)
# Get a list of all movie IDs
all_movieIds = ratings['movieId'].unique()

# Placeholders that will hold the training data
users, items, labels = [], [], []

# This is the set of items that each user has interaction with
user_item_set = set(zip(train_ratings['userId'], train_ratings['movieId']))

# 4:1 ratio of negative to positive samples
num_negatives = 4

for (u, i) in tqdm(user_item_set):
    users.append(u)
    items.append(i)
    labels.append(1) # items that the user has interacted with are positive
    for _ in range(num_negatives):
        # randomly select an item
        negative_item = np.random.choice(all_movieIds) 
        # check that the user has not interacted with this item
        while (u, negative_item) in user_item_set:
            negative_item = np.random.choice(all_movieIds)
        users.append(u)
        items.append(negative_item)
        labels.append(0) # items not interacted with are negative

100%|██████████| 80286/80286 [00:03<00:00, 24692.97it/s]


## Moving to PyTorch

## Model - Neural Collaborative Filtering (NCF)

In [71]:
num_users = ratings['userId'].max()+1
num_items = ratings['movieId'].max()+1

all_movies = ratings['movieId'].unique()

model = NCFMovieRecommender(num_users, num_items, train_ratings, all_movies)

### Model training

In [72]:
trainer = pl.Trainer(max_epochs=5)
trainer.fit(model)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /home/wiktor/Desktop/personal/MovieRecommender/model/lightning_logs

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 4.9 K 
1 | item_embedding | Embedding | 1.5 M 
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.228     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 785/785 [00:09<00:00, 85.05it/s, loss=0.335, v_num=0]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 785/785 [00:09<00:00, 84.74it/s, loss=0.335, v_num=0]


##  Model evaluation

In [73]:
# sample User-item pairs for testing
test_user_item_set = set(zip(test_ratings['userId'][:1000], test_ratings['movieId'][:1000]))

# Dict of all items that are interacted with by each user
user_interacted_items = ratings.groupby('userId')['movieId'].apply(list).to_dict()

hits = []
for (u,i) in tqdm(test_user_item_set):
    interacted_items = user_interacted_items[u]
    not_interacted_items = set(all_movieIds) - set(interacted_items)
    selected_not_interacted = list(np.random.choice(list(not_interacted_items), 99))
    test_items = selected_not_interacted + [i]
    
    predicted_labels = np.squeeze(model(torch.tensor([u]*100), 
                                        torch.tensor(test_items)).detach().numpy())
    
    top10_items = [test_items[i] for i in np.argsort(predicted_labels)[::-1][0:10].tolist()]
    
    if i in top10_items:
        hits.append(1)
    else:
        hits.append(0)
        
print("The Hit Ratio @ 10 is {:.2f}".format(np.average(hits)))

  test_user_item_set = set(zip(test_ratings['userId'][:1000], test_ratings['movieId'][:1000]))
100%|██████████| 1000/1000 [00:01<00:00, 520.75it/s]

The Hit Ratio @ 10 is 0.75





## SVD Matrix Factorization

In [28]:
import pandas as pd
import numpy as np
 
ratings_data = pd.read_csv("./data/small/ratings.csv")
movies_data = pd.read_csv("./data/small/movies.csv")

In [29]:
from surprise import Dataset
from surprise import Reader
 
# Get minimum and maximum rating from the dataset
min_rating = ratings_data.rating.min()
max_rating = ratings_data.rating.max()
 
reader = Reader(rating_scale=(min_rating, max_rating))
data = Dataset.load_from_df(ratings_data[['userId', 'movieId', 'rating']], reader)

In [30]:
from surprise import SVD
from surprise.model_selection import cross_validate
 
svd = SVD(n_epochs=10)
results = cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=10, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8787  0.8774  0.8767  0.8835  0.8710  0.8830  0.8857  0.8729  0.8730  0.8808  0.8783  0.0048  
MAE (testset)     0.6757  0.6773  0.6768  0.6817  0.6710  0.6781  0.6836  0.6717  0.6719  0.6780  0.6766  0.0040  
Fit time          0.45    0.51    0.47    0.46    0.49    0.47    0.47    0.49    0.46    0.50    0.48    0.02    
Test time         0.04    0.04    0.03    0.04    0.04    0.04    0.04    0.03    0.13    0.04    0.05    0.03    


In [31]:
def generate_recommendation(model, user_id, ratings_df, movies_df, n_items):
   # Get a list of all movie IDs from dataset
   movie_ids = ratings_df["movieId"].unique()
 
   # Get a list of all movie IDs that have been watched by user
   movie_ids_user = ratings_df.loc[ratings_df["userId"] == user_id, "movieId"]
    # Get a list off all movie IDS that that have not been watched by user
   movie_ids_to_pred = np.setdiff1d(movie_ids, movie_ids_user)
 
   # Apply a rating of 4 to all interactions (only to match the Surprise dataset format)
   test_set = [[user_id, movie_id, 4] for movie_id in movie_ids_to_pred]
 
   # Predict the ratings and generate recommendations
   predictions = model.test(test_set)
   pred_ratings = np.array([pred.est for pred in predictions])
   print("Top {0} item recommendations for user {1}:".format(n_items, user_id))
   # Rank top-n movies based on the predicted ratings
   index_max = (-pred_ratings).argsort()[:n_items]
   for i in index_max:
       movie_id = movie_ids_to_pred[i]
       print(movies_df[movies_df["movieId"]==movie_id]["title"].values[0], pred_ratings[i])

In [32]:
# define which user ID that we want to give recommendation
userID = 23
# define how many top-n movies that we want to recommend
n_items = 10
# generate recommendation using the model that we have trained
generate_recommendation(svd,userID,ratings_data,movies_data,n_items)

Top 10 item recommendations for user 23:
Shawshank Redemption, The (1994) 4.221935956657099
Fight Club (1999) 4.212665296981007
Departed, The (2006) 4.165097129232535
Eternal Sunshine of the Spotless Mind (2004) 4.123274938548723
Princess Bride, The (1987) 4.093765397393623
Good Will Hunting (1997) 4.086011624699826
Lord of the Rings: The Fellowship of the Ring, The (2001) 4.0670887199393535
Schindler's List (1993) 4.041386721561442
Dark Knight Rises, The (2012) 4.0401118088851184
Life Is Beautiful (La Vita è bella) (1997) 4.0364877105243515
