# Recommender Systems

#### First I imported the necessary libraries and the data. I also implemented a couple objects and some utils functions.

In [1120]:
# Import necessary libraries
import csv
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [1121]:
# Firstly let's create training and testing files
with open('ml-latest-small/ratings.csv', 'r') as input_file:
    # Create two output files for training and testing
    with open('ml-latest-small/ratings_train.csv', 'w', newline='') as training_file, open('ml-latest-small/ratings_test.csv', 'w', newline='') as testing_file:
        # Create CSV writers for the output files
        training_writer = csv.writer(training_file)
        testing_writer = csv.writer(testing_file)
        
        # Loop over the input file and split the rows into the training and testing files
        for i, row in enumerate(csv.reader(input_file)):
            if i % 2 == 0:
                # Even rows go to the training file
                training_writer.writerow(row)
            else:
                # Odd rows go to the testing file
                testing_writer.writerow(row)

In [1122]:
class User:
    def __init__(self, user_id, genres_count):
        super().__init__()
        self.id = user_id
        self.genre_ratings = np.zeros(genres_count, dtype=int)  # Ratings of genres by the user, normalized to (0,1)
        self.ratings: Dict[int, float] = {}  # Star-rating of given movies
        self.movies_rated = set()

class Movie:
    def __init__(self, movie_id, title, genres):
        super().__init__()
        self.id = movie_id
        self.title = title
        self.genres = genres
        self.genres_vector = None

    def set_genres(self, genres_str_int):
        self.genres_vector = np.zeros(len(genres_str_int))
        for genre in self.genres:
            genre_id = genres_str_int[genre]
            self.genres_vector[genre_id] = 1

        self.genres_vector = self.genres_vector.reshape(1, -1)


In [1123]:
# Input part


# import movies from the file. Output format is a tuple (dictionary of Movie objects, list of genres)
def read_movies(path):
    genres_set = set() # initially a set so I don't have to check for duplicates
    movies = {}
    inverted_movies = {}

    with open(path, encoding="utf-8") as f:
        #f.readline() # skip the header
        f.readline()
        reader = csv.reader(f, delimiter=',')
        for row in reader:
            # print("Row: {}".format(row))
            movie_id = int(row[0])
            title = row[1]
            genres = [genre.strip() for genre in row[2].split('|') if genre.strip() != '(no genres listed)']
            movie = Movie(movie_id, title, genres)
            movies[movie_id] = movie
            inverted_movies[title] = movie_id
            genres_set = genres_set.union(set(genres))

    return movies, sorted(list(genres_set)), inverted_movies
    
# import users ratings from the file. Output format is a dictionary of User objects (key is the id)
def read_ratings(path, genre_str_id, movies):
    users = {}
    with open(path, encoding="utf-8") as f:
        f.readline() # skip the header
        f.readline()
        reader = csv.reader(f, delimiter=',')
        for i, row in enumerate(reader):
            user_id = int(row[0])
            movie_id = int(row[1])
            rating = float(row[2])
            # create new user if necessary
            if user_id not in users:
                users[user_id] = User(user_id, len(genre_str_id))
            # add rating if user already present
            users[user_id].movies_rated.add(movie_id)
            users[user_id].ratings[movie_id] = rating
            # +1 to the genre field of the user profile if rating is >= 2.5
            for movie_genre in movies[movie_id].genres:
                if rating >= 2.5:
                    users[user_id].genre_ratings[genre_str_id[movie_genre]] += 1

    # Normalize scores
    for user in users.values():
        user.genre_ratings = user.genre_ratings / 5
    
    return users


In [1124]:
# Evaluation measures
def compute_recall(recommended_movies, rated_movies):
    count = 0
    for movie_id in recommended_movies:
        if movie_id in rated_movies:
            count += 1

    return count / len(rated_movies)


def compute_precision(recommended_movies, rated_movies):
    count = 0
    for movie_id in recommended_movies:
        if movie_id in rated_movies:
            count += 1

    return count / len(recommended_movies)


def compute_fmeasure(precision, recall):
    if precision == 0 and recall == 0:
        return 0

    return 2 * (precision * recall) / (precision + recall)

def evaluate_results(recommended_movies, testing_user):
    
    precision = compute_precision(recommended_movies, testing_user.movies_rated)
    recall = compute_recall(recommended_movies, testing_user.movies_rated)
    fmeasure = compute_fmeasure(precision, recall)

    print("Statistics about recommendation for user {}".format(testing_user.id))
    print("Precision = {:.3f}".format(precision))
    print("Recall = {:.3f}".format(recall))
    print("F-Measure = {:.3f}".format(fmeasure))
    
    return (precision, recall, fmeasure)


## Implementation of content based, collaborative filtering based and hybrid recommender

In [1125]:

# CONTENT BASED RECOMMENDER
def recommend_content_based(movies, users, user_id, N):
    similarities = {}

    # focus only on movies that has not been rated yet by the user
    non_rated_movies = [movie for movie in movies.values() if movie.id not in users[user_id].movies_rated]
    # compute similarity
    for movie in non_rated_movies:
        similarities[movie.id] = cosine_similarity(users[user_id].genre_ratings.reshape(1, -1), movie.genres_vector)[0][0]

    # Sort similarities
    sorted_similarities = sorted(similarities.items(), key=lambda item: item[1], reverse=True)
    
    return sorted_similarities[:N]
    
    
# COLLABORATIVE FILTERING BASED RECOMMENDER
def recommend_collaborative_based(users, user_id, N, Nu):
    this_user = users[user_id]

    # compute similarities with other users
    similarities = {}
    for user in users.values():
        if user_id != user.id:
            similarities[user.id] = cosine_similarity(this_user.genre_ratings.reshape(1, -1), user.genre_ratings.reshape(1, -1))[0][0]

    # Get top Nu most similar users
    sorted_similar_users = sorted(similarities.items(), key=lambda i: i[1], reverse=True)[:Nu]

    # Get movies to recommend (their rating is the average of the rating given by the similar users)
    temp_ratings = {}  # {movie_id: list of ratings}
    for user, user_similarity in [(users[u_id], sim) for u_id, sim in sorted_similar_users]:   
        for movie_id, movie_rating in user.ratings.items():  # Iterate through the Users' ratings
            # Focus only on movies that the user hasn't rated
            if movie_id not in this_user.movies_rated:    
                if movie_id not in temp_ratings:
                    temp_ratings[movie_id] = []
                # Add rating (weighted to the similarity)
                temp_ratings[movie_id].append(movie_rating * user_similarity)

    # Calculate average
    avg_ratings = {movie_id: sum(list_rating) / len(list_rating) for movie_id, list_rating in temp_ratings.items()}
    
    # Sort ratings
    sorted_ratings = sorted(avg_ratings.items(), key=lambda r: r[1], reverse=True)
    
    # Normalize ratings
    final_ratings = [(rating[0], (rating[1] / 5)) for rating in sorted_ratings]

    # Select first N (they are sorted)
    res = {}
    for idx, (k, v) in enumerate(final_ratings):
        if idx == N: break
        res[k] = v
    
    return res
    
    
# HYBRID BASED RECOMMENDER     
def recommend_hybrid_based(movies, users, user_id, content_weight, collab_weight, N, Nu):
    content_recommendations = recommend_content_based(movies, users, user_id, N)
    content_recommendations = {t[0]: t[1] for t in content_recommendations}
    collab_recommendations = recommend_collaborative_based(users, user_id, N, Nu)

    # Get all movie ids avoiding duplicates thanks to 'set'
    keys = list(set(list(content_recommendations.keys()) + list(collab_recommendations.keys())))

    final_recommendations = {}
    for key in keys:
        # if movie is recommended by both recommenders, compute weighted score
        if key in content_recommendations and key in collab_recommendations:
            final_recommendations[key] = content_recommendations[key] * content_weight + collab_recommendations[key] * collab_weight
         # else use the score from the only recommender that suggested it
        elif key in content_recommendations and key not in collab_recommendations:
            final_recommendations[key] = content_recommendations[key] * content_weight
        else:
            final_recommendations[key] = collab_recommendations[key] * collab_weight
    
    # Sort in descending order
    sorted_final = sorted(final_recommendations.items(), key=lambda item: item[1], reverse=True)
        
    return sorted_final[:N]
        

#### Let's perform the task with the first three recommenders

In [1126]:
# Parameters
N = 50      # number of results (number of recommendations)
Nu = 20      # number of users to be kept in consideration in the collaborative filtering based recommender

# Import files  
movies, genres_list, inverted_movies = read_movies('ml-latest-small/movies.csv')

# useful dictionaries
genre_str_id = {genre: i for i, genre in enumerate(genres_list)}
genre_id_str = {i: genre for i, genre in enumerate(genres_list)}

for movie in movies.values():
    movie.set_genres(genre_str_id)


users = read_ratings('ml-latest-small/ratings.csv', genre_str_id, movies)

#res = recommend_content_based(movies, users, 1, N)
#for r in res:
#    print(movies[r[0]].title)
    
#res = recommend_collaborative_based(users, 100, N, Nu)
#for r in res.keys():
#    print(movies[r].title)

#res = recommend_hybrid_based(movies, users, 1, 0.3, 0.7, N, Nu)
#for r in res:
   #print(movies[r[0]].title)



In [1127]:
users_train = read_ratings('ml-latest-small/ratings_train.csv', genre_str_id, movies)
users_test = read_ratings('ml-latest-small/ratings_test.csv', genre_str_id, movies)
id_test = 11
test_user = users_test[id_test]

In [1128]:
print("CONTENT BASED RECOMMENDER\n")
recommended_movies = recommend_content_based(movies, users_train, id_test, N)
recommended_movies = [item[0] for item in recommended_movies]
res = evaluate_results(recommended_movies, test_user)

CONTENT BASED RECOMMENDER

Statistics about recommendation for user 11
Precision = 0.020
Recall = 0.031
F-Measure = 0.024


In [1129]:
print("COLLABORATIVE FILTERING BASED RECOMMENDER\n")
recommended_movies = recommend_collaborative_based(users_train, id_test, N, Nu)
res = evaluate_results(list(recommended_movies.keys()), test_user)

COLLABORATIVE FILTERING BASED RECOMMENDER

Statistics about recommendation for user 11
Precision = 0.060
Recall = 0.094
F-Measure = 0.073


In [1142]:
print("HYBRID RECOMMENDER\n")
weights = [(0.1, 0.9), (0.3, 0.7), (0.7, 0.3), (0.9, 0.1)]
for (content_weight, collab_weight) in weights:
    print("-"*70)
    print("Content based weight: {}".format(content_weight))
    print("Collaborative filtering based weight: {}\n".format(collab_weight))
    recommended_movies = recommend_hybrid_based(movies, users_train, id_test, content_weight, collab_weight, N, Nu)
    recommended_movies = [item[0] for item in recommended_movies]
    res = evaluate_results(recommended_movies, test_user)
    print()

HYBRID RECOMMENDER

----------------------------------------------------------------------
Content based weight: 0.1
Collaborative filtering based weight: 0.9

Statistics about recommendation for user 11
Precision = 0.060
Recall = 0.094
F-Measure = 0.073

----------------------------------------------------------------------
Content based weight: 0.3
Collaborative filtering based weight: 0.7

Statistics about recommendation for user 11
Precision = 0.060
Recall = 0.094
F-Measure = 0.073

----------------------------------------------------------------------
Content based weight: 0.7
Collaborative filtering based weight: 0.3

Statistics about recommendation for user 11
Precision = 0.020
Recall = 0.031
F-Measure = 0.024

----------------------------------------------------------------------
Content based weight: 0.9
Collaborative filtering based weight: 0.1

Statistics about recommendation for user 11
Precision = 0.020
Recall = 0.031
F-Measure = 0.024



## Comments

#### The parameters I used are: number of movies to recommend = 50 and number of similar users to consider = 20. I ran the notebook for 5 randomly selected users. These parameters are also valid for the second part. The results for the content based recommender and the collaborative filtering based recommender can be seen in the 'results_base.csv' file. The results for the hybrid recommender can be seen in the 'results_hybrid.csv' file. Other than the results in the files I did more experiments and I've seen that the content based recommender worked better than the collaborative filtering one, even if they both had bad results overall. Or at least I was expecting better scores. Anyway one thing that I noticed is that content based and collaborative filtering recommended different movies and for this reason the hybrid recommender basically became a content based or a collaborative filtering one, depending on the weights. This is because of my implementation: if a movie is recommended by just one of the two recommenders its similarity gets scaled down by the corresponding weight factor, so basically if for example the weights are 0.1 and 0.9, movies recommended just by the model with a weight of 0.1 will never be selected.

## K-Nearest Neighbors based recommender

#### The reason why I chose this model instead of Tensorflow recommenders can be found in the 'issues' section.
#### Anyway, I first rearranged the data in order to have a table where each row is a user, each column a movie and the values are the ratings (zero if not rated yet)

In [1131]:
ratings_dataset = pd.read_csv('ml-latest-small/ratings_train.csv', sep=',', header=0)
del ratings_dataset['timestamp']
ratings_dataset.head()

Unnamed: 0,userId,movieId,rating
0,1,3,4.0
1,1,47,5.0
2,1,70,3.0
3,1,110,4.0
4,1,157,5.0


In [1132]:
column_names = ['movie id', 'title', 'genres']
movies_dataset = pd.read_csv('ml-latest-small/movies.csv', sep=',', header=0)
movies_dataset.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [1133]:
genres_dataset = items_dataset[['movieId', 'genres']]
genres_list = ['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
                    'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi','Thriller', 'War', 'Western', 
                    'IMAX', '(no genres listed)']
for genre in genres_list:
    genres_dataset[genre] = [0] * 9742 # (number of movies)

for row in genres_dataset.iterrows():
    index = row[0]
    content = row[1]
    film_genres = content['genres'].split('|')
    for g in film_genres:
        genres_dataset.iat[index, genres_dataset.columns.get_loc(g)] = 1

del genres_dataset['genres']
genres_dataset.head()

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,...,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,IMAX,(no genres listed)
0,1,0,1,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,2,0,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,3,0,0,0,0,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,4,0,0,0,0,1,0,0,1,0,...,0,0,0,1,0,0,0,0,0,0
4,5,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [1134]:
processed_movies = pd.merge(
    movies_dataset,
    genres_dataset,
    how="inner",
    on='movieId',
)
del processed_movies['genres']
del processed_movies['(no genres listed)']
processed_movies.head()

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,...,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,IMAX
0,1,Toy Story (1995),0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,4,Waiting to Exhale (1995),0,0,0,0,1,0,0,1,...,0,0,0,0,1,0,0,0,0,0
4,5,Father of the Bride Part II (1995),0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [1135]:
merged_dataset = pd.merge(ratings_dataset, processed_movies, how='inner', on='movieId')
merged_dataset.head()

Unnamed: 0,userId,movieId,rating,title,Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,IMAX
0,1,3,4.0,Grumpier Old Men (1995),0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
1,6,3,5.0,Grumpier Old Men (1995),0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
2,42,3,4.0,Grumpier Old Men (1995),0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
3,51,3,4.0,Grumpier Old Men (1995),0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
4,58,3,3.0,Grumpier Old Men (1995),0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0


In [1136]:
refined_dataset = merged_dataset.groupby(by=['userId','title'], as_index=False).agg({"rating":"mean"})
user_to_movie_df = refined_dataset.pivot(
    index='userId',
    columns='title',
    values='rating').fillna(0)

user_to_movie_df.sample(n=10)

title,'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 (1979),...,Zootopia (2016),Zulu (1964),Zulu (2013),[REC] (2007),[REC]³ 3 Génesis (2012),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
240,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
84,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
232,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
424,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
110,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Now let's perform the actual task

In [1137]:
# transform matrix to scipy sparse matrix
user_to_movie_sparse_df = csr_matrix(user_to_movie_df.values)

# Define and fit the model
knn_model = NearestNeighbors(metric='cosine', algorithm='brute')
knn_model.fit(user_to_movie_sparse_df)

In [1138]:
def get_similar_users(user, n):
  knn_input = np.asarray([user_to_movie_df.values[user-1]]) 
  distances, indices = knn_model.kneighbors(knn_input, n_neighbors=n+1)
  
  print("Top", n, "most simlar users to the user", user," are: \n")
  for i in range(1,len(distances[0])):
    print("{}. User {} --> {:.3f}".format(i, indices[0][i]+1, distances[0][i]))
  
  return indices.flatten()[1:] + 1, distances.flatten()[1:]


def filtered_movie_recommendations(n, mean_rating_list, refined_dataset, user_id):
  
  # Find the index of the last occurrence of 0 in mean_rating_list
  zero_index = np.where(mean_rating_list == 0)[0][-1]

  # Sort the mean_rating_list in descending order
  sorted_indices = np.argsort(mean_rating_list)[::-1]

  # Select only the indices before zero_index
  filtered_indices = sorted_indices[:list(sorted_indices).index(zero_index)]

  # Limit the number of indices to n
  n_filtered_indices = min(len(filtered_indices), n)

  # Get a list of movies watched by user_id
  movies_watched = list(refined_dataset[refined_dataset['userId'] == user_id]['title'])

  # Create a filtered list of movies based on the filtered_indices
  filtered_movie_list = list(movies_list[filtered_indices[:n_filtered_indices]])

  count = 0
  final_movie_list = []
  for m in filtered_movie_list:
    if m not in movies_watched:
      count += 1
      final_movie_list.append(m)
    if count == n:
      break
  
  if count == 0:
    print("No movies which are not seen by the input user and seen by similar users. Please increase the number of similar users.")
  elif count < n:
    print("There aren't {} movies which are not seen by the input user and seen by similar users.".format(n))
    print("Anyway, I found {} to recommend. If you want {}, please increase the number of similar users to consider.\nMovies:".format(count, n))
    for i, movie in zip(range(1,len(final_movie_list)+1), final_movie_list):
      print(i,". ",movie)
  else:
    for i, movie in zip(range(1,len(final_movie_list)+1), final_movie_list):
      print(i,". ",movie) 
  
  return final_movie_list



#### At first we find the most similar users (and their similarity)

In [1139]:
# Parameters
#id_test = 1
num_users = 20
num_movies = 50
testing_user = users_test[id_test]


similar_users_list, similarities_list = get_similar_users(user_id, num_users)

Top 20 most simlar users to the user 1  are: 

1. User 597 --> 0.800
2. User 202 --> 0.803
3. User 217 --> 0.805
4. User 368 --> 0.808
5. User 288 --> 0.810
6. User 239 --> 0.811
7. User 226 --> 0.817
8. User 590 --> 0.818
9. User 469 --> 0.821
10. User 45 --> 0.824
11. User 304 --> 0.825
12. User 524 --> 0.830
13. User 607 --> 0.831
14. User 91 --> 0.831
15. User 115 --> 0.833
16. User 57 --> 0.833
17. User 376 --> 0.837
18. User 64 --> 0.841
19. User 385 --> 0.841
20. User 313 --> 0.842


#### Then we get the recommended movies. The "role" of a similar user in the recommendation task is weighted to his similarity with the main user.

In [1140]:
weight_list = similarities_list / np.sum(similarities_list)

# Movie ratings from similar users
movie_ratings = user_to_movie_df.values[similar_users_list]
movies_list = user_to_movie_df.columns

# Broadcasting weight matrix to rating matrix so it is compatible for matrix operations
weight_list = weight_list[:,np.newaxis] + np.zeros(len(movies_list))
weighted_rating_matrix = weight_list * movie_ratings
mean_rating_list = weighted_rating_matrix.sum(axis = 0)

recommended_movies = filtered_movie_recommendations(num_movies, mean_rating_list, refined_dataset, user_id)

There aren't 50 movies which are not seen by the input user and seen by similar users.
Anyway, I found 42 to recommend. If you want 50, please increase the number of similar users to consider.
Movies:
1 .  Shawshank Redemption, The (1994)
2 .  Schindler's List (1993)
3 .  Forrest Gump (1994)
4 .  Terminator 2: Judgment Day (1991)
5 .  Inception (2010)
6 .  Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
7 .  Speed (1994)
8 .  Matrix, The (1999)
9 .  True Lies (1994)
10 .  Juno (2007)
11 .  Léon: The Professional (a.k.a. The Professional) (Léon) (1994)
12 .  American Beauty (1999)
13 .  Jurassic Park (1993)
14 .  Lord of the Rings: The Fellowship of the Ring, The (2001)
15 .  Star Wars: Episode V - The Empire Strikes Back (1980)
16 .  Usual Suspects, The (1995)
17 .  Aladdin (1992)
18 .  Pulp Fiction (1994)
19 .  Quiz Show (1994)
20 .  Bourne Identity, The (2002)
21 .  Babe (1995)
22 .  Waterworld (1995)
23 .  Silence of the Lambs, The (1991)
24 .  Fight Club

In [1141]:
recommended_movies = [inverted_movies[title] for title in recommended_movies]
res = evaluate_results(recommended_movies, testing_user)

Statistics about recommendation for user 11
Precision = 0.119
Recall = 0.156
F-Measure = 0.135


## Comments

#### The results can be seen in the 'results_knn.csv' file. Using this model I got better results than the previous ones, but I was still expecting better scores. 

## Issues and posible extensions

#### For the first part no major issues. In the second part I haven't been able to use Tensorflow recommenders because every time I tried to import it the kernel crashed. I had a similar issue in a previous homework but this time I didn't managed to solve it. For this reason I didn't follow the suggestion in the task description and I implemented a recommender based on sklearn k-nearest neighbors. A possible extension could be modifying the second part in order for the knn model to be able to compute the distance between users focusing not only on the rating they gave to movies but also the genre infromation, that isn't used in my implementation.