# Collaborative filtering

### With test dataset

In [1]:
from IPython.core.display import HTML
from movie_display import movie_display
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
movies = pd.read_csv('./dataset/test_dataset.csv')

In [3]:
movies.head()

Unnamed: 0,userId,movieId,rating
0,1,1,1
1,1,3,2
2,1,6,1
3,2,3,4
4,2,4,2


In [4]:
user_item_matrix = movies.pivot_table(index='userId', columns='movieId', values='rating')

user_item_matrix.head(20)

movieId,1,2,3,4,5,6
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1.0,,2.0,,,1.0
2,,,4.0,2.0,,
3,3.0,5.0,,4.0,4.0,3.0
4,,4.0,1.0,,3.0,
5,,,2.0,5.0,4.0,4.0
6,5.0,,,,2.0,
7,,4.0,3.0,,,
8,,,,4.0,,2.0
9,5.0,,4.0,,,
10,,2.0,3.0,,,


# Similarity Metrics

In [5]:
def calculate_similarity(df, similarity='none'):
    """
    Prepare the datafram for the cosine similarity
    Parameters
    ----------
    df : Pandas Dataframe

    similarity : string
    Returns
    -------
     dataframe : Pandas dataframe
    """
    similarity_matrix = None
    if similarity == 'none':
        similarity_matrix = df.fillna(0)
    if similarity == 'pearson':
        # centered zero matrix
        similarity_matrix = df.subtract(df.mean(axis=1), axis=0).fillna(0)   
    if similarity == 'adjusted':
        # Adjusted cosine similarity
        similarity_matrix = (df - df.mean()).fillna(0)
    
    similarity = cosine_similarity(similarity_matrix)
    # fill on diagonal with 0 
    # https://github.com/sharmin2697/Movie-Recommender-System/blob/main/code/Functions.ipynb
    np.fill_diagonal(similarity, 0)
    # set the columns and index of the initial dataframe otherwise it would messup the indexes later
    return pd.DataFrame(similarity,index=df.index, columns=df.index)
    # return pd.DataFrame(similarity, index = similarity_matrix.index, columns = similarity_matrix.index)

In [6]:
# cosine similarity
cosine_similarity_matrix = calculate_similarity(user_item_matrix)
# calc pearson similarity
pearson_similarity_matrix = calculate_similarity(user_item_matrix, 'pearson')
# calculate adjusted cosine similarity
adjusted_cosine_similarity_matrix = calculate_similarity(user_item_matrix, 'adjusted')

In [7]:
cosine_similarity_matrix

userId,1,2,3,4,5,6,7,8,9,10,11,12
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,0.0,0.730297,0.282843,0.160128,0.418167,0.379049,0.489898,0.182574,0.82885,0.679366,0.904534,0.0
2,0.730297,0.0,0.206559,0.175412,0.515339,0.0,0.536656,0.4,0.558744,0.744208,0.660578,0.383482
3,0.282843,0.206559,0.0,0.724657,0.709652,0.493172,0.46188,0.568038,0.270501,0.320256,0.639602,0.693103
4,0.160128,0.175412,0.724657,0.0,0.351541,0.218507,0.745241,0.0,0.122513,0.598321,0.362103,0.403604
5,0.418167,0.515339,0.709652,0.351541,0.0,0.190207,0.153644,0.801638,0.159968,0.213066,0.693451,0.548954
6,0.379049,0.0,0.493172,0.218507,0.190207,0.0,0.0,0.0,0.725018,0.0,0.54858,0.0
7,0.489898,0.536656,0.46188,0.745241,0.153644,0.0,0.0,0.0,0.374817,0.94299,0.467748,0.411597
8,0.182574,0.4,0.568038,0.0,0.801638,0.0,0.0,0.0,0.0,0.0,0.440386,0.766965
9,0.82885,0.558744,0.270501,0.122513,0.159968,0.725018,0.374817,0.0,0.0,0.519778,0.768946,0.0
10,0.679366,0.744208,0.320256,0.598321,0.213066,0.0,0.94299,0.0,0.519778,0.0,0.58037,0.285391


# User-User recommendations

In [8]:
def get_similar_users(user_id, similarity_strategy):
    """"Create a dict with the most similar users and ranks them based on similarity """
    # get the similarity matrix based similarity strategy
    similarity_matrix = pd.DataFrame(cosine_similarity(calculate_similarity(user_item_matrix, similarity_strategy)))
    # drop the user itself
    similar_users = similarity_matrix.loc[user_id]
    # sorting the value descending in order to get the most similar users first
    return similar_users.sort_values(ascending=False).head(3)

In [9]:
get_similar_users(0, 'none')

0     1.000000
1     0.740645
10    0.716579
Name: 0, dtype: float64

In [10]:
def get_recommendation_list(user_id, number_of_recommendations, user_item_matrix, similarity_metric):
    # get the similar users with the selected similarity strategy as pandas dataframe
    similar_users = get_similar_users(user_id, similarity_metric)

    print(similar_users)
    # get pandas dataframe with each users and their movies
    rated_movies_by_user = user_item_matrix.iloc[user_id].dropna()
    
    initial_recommendation = pd.Series()
    # print(len(similar_users))
    for user_id, similarity in similar_users.items():
        # print(user_id, similarity)
        similar_user = user_item_matrix.loc[user_id].dropna()
         
        # for movie_id, rating in similar_user.items():
        #     if movie_id not in rated_movies_by_user:
        #         # print('checking', user_id, similarity, movie_id, rating)
        #         initial_recommendation= pd.concat([initial_recommendation, pd.Series([rating * similarity], index=[movie_id])])
    # sort values descending
    recommendations = initial_recommendation.sort_values(ascending=False).head(number_of_recommendations)
    # return the movies id only
    print(initial_recommendation.head())
    movies = []
    for movie_id, _ in recommendations.items():
        movies.append(movie_id)
    
    return movies

In [11]:
# get_recommendation_list(0, 3, user_item_matrix, 'none')

In [12]:
# function to generate user-based recommendations
def get_user_reccommendations(user_id, number_of_reccommendations, similarity_matrix):
    # get similar users from the similarity matrix and sort them descending and select the first 2 rows from the dataframe
    similar_user_data = similarity_matrix.loc[user_id].sort_values(ascending=False).head(2)
    # drop the unrated movies from the user so that we can check based on this in the for loop
    rated_movies_by_user = user_item_matrix.loc[user_id].dropna()
    print(similar_user_data)
    # print('rated', rated_movies_by_user.to_list())
    recommendations = []
    for user_id, similarity in similar_user_data.items():
        # select the user from the user_item matrix and drop the null values
        similar_user_movies = user_item_matrix.loc[user_id].dropna()
        # print('current user',similar_user_movies.tolist())
        for movie_id, rating in similar_user_movies.items():
            if movie_id not in rated_movies_by_user:
                # print('user_id', user_id, 'movie', movie_id, 'similarity',similarity, 'rating', rating)
                recommendations.append((movie_id, rating * similarity))
  
    # sort list of tuples based on similarity and return it 
    # https://stackoverflow.com/a/3121985
    recommendations.sort(key=lambda tup: tup[1], reverse=True)
    return recommendations[:number_of_reccommendations]

In [13]:
get_user_reccommendations(1, 2, cosine_similarity_matrix)

userId
11    0.904534
9     0.828850
Name: 1, dtype: float64


[(4, 1.8090680674665818), (5, 1.8090680674665818)]

# Item-Item Recommendations

Give N (configurable) recommendations for a given user U (configurable) based on the
movies the user U rated with at least 3.5 stars. Explain your implementation and the
strategy that you use for selecting the final recommendations.
Which means that we have to give the user N number of recommendations for a certain user(selectable) from the movies that he has rated with at least 3.5 stars.

In the UI we would have to 
1. N number of recommendations
2. U which user id
3. Select certain movie which is rated >= 3.5 stars

How to do the recommendations: 
2. find similar items
3. Candidate selection (items you might recommend)
4. Score recommendation candidates
5. Filter candidates (top_n) recommendation

In [14]:
# Create item_user matrix by 
item_user_matrix = movies.pivot_table(index='movieId', columns='userId', values='rating')

item_user_matrix.head(20)

userId,1,2,3,4,5,6,7,8,9,10,11,12
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,1.0,,3.0,,,5.0,,,5.0,,4.0,
2,,,5.0,4.0,,,4.0,,,2.0,1.0,3.0
3,2.0,4.0,,1.0,2.0,,3.0,,4.0,3.0,5.0,
4,,2.0,4.0,,5.0,,,4.0,,,2.0,5.0
5,,,4.0,3.0,4.0,2.0,,,,,2.0,
6,1.0,,3.0,,4.0,,,2.0,,,4.0,


In [15]:
# cosine similarity
item_item_cosine_similarity_matrix = calculate_similarity(item_user_matrix)
# calc pearson similarity
item_item_pearson_similarity_matrix = calculate_similarity(item_user_matrix, 'pearson')
# calculate adjusted cosine similarity
item_item_adjusted_cosine_similarity_matrix = calculate_similarity(item_user_matrix, 'adjusted')

In [101]:
# item_item_cosine_similarity_matrix
item_item_pearson_similarity_matrix
# item_item_adjusted_cosine_similarity_matrix

movieId,1,2,3,4,5,6
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,0.0,-0.178542,0.414039,-0.084767,-0.358569,0.57752
2,-0.178542,0.0,-0.526235,0.397796,0.607644,-0.260206
3,0.414039,-0.526235,0.0,-0.598444,-0.433013,0.332106
4,-0.084767,0.397796,-0.598444,0.0,0.545545,-0.075314
5,-0.358569,0.607644,-0.433013,0.545545,0.0,0.038348
6,0.57752,-0.260206,0.332106,-0.075314,0.038348,0.0


In [174]:
# For each similar item that has not been seen by the user, calculate the expected rating
# Using the similarity matrixes 
# In order to predict the rating we have to know the following and use the matrixes
# We have to know the movie id to which we want to predict and the user id as well
# Following we need the similarity matrix to find similar items and the item_user matrix
# Prediction formula used is R(m, u) = {∑ ⱼ S(m, j)R(j, u)}/ ∑ ⱼ S(m, j)
# R(m, u): the rating for movie m by user u
# S(m, j): the similarity between movie m and movie j
# j ∈ J where J is the set of the similar movies to movie m
# source: https://medium.com/@Sumeet_Agrawal/item-based-collaborative-filtering-4e64f65ae6ea
def rating_prediction(movie_id, user_id, user_item_matrix, similarity_matrix, debug=False):
    # rating from the user 
    # contains movie_id and the rating
    user_rating = user_item_matrix.loc[:, user_id].sort_values(ascending=False).dropna()
    # print('rating from user',user_rating)
    # the weights for our calculation
    # We look at the similarities between movie_id and the other movies 
    # and we sort and descending and pick 10 most similar items
    similarity_items_by_id = similarity_matrix.loc[movie_id][user_rating.index]
    # get the items that have a similarity greater than 0
    similarity_items_by_id = similarity_items_by_id[similarity_items_by_id > 0]
    # print('similar items by id',similarity_items_by_id)

    weighted_average = np.average(user_rating.loc[similarity_items_by_id.index], weights = similarity_items_by_id)
    # https://towardsdatascience.com/3-ways-to-compute-a-weighted-average-in-python-4e066de7a719
    predicted_rating = round(weighted_average, 1)
    if (debug):
        print('predicted', predicted_rating)
        print('user ratings', user_rating.loc[similarity_items_by_id.index])
        print('similar items by id',similarity_items_by_id)
    return predicted_rating

In [180]:
# movie_id 2 user_id 4 expected  4.0 predicted nan
# movie_id 3 user_id 4 expected  1.0 predicted nan
# movie_id 5 user_id 4 expected  3.0 predicted nan
rating_prediction(2, 12, item_user_matrix, item_item_pearson_similarity_matrix, debug=True)

predicted 5.0
user ratings movieId
4    5.0
Name: 12, dtype: float64
similar items by id movieId
4    0.397796
Name: 2, dtype: float64


5.0

In [19]:
def recommend_movies_by_item(user_id, number_of_recommendations, item_user_matrix, user_item_matrix, similarity_matrix, debug=False):
   # 1. Take the k highest rated items of a user, for this we need the ones with more than 3.5
   highest_rated_items = user_item_matrix.loc[user_id].loc[lambda rating : rating >= 0].sort_values(ascending=False)
   # Filter ones rated with more than 3.5
   # highest_rated_items.loc[lambda rating : rating >= 1]
   # 2. Find j similar items to those k highest rated items
   similar_items_to_highest_items = similarity_matrix.loc[highest_rated_items.index]
   if debug:
      print('huighest rated items index, which is the id of the movie',highest_rated_items.index)
      print('huighest rated items', highest_rated_items)
      print('similar items', similar_items_to_highest_items)
   # 3. For each similar item that has not been seen by the user, calculate the expected rating
   # checking if the value is NaN by checking if it's equals to itself https://stackoverflow.com/a/944712
   unrated_items_by_user = user_item_matrix.loc[user_id].loc[lambda rating : rating != rating]
   if debug:
      print('unrated movies', unrated_items_by_user)
   recommendation_list = []
   for movie_id, _ in unrated_items_by_user.items():
      # print('movie_id', movie_id)
      predicted_rating = rating_prediction(movie_id, user_id, user_item_matrix, similarity_matrix)
      recommendation_list.append((movie_id, predicted_rating))
   # 4. Select the top n movies with the highest rating
   recommendation_list.sort(key=lambda tup: tup[1], reverse=True)
   return recommendation_list[:number_of_recommendations]

In [20]:
recommend_movies_by_item(5, 3, item_user_matrix, user_item_matrix, item_item_pearson_similarity_matrix, debug=True)

huighest rated items index, which is the id of the movie Int64Index([4, 5, 6, 3], dtype='int64', name='movieId')
huighest rated items movieId
4    5.0
5    4.0
6    4.0
3    2.0
Name: 5, dtype: float64
similar items movieId         1         2         3         4         5         6
movieId                                                            
4       -0.084767  0.397796 -0.598444  0.000000  0.545545 -0.075314
5       -0.358569  0.607644 -0.433013  0.545545  0.000000  0.038348
6        0.577520 -0.260206  0.332106 -0.075314  0.038348  0.000000
3        0.414039 -0.526235  0.000000 -0.598444 -0.433013  0.332106
unrated movies movieId
1   NaN
2   NaN
Name: 5, dtype: float64


[(2, 4.4), (1, 3.2)]

# Validation
- A-B testing
- RMSE
- Hit-rate

# Hit rate
1. For each user, leave one high rating out (store this rating in a test set)
2. Recommend n movies
3. If the "left-out"-movie is part of your recommendation, you've got a hit!

In [21]:
# User-User
# Split the data into a training set and a test set
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(user_item_matrix, test_size=0.2)
# Initialize the hit rate
hit_rate = 0
# Iterate over the users in the test set
# print(test_df.index)
# print(test_df.columns)
# print('train', train_df)
for movie_id, row in test_df.iterrows():
    # print(type(row))
    for user_id, rating in row.items():
        # Select the row in the similarity matrix corresponding to the user
        user_row = cosine_similarity_matrix.loc[user_id]
        # Select the top N most similar users
        N = 10
        similar_users = user_row.sort_values(ascending=False).head(N)
        # print(similar_users)
        # Extract the user ids of the similar users
        similar_user_ids = similar_users.index
        # print(similar_user_ids)
        # Select the rows of the user-item matrix corresponding to the similar users
        print(similar_user_ids)
        similar_user_rows = train_df[train_df.isin(similar_user_ids)]

        # similar_user_rows = train_df.loc[similar_user_ids]
        # print(similar_user_rows)
        # Check if the recommended item is present in the test set for the user
        # print('hit rate', movie_id, 'hit2 ', similar_user_rows.columns)
        if movie_id in similar_user_rows.columns:
            hit_rate += 1

# Calculate the hit rate
hit_rate /= test_df.shape[0]
print(f' User-user Hit rate: {hit_rate:.2f}')

Int64Index([11, 9, 2, 10, 7, 5, 6, 3, 8, 4], dtype='int64', name='userId')
Int64Index([10, 1, 11, 9, 7, 5, 8, 12, 3, 4], dtype='int64', name='userId')
Int64Index([4, 5, 12, 11, 8, 6, 7, 10, 1, 9], dtype='int64', name='userId')
Int64Index([7, 3, 10, 12, 11, 5, 6, 2, 1, 9], dtype='int64', name='userId')
Int64Index([8, 3, 11, 12, 2, 1, 4, 10, 6, 9], dtype='int64', name='userId')
Int64Index([9, 11, 3, 1, 4, 5, 2, 6, 7, 8], dtype='int64', name='userId')
Int64Index([11, 9, 2, 10, 7, 5, 6, 3, 8, 4], dtype='int64', name='userId')
Int64Index([10, 1, 11, 9, 7, 5, 8, 12, 3, 4], dtype='int64', name='userId')
Int64Index([4, 5, 12, 11, 8, 6, 7, 10, 1, 9], dtype='int64', name='userId')
Int64Index([7, 3, 10, 12, 11, 5, 6, 2, 1, 9], dtype='int64', name='userId')
Int64Index([8, 3, 11, 12, 2, 1, 4, 10, 6, 9], dtype='int64', name='userId')
Int64Index([9, 11, 3, 1, 4, 5, 2, 6, 7, 8], dtype='int64', name='userId')
Int64Index([11, 9, 2, 10, 7, 5, 6, 3, 8, 4], dtype='int64', name='userId')
Int64Index([10, 1, 

In [22]:
# User-User
# 1. For each user, leave one high rating out (store this rating in a test set)
hit_rate = 0
N = 10
user_number = len(user_item_matrix)
for user_id, values in user_item_matrix.iterrows():
    # print('values', values)
    highest_rating = values.sort_values(ascending=False).index[0]
    # 2. Recommend n movies
    user_recommendations = get_user_reccommendations(user_id, 10, cosine_similarity_matrix)
    # 3. If the "left-out"-movie is part of your recommendation, you've got a hit!
    # print('user_recommendations', user_recommendations)
    # 3. If the "left-out"-movie is part of your recommendation, you've got a hit!
    print('checking',movie_id , user_recommendations)
    for movie_id ,rating in user_recommendations:
        if highest_rating == rating:
            hit_rate += 1
print('User-user Hit rate:', hit_rate / float(user_number))

userId
11    0.904534
9     0.828850
Name: 1, dtype: float64
checking 9 [(4, 1.8090680674665818), (5, 1.8090680674665818), (2, 0.9045340337332909)]
userId
10    0.744208
1     0.730297
Name: 2, dtype: float64
checking 2 [(2, 1.4884168150705015), (1, 0.7302967433402215), (6, 0.7302967433402215)]
userId
4    0.724657
5    0.709652
Name: 3, dtype: float64
checking 6 [(3, 1.4193048402327797), (3, 0.7246573018525413)]
userId
7    0.745241
3    0.724657
Name: 4, dtype: float64
checking 3 [(4, 2.898629207410165), (1, 2.173971905557624), (6, 2.173971905557624)]
userId
8    0.801638
3    0.709652
Name: 5, dtype: float64
checking 6 [(2, 3.5482621005819492), (1, 2.1289572603491695)]
userId
9     0.725018
11    0.548580
Name: 6, dtype: float64
checking 1 [(3, 2.9000739528287083), (3, 2.7429019252949667), (6, 2.1943215402359733), (4, 1.0971607701179866), (2, 0.5485803850589933)]
userId
10    0.942990
4     0.745241
Name: 7, dtype: float64
checking 2 [(5, 2.235723940575298)]
userId
5     0.801638
12

In [23]:
#Item-Item


# RMSE

1. Remove some test data from the dataset (remove those ratings from the training set)
2. Predict ratings for the missing items
3. Compare to the real values in the test data set

In [179]:
# Split the data into a training set and a test set
train_df, test_df = train_test_split(user_item_matrix, test_size=0.1)

test_df

movieId,1,2,3,4,5,6
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3,3.0,5.0,,4.0,4.0,3.0
12,,3.0,,5.0,,


In [181]:
def calculate_rmse(test_dataframe, user_item_matrix, similarity_matrix):
    
    # Initialize the list of squared differences
    squared_differences = []

    # Iterate over the user-item pairs in the test set
    for user_id, row in test_df.iterrows():
        for movie_id in row.index:
            # Check if the row value is nana if so continue
            if (row[movie_id] != row[movie_id]):
                continue
            # Calculate the predicted rating
            predicted_rating = rating_prediction(movie_id, user_id, item_user_matrix, similarity_matrix)
            print('movie_id', movie_id, 'user_id', user_id, 'expected ', row[movie_id],'predicted',predicted_rating)
            # Calculate the squared difference between the predicted and actual rating
            squared_differences.append((predicted_rating - row[movie_id]) ** 2)

    # Calculate the MSE as the average of the squared differences
    mse = np.mean(squared_differences)
    print("MSE:", mse)
    # Calculate the RMSE as the square root of the MSE
    return np.sqrt(mse)

In [182]:
rmse = calculate_rmse(test_df, user_item_matrix, item_item_pearson_similarity_matrix)
print(f'RMSE: {rmse:.2f}')

movie_id 1 user_id 3 expected  3.0 predicted 3.0
movie_id 2 user_id 3 expected  5.0 predicted 4.0
movie_id 4 user_id 3 expected  4.0 predicted 4.4
movie_id 5 user_id 3 expected  4.0 predicted 4.5
movie_id 6 user_id 3 expected  3.0 predicted 3.1
movie_id 2 user_id 12 expected  3.0 predicted 5.0
movie_id 4 user_id 12 expected  5.0 predicted 3.0
MSE: 1.3457142857142856
RMSE: 1.16


In [25]:
# # Item-Item
# reader = Reader(rating_scale=(0.5, 5))
# data = Dataset.load_from_df(df[['userId', 'title', 'rating']], reader)
# trainSet, testSet = train_test_split(data, test_size=.25, random_state=0)
# algo = SVD(random_state=0)
# algo.fit(trainSet)
# predictions = algo.test(testSet)

# def MAE(predictions):
#         return accuracy.mae(predictions, verbose=False)
# def RMSE(predictions):
#         return accuracy.rmse(predictions, verbose=False)
    
# print("RMSE: ", RMSE(predictions))
# print("MAE: ", MAE(predictions))

In [26]:
# https://towardsdatascience.com/which-evaluation-metric-should-you-use-in-machine-learning-regression-problems-20cdaef258e

# Widgets Implementation


In [27]:
# def get_indexes(imdb_id=[]):
#     movies = []
#     for x in range(len(imdb_id)):
#         movieIndex = df.loc[df['imdbId'] == imdb_id[x]].index[0]
#         movies.append(df.iloc[movieIndex])
    
#     return movies

# def displayRecommendations(recommendations=[]):
#     movies = []
#     for _, row in links.iterrows():
#         if row['movieId'] in recommendations:
#             movies.append(int(row['imdbId']))
    
#     return get_indexes(movies)

In [28]:
def user_user_recommendations(selected_user, recommendations, similarity_strategy):
    if similarity_strategy == 0:
        recommendation_list = get_recommendation_list(selected_user, recommendations, similarity_strategy='none')
    if similarity_strategy == 1:
        recommendation_list = get_recommendation_list(selected_user, recommendations, similarity_strategy='pearson')
    if similarity_strategy == 2:
        recommendation_list = get_recommendation_list(selected_user, recommendations, similarity_strategy='adjusted')

    return recommendation_list

In [29]:
def item_item_recommendations(selected_user, recommendations, similarity_strategy):
    if similarity_strategy == 0:
        recommendation_list = get_recommendation_list(selected_user, recommendations, similarity_strategy='none')
    if similarity_strategy == 1:
        recommendation_list = get_recommendation_list(selected_user, recommendations, similarity_strategy='pearson')
    if similarity_strategy == 2:
        recommendation_list = get_recommendation_list(selected_user, recommendations, similarity_strategy='adjusted')

    return recommendation_list

In [30]:
from IPython.core.display_functions import clear_output
import ipywidgets as widgets

selected_user = widgets.Dropdown(
    options=list(x+1 for x in range(movies['userId'].nunique())),
    description='Select a user:\n ',
    disabled=False,
    layout={'width': 'max-content'}
)
recommendations = widgets.IntText(
    min=0,
    value=3,
    description='Number of recommendations:\n ',
    disabled=False,
)
recommendation_method = widgets.RadioButtons(
    options=[('User-User',0), ('Item-Item',1)],
    description='Recommendation:',
    disabled=False
)
similarity_strategy = widgets.RadioButtons(
    options=[('Cosine',0), ('Pearson',1), ('Adjusted cosine',2)],
    description='Similarity metrics:',
    disabled=False
)
button = widgets.Button(
    description='Recommendation',
    disabled=False,
)

def execute_function(_):
    with out:
          clear_output()
          recommendation_list = []
          print(recommendation_method)
          if recommendation_method.value == 0:
            recommendation_list = user_user_recommendations(selected_user.value, recommendations.value, similarity_strategy.value)
          if recommendation_method.value == 1:
            recommendation_list = item_item_recommendations(selected_user.value, recommendations.value, similarity_strategy.value)
          print(f'Selected User: {selected_user.value}')
            
          print('Recommendation(s):')
          print(f'Got movies with the following ids as recommendations: {recommendation_list}')
            
button.on_click(execute_function)
out = widgets.Output()

box = widgets.VBox([recommendations, selected_user, recommendation_method, similarity_strategy, button, out])
box

VBox(children=(IntText(value=3, description='Number of recommendations:\n '), Dropdown(description='Select a u…