In [1]:

# Recommender System Demo
## WQD7009: Group Assignment (8/1/2021)

# Group members:
# Tan Chee Yu (17028703)
# Mohd Anas Ahmad (S2001089)
# Regine Lim (S2010204)
# Kong Mun Yeen (17055182)
# Archana Sundari Neelamegam (S17216646)
# Avinnaash Suresh (17219903)

# This app was developed for the WQD7009 group assignment. It applies 
# Singular Value Decomposition (SVD) and cosine similarity to rank 
# movies based on individual ratings.


In [2]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import HTML

import warnings
warnings.filterwarnings('ignore')

In [3]:
# HTML snippet holding the introduction, the group members and the usage
# steps. It is passed to widgets.HTML(...) inside App.__init__ and shown
# next to the app controls.
# NOTE(review): the user-facing text says "the genre that interest you";
# "interests" is probably intended. The string is left unchanged here.

DESCRIPTION = """\
<div class="app-sidebar">
<p><b>Introduction:</b><br>This app is developed for WQD7009 group assignment. This is to apply the Singular Value Decomposition (SVD) and cosine similarity to rank movies based on individual ratings.</p>
<p><b>Group Members:</b><br>
Tan Chee Yu (17028703)<br>
Mohd Anas Ahmad (S2001089)<br>
Regine Lim (S2010204)<br>
Kong Mun Yeen (17055182)<br>
Archana Sundari Neelamegam (S17216646)<br>
Avinnaash Suresh (17219903)</p>

<p><b>Steps:</b><br>1. Select the genre that interest you. The movie list will be filtered based on this.<br>
2. Select the movie of your interest. Top recommendations will be generated.</p>
</div>
"""

In [4]:
# Functions Declaration
#
# This function calculates the cosine similarity. It is based on the 
# example tutorial stated in the assignment, with some modifications. 
# The most similar item is usually the queried movie itself, so the 
# result is checked before it is returned to make sure the queried 
# movie_id is not part of the output.
#
# Credit to Dr. Vaibhav Kumar
# Ref: https://analyticsindiamag.com/singular-value-decomposition-svd-application-recommender-system/

def top_cosine_similarity(data, movie_id, top_n=10):
    """
    Rank the rows of ``data`` by cosine similarity to one movie's row.
    
    Parameters
    ----------    
    data : np.ndarray
        2-D array with one feature row per movie. Row ``i`` belongs to the
        movie whose id is ``i + 1``.
    movie_id : int
        The id of the movie as per the dataset (ids start from 1).
    top_n : int, optional
        The maximum number of similar movies to return (default is 10).
        
    Return
    ----------
    A numpy.ndarray containing at most top_n row indexes, most similar first.
    These are index positions in ``data``, not movie ids (add 1 to get the
    id). The queried movie itself is never part of the result. Rows whose
    cosine similarity is undefined (all-zero rows, e.g. movies without any
    rating) are left out, so fewer than top_n indexes may be returned; the
    result is empty when the queried movie's own row is all zeros.
    
    Raises
    ----------
    IndexError
        If movie_id does not address a row of ``data``. Without this check a
        movie_id <= 0 would silently wrap around to the end of the matrix.
    """
    data = np.asarray(data)
    
    # Movie id starts from 1 in the dataset, row positions start from 0.
    index = movie_id - 1
    if not 0 <= index < data.shape[0]:
        raise IndexError(
            'movie_id {0} is outside the valid range 1..{1}'.format(movie_id, data.shape[0]))
    movie_row = data[index, :]
    
    # Row-wise Euclidean norms, then cosine similarity against every row.
    # Zero-norm rows give 0/0 = NaN; silence the warning and filter below.
    magnitude = np.sqrt(np.einsum('ij, ij -> i', data, data))
    with np.errstate(divide='ignore', invalid='ignore'):
        similarity = np.dot(movie_row, data.T) / (magnitude[index] * magnitude)
    
    # Only rows with a well-defined similarity can be recommended, and the
    # queried movie must never recommend itself.
    valid = np.isfinite(similarity)
    valid[index] = False
    
    # Highest similarity first. A stable sort keeps ties in row order so the
    # ranking is deterministic.
    scores = np.where(valid, similarity, -np.inf)
    sort_indexes = np.argsort(-scores, kind='stable')
    sort_indexes = sort_indexes[valid[sort_indexes]]
    
    return sort_indexes[:max(top_n, 0)]
    

In [5]:
# The function similar_movies is to get a formatted string of top_n movies. 
# Credit to Dr. Vaibhav Kumar
# Ref: https://analyticsindiamag.com/singular-value-decomposition-svd-application-recommender-system/

def similar_movies(movie_data, movie_id, top_indexes):
    """
    Output top_n movies in a formatted string.
    
    Parameters
    ----------    
    movie_data : pd.DataFrame
        The dataset containing the movies title and genres. It must have
        the columns ``movie_id`` and ``title``.
    movie_id : int
        The id of the movie as per the dataset.
    top_indexes : np.ndarray or any iterable of int
        The zero-based index positions of the top_n movies (index + 1 is
        the movie id).
        
    Return
    ----------
    A formatted string: a header line naming the selected movie, followed
    by one recommended title per line. Recommended ids that have no entry
    in movie_data are skipped.
    
    Raises
    ----------
    IndexError
        If movie_id itself has no entry in movie_data.
    """
    
    def _titles_for(wanted_id):
        # All titles registered under wanted_id (empty when the id is unknown).
        return movie_data[movie_data.movie_id == wanted_id].title.values
    
    selected = _titles_for(movie_id)
    if len(selected) == 0:
        raise IndexError('movie_id {0} not found in movie_data'.format(movie_id))
    
    lines = ['Recommendations for {0}: \n\n'.format(selected[0])]
    for position in top_indexes:
        # Index positions are zero-based while movie ids start from 1.
        matches = _titles_for(position + 1)
        if len(matches) == 0:
            # The id has no entry in movie_data, so there is nothing to show.
            continue
        lines.append(str(matches[0]) + '\n')
    return ''.join(lines)

In [6]:
# The similar_movies_html function is to get it formatted in html.
def similar_movies_html(movie_data, movie_id, top_indexes):
    """
    Output top_n movies in a html formatted string.
    
    Parameters
    ----------    
    movie_data : pd.DataFrame
        The dataset containing the movies title and genres. It must have
        the columns ``movie_id`` and ``title``.
    movie_id : int
        The id of the selected movie. It is not used for the output and is
        kept so the signature matches similar_movies.
    top_indexes : np.ndarray or any iterable of int
        The zero-based index positions of the top_n movies (index + 1 is
        the movie id).
        
    Return
    ----------
    A html formatted string with one numbered, HTML-escaped title per
    entry, each terminated by ``<br>``. Ids that have no entry in
    movie_data are skipped and the numbering stays consecutive. The string
    is empty when there is nothing to list.
    """
    # Local import so the function does not depend on another cell.
    from html import escape
    
    entries = []
    for position in top_indexes:
        # Index positions are zero-based while movie ids start from 1.
        matches = movie_data[movie_data.movie_id == position + 1].title.values
        if len(matches) == 0:
            # The id has no entry in movie_data, so there is nothing to show.
            continue
        # Escape the title: characters such as '&' or '<' must not be
        # interpreted as markup by the HTML widget.
        entries.append('{0}. {1}<br>'.format(len(entries) + 1, escape(str(matches[0]))))
    return ''.join(entries)

In [7]:
# Loading and Preparing the Data

In [8]:
# Load the MovieLens 1M dataset (downloaded from
# https://grouplens.org/datasets/movielens/1m/). Both files use '::' as the
# field separator and have no header row, so column names are given here.
# Credit to Dr. Vaibhav Kumar
# Ref: https://analyticsindiamag.com/singular-value-decomposition-svd-application-recommender-system/

# One row per rating: which user gave which movie which rating, and when.
data = pd.read_csv(
    './data/ratings.dat',
    sep='::',
    names=['user_id', 'movie_id', 'rating', 'time'],
    engine='python',
    encoding='latin-1',
)

# One row per movie: title and '|'-separated genres, keyed by movie_id.
movie_data = pd.read_csv(
    './data/movies.dat',
    sep='::',
    names=['movie_id', 'title', 'genre'],
    engine='python',
    encoding='latin-1',
)

In [9]:
# Computing the Singular Value Decomposition (SVD)
# Credit to Dr. Vaibhav Kumar
# Ref: https://analyticsindiamag.com/singular-value-decomposition-svd-application-recommender-system/

# Creating the rating matrix (rows as movies, columns as users).
# np.zeros is used on purpose: np.ndarray(shape=...) returns uninitialised
# memory, so every (movie, user) pair without a rating would hold arbitrary
# values. Unrated cells must be 0 for the mean and the SVD to be meaningful.
ratings_mat = np.zeros(
    shape=(np.max(data.movie_id.values), np.max(data.user_id.values)),
    dtype=np.float64)
# Ids start from 1, matrix positions start from 0.
ratings_mat[data.movie_id.values - 1, data.user_id.values - 1] = data.rating.values

# Normalizing the matrix (subtract each movie's mean rating from its row)
normalised_mat = ratings_mat - ratings_mat.mean(axis=1, keepdims=True)

# Computing the Singular Value Decomposition (SVD).
# A has users as rows and movies as columns.
A = normalised_mat.T / np.sqrt(ratings_mat.shape[0] - 1)
U, S, V = np.linalg.svd(A)

In [10]:
# Keep only the first k components: take the first k rows of V and
# transpose, which gives one row per movie with k columns.
# Credit to Dr. Vaibhav Kumar
# Ref: https://analyticsindiamag.com/singular-value-decomposition-svd-application-recommender-system/

k = 50
sliced = V[:k, :].T

In [11]:
# Quick check that the functions above work: compute the recommendations
# for one movie. Uncomment the last line to print them.
# Credit to Dr. Vaibhav Kumar
# Ref: https://analyticsindiamag.com/singular-value-decomposition-svd-application-recommender-system/

movie_id = 33  # movie to find recommendations for (an id from movies.dat)
top_n = 10  # how many results to print
indexes = top_cosine_similarity(sliced, movie_id, top_n=top_n)
# print(similar_movies(movie_data, movie_id, indexes))

In [12]:
%%capture
# Prepare the data behind the dropdown lists of the app: a dictionary that
# maps every genre to the list of (title, movie_id) pairs of its movies.

# Start with an empty list for every genre found in the '|'-separated
# genre strings.
movies_by_genre = {
    genre: []
    for genre_list in movie_data.genre.unique()
    for genre in genre_list.split('|')
}

def populate_movies(x):
    """Append one movie row as (title, movie_id) under each of its genres."""
    entry = (x.title, x.movie_id)
    for genre in x['genre'].split('|'):
        movies_by_genre[genre].append(entry)

# Fill the dictionary row by row (the return value of apply is not needed)
movie_data.apply(populate_movies, axis=1)

# Check the populated genres
movies_by_genre.keys()

In [13]:
# Demo App
## Declaring and running the Voila/IPython app.

In [16]:
class App:
    """
    Interactive recommender demo built from ipywidgets, meant to be shown with Voila.
    Create an instance and call app.show() to show the app.

    The app lets the user pick a genre, a movie of that genre and the number
    of recommendations, and lists the most similar movies below the controls.

    It relies on names defined elsewhere in this notebook: movies_by_genre
    (genre -> list of (title, movie_id)), sliced, movie_data, DESCRIPTION,
    top_cosine_similarity and similar_movies_html.
    """
    
    def __init__(self) -> None:
        """Create all widgets, lay them out and fill in the initial recommendations."""
        # Initiate the genre option to the first item in the key list.
        # Note: self.genre is only assigned here and is not read again in this class.
        self.genre = list(movies_by_genre.keys())[0]
        self.genre_index = 0
        
        # How many recommendations (refreshed from the selector in _update_app)
        self.top_n = 10
        
        # Get the genre list from keys in movies_by_genre
        self.genre_list = list(movies_by_genre.keys())
        
        # Setting up the widgets. Order matters: the movie dropdown reads the
        # genre dropdown's value, so the genre dropdown must be created first.
        # NOTE(review): widgets.widgets.HTML is spelled widgets.HTML everywhere else in this class.
        self.movie_label = widgets.widgets.HTML("") # HTML list of the top_n similar movies
        self._genre_dropdown = self.create_genre_dropdown(self.genre_index) # To select genre
        self._movies_dropdown = self.create_movies_dropdown(0) # To select the movie (first entry preselected)
        # Number of recommendations to display, limited to 1..40
        self._top_n_select = widgets.BoundedIntText(
            value=10,
            min=1,
            max=40,
            step=1,
            disabled=False,
            layout=widgets.Layout(margin='0 5em 0 1em', padding='0 1em 0 1em')
        )
        self._top_n_select.observe(self._on_change, names=['value'])
        # Header line above the recommendations; its text is set in _update_app
        self._recommendation_header = widgets.HTML("", layout=widgets.Layout(margin='2em 0 0 0'))
        
        # Setting up the left-side of the app: labelled controls followed by the results
        self.container = widgets.VBox([
            widgets.HTML("<b>Movie genre:</b>"),
            self._genre_dropdown,
            widgets.HTML("<b>Movie title:</b>"),
            self._movies_dropdown,
            widgets.HTML("<b>How many to display?</b>"),
            self._top_n_select,
            self._recommendation_header,
            self.movie_label
        ], layout=widgets.Layout(margin='0 2em 0 2em'))
        
        # Adding the title, and split the area below title into 2 (left and right):
        # the controls/results container and the DESCRIPTION text.
        # self.body is the widget returned by show().
        self.body = widgets.VBox([
            widgets.HTML("<h1>Recommender System: A Simple Demo</h1>"),
            widgets.HBox([
                self.container,
                widgets.HTML(DESCRIPTION, layout=widgets.Layout(margin='2em 2em 0 2em', width='30em'))
            ])
        ])
        
        # Fill the header and the recommendation list for the initial selection
        self._update_app()
    
    def show(self):
        """
        Show the app in Voila.

        Returns the top-level widget (self.body) so that it is displayed when
        it is the last expression of a cell.
        """
        return self.body
    
    def create_genre_dropdown(self, index: int):
        """
        Create and populate the widget to select the genre of the movie.

        index is the position in self.genre_list of the genre that is
        selected initially. Returns the dropdown widget, already observed by
        _on_change.
        """
        # genre list from keys in movies_by_genre
        value = self.genre_list[index]
        
        # Generate the widget
        dropdown = widgets.Dropdown(options=self.genre_list,
                                    value=value, 
                                    layout=widgets.Layout(margin='0 5em 0 1em', padding='0 1em 0 1em'))
        
        # Setup the signal for when the widget value changes
        dropdown.observe(self._on_change, names=['value'])
        return dropdown
    
    def create_movies_dropdown(self, index: int):
        """
        Create and populate the widget to select the title of the movie.

        The options are the (title, movie_id) pairs of the currently selected
        genre; index is the position of the pair that is selected initially.
        Requires self._genre_dropdown to exist already. Returns the dropdown
        widget, already observed by _on_change.
        """
        # Update the movie list based on the genre
        options = movies_by_genre[self._genre_dropdown.value]
        value = options[index]
        
        # Generate the widget; value[1] is the movie_id of the chosen (title, movie_id) pair
        dropdown = widgets.Dropdown(options=options, 
                                    value=value[1], 
                                    layout=widgets.Layout(margin='0 5em 0 1em', padding='0 1em 0 1em'))
        
        # Setup the signal for when the widget value changes
        dropdown.observe(self._on_change, names=['value'])
        return dropdown
        
    def _on_change(self, _) -> None:
        """
        A callback function to handle changes to any of the widgets.

        The change event itself is ignored; the whole app is refreshed.
        """
        self._update_app()
        
    def _update_app(self) -> None:
        """
        A function to update the fields.

        Reads the current widget values and refreshes, in this order: the
        number of recommendations and the header text, the movie options for
        the selected genre, and the list of recommended movies.
        """
        # Update the top_n based on selection
        self.top_n = self._top_n_select.value
        self._recommendation_header.value = "<b>Here are the top "+str(self.top_n)+" similar movies:</b>"
        
        # Update movie dropdown based on genre selection.
        # NOTE(review): this assignment runs on every change, not only on genre
        # changes; how ipywidgets reacts to re-assigning options (e.g. whether the
        # selection is reset and _on_change fires again) is not visible here - confirm.
        self._movies_dropdown.options = movies_by_genre[self._genre_dropdown.value]
        
        # Update the recommendations based on movie title selection
        # (the value passed on is the dropdown's current value, used as movie_id)
        indexes = top_cosine_similarity(sliced, self._movies_dropdown.value, self.top_n)
        self.movie_label.value = similar_movies_html(movie_data, self._movies_dropdown.value, indexes)

In [17]:
# Instantiate and running the app.
app = App()
app.show()

VBox(children=(HTML(value='<h1>Recommender System: A Simple Demo</h1>'), HBox(children=(VBox(children=(HTML(va…