# USING SINGULAR VALUE DECOMPOSITION (SVD) IN A  RECOMMENDER SYSTEMS

Tutorial from https://analyticsindiamag.com/singular-value-decomposition-svd-application-recommender-system/#:~:text=In%20the%20context%20of%20the,given%20to%20items%20by%20users

### Team Members:
1. Baharul Hisyam bin Baharudin (S2039609)
2. Nor Azyra Binti Omar (17120332)
3. Nur Farzanah Roslan (17089384)
4. Wardatul Fadhilah binti Amir Nazri (S2039977)

## Importing Libraries

In [4]:
import numpy as np
import pandas as pd

## Reading dataset (MovieLens 1M movie ratings dataset

Data source: https://grouplens.org/datasets/movielens/1m/

In [5]:
data = pd.io.parsers.read_csv('data/ratings.dat', 
    names=['user_id', 'movie_id', 'rating', 'time'],
    engine='python', delimiter='::')
movie_data = pd.io.parsers.read_csv('data/movies.dat',
    names=['movie_id', 'title', 'genre'],
    engine='python', delimiter='::')


In [33]:
print(len(data))
data[:5]

1000209


Unnamed: 0,user_id,movie_id,rating,time
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [35]:
print(len(movie_data))
movie_data[:5]

3883


Unnamed: 0,movie_id,title,genre
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


## Creating the rating matrix

Rows as movies and columns as users

In [8]:
ratings_mat = np.ndarray(
    shape=(np.max(data.movie_id.values), np.max(data.user_id.values)),
    dtype=np.uint8)
ratings_mat[data.movie_id.values-1, data.user_id.values-1] = data.rating.values

In [9]:
ratings_mat[:5]

array([[5, 0, 0, ..., 0, 0, 3],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

## Normalizing the matrix

Subtract mean off

In [10]:
normalised_mat = ratings_mat - np.asarray([(np.mean(ratings_mat, 1))]).T

In [11]:
normalised_mat[:5]

array([[ 3.57400662, -1.42599338, -1.42599338, ..., -1.42599338,
        -1.42599338,  1.57400662],
       [-0.37152318, -0.37152318, -0.37152318, ..., -0.37152318,
        -0.37152318, -0.37152318],
       [-0.23874172, -0.23874172, -0.23874172, ..., -0.23874172,
        -0.23874172, -0.23874172],
       [-0.07682119, -0.07682119, -0.07682119, ..., -0.07682119,
        -0.07682119, -0.07682119],
       [-0.14735099, -0.14735099, -0.14735099, ..., -0.14735099,
        -0.14735099, -0.14735099]])

## Computing the Singular Value Decomposition (SVD)

In [12]:
A = normalised_mat.T / np.sqrt(ratings_mat.shape[0] - 1)
U, S, V = np.linalg.svd(A)

In [50]:
#Function to calculate the cosine similarity (sorting by most similar
#and returning the top N)
def top_cosine_similarity(data, movie_id, top_n=10, bottom_n=3):
    index = movie_id - 1 # Movie id starts from 1 in the dataset
    movie_row = data[index, :]
    magnitude = np.sqrt(np.einsum('ij, ij -> i', data, data))

    similarity = np.dot(movie_row, data.T) / (magnitude[index] * magnitude)

    sort_indexes = np.argsort(-similarity)
    
  
    return sort_indexes[:top_n], sort_indexes[-bottom_n:]

In [51]:
def print_similar_movies(movie_data, movie_id, top_indexes, bottom_indexes):
    """
    Function to print top N similar movies and M least similar movies
    
    """
    print("\n\n************************************************")
    print("\n\n------------------------------------------------")
    print('Recommendations for {0}: \n'.format(
    movie_data[movie_data.movie_id == movie_id].title.values[0]))
    print("------------------------------------------------")
    for id in top_indexes + 1:
        print(movie_data[movie_data.movie_id == id].title.values[0] + ", Genre: " + movie_data[movie_data.movie_id == id].genre.values[0])  

    print("\n\n------------------------------------------------")
    print('Least similar to {0}: \n'.format(
    movie_data[movie_data.movie_id == movie_id].title.values[0]))
    print("------------------------------------------------")
    for id in bottom_indexes:
        print(movie_data[movie_data.movie_id == id].title.values[0]  + ", Genre: " + movie_data[movie_data.movie_id == id].genre.values[0])   

## Movies recommendations

Recommend 10 most similar movies and list out 3 least similar movies (not recommended) for a given movie id.



In [52]:
np.seterr(divide='ignore', invalid='ignore')


#k-principal components to represent movies, movie_id to find recommendations,
#top_n : print n results
k = 50
top_n = 10
bottom_n = 3
sliced = V.T[:, :k] # representative data

movie_id_list = [3793,  # X-Men (2000)
                 2808,  # Universal Soldier (1992)
                 10]    # GoldeEye (1995)


for movie_id in movie_id_list:
    similar_indexes, least_similar_indexes = top_cosine_similarity(sliced, movie_id, top_n, bottom_n)
    print_similar_movies(movie_data, movie_id, similar_indexes, least_similar_indexes)




************************************************


------------------------------------------------
Recommendations for X-Men (2000): 

------------------------------------------------
X-Men (2000), Genre: Action|Sci-Fi
Shaft (2000), Genre: Action|Crime
Romeo Must Die (2000), Genre: Action|Romance
Titan A.E. (2000), Genre: Adventure|Animation|Sci-Fi
Shanghai Noon (2000), Genre: Action
Gone in 60 Seconds (2000), Genre: Action|Crime
Gladiator (2000), Genre: Action|Drama
Mission: Impossible 2 (2000), Genre: Action|Thriller
Battlefield Earth (2000), Genre: Action|Sci-Fi
Highlander: Endgame (2000), Genre: Action|Adventure|Fantasy


------------------------------------------------
Least similar to X-Men (2000): 

------------------------------------------------
Swimming with Sharks (1995), Genre: Comedy|Drama
Bewegte Mann, Der (1994), Genre: Comedy
Breakfast of Champions (1999), Genre: Comedy


************************************************


---------------------------------------------