# 1. Create an m x u matrix with movies as rows and users as columns
https://alyssaq.github.io/2015/20150426-simple-movie-recommender-using-svd/

In [27]:
#Import required libraries
import numpy as np
import pandas as pd

In [29]:
# We will be using movies.dat and ratings.dat for building the recommender.
# Both files are '::'-separated and are read with explicit column names;
# a multi-character separator needs the pure-Python parser engine.

data = pd.read_csv(
    'ml-1m/ratings.dat',
    sep='::',
    engine='python',
    header=None,
    names=['user_id', 'movie_id', 'rating', 'time'],
)
# NOTE(review): movies.dat is presumably Latin-1 (ISO-8859-1) encoded; reading
# it with another codec garbles accented titles (e.g. "Le Mépris").
movie_data = pd.read_csv(
    'ml-1m/movies.dat',
    sep='::',
    engine='python',
    header=None,
    names=['movie_id', 'title', 'genre'],
    encoding='latin-1',
)

In [41]:
# movie_data

In [30]:
#1. Create m x u matrix with movies as rows and users as columns

# IDs are 1-based, so the largest movie/user ID gives the row/column count.
# np.zeros is used instead of the raw np.ndarray constructor: the latter
# leaves memory uninitialised, so every (movie, user) cell without a rating
# would contain arbitrary bytes instead of 0.
ratings_mat = np.zeros(
    shape=(np.max(data.movie_id.values), np.max(data.user_id.values)),
    dtype=np.uint8,
)
# Shift IDs by one to get zero-based row/column positions.
ratings_mat[data.movie_id.values - 1, data.user_id.values - 1] = data.rating.values

In [31]:
#2. Normalize the matrix
# Subtract from each row (movie) its own mean, taken over every column (user).
# keepdims leaves the means as a column vector so they broadcast across the row.
normalised_mat = ratings_mat - ratings_mat.mean(axis=1, keepdims=True)

In [32]:
#3. Compute SVD to get U, S, and V. Use np.linalg.svd()
# A is the transposed, mean-centred matrix (users as rows, movies as columns)
# divided by sqrt(number of movie rows - 1).
A = normalised_mat.transpose() / np.sqrt(len(ratings_mat) - 1)
# np.linalg.svd returns the right singular vectors already transposed, so the
# third result (named V here) holds them as rows.
U, S, V = np.linalg.svd(A, full_matrices=True)

In [37]:
#4. From your V.T select 50 components
k = 50
# First k rows of V, transposed: one row per movie, one column per component.
sliced = V[:k].T # representative data


In [35]:
#5. Implement a function that takes movieID as input and then implement cosine similarity 
# along with sorting to recommend the top 10 movies.

def top_cosine_similarity(data, movie_id, top_n=10):
    """Return the row indexes of the ``top_n`` rows most similar to a movie.

    Similarity is the cosine of the angle between the query movie's row and
    every row of ``data`` (the query row itself included).

    Args:
        data: 2-D array with one feature row per movie; row ``i`` is treated
            as movie ID ``i + 1``.
        movie_id: 1-based ID of the query movie.
        top_n: Maximum number of indexes to return.

    Returns:
        1-D array of zero-based row indexes, ordered by decreasing cosine
        similarity. Rows whose similarity is undefined (zero-length vectors)
        are ranked last.

    Raises:
        IndexError: If ``movie_id`` is outside ``1..len(data)``.
    """
    data = np.asarray(data)
    index = movie_id - 1 # Movie id starts from 1
    # Without this check movie_id <= 0 would silently wrap around to a row
    # at the end of the array via negative indexing.
    if not 0 <= index < data.shape[0]:
        raise IndexError(
            'movie_id {0} is out of range 1..{1}'.format(movie_id, data.shape[0]))

    movie_row = data[index, :]
    # Row-wise Euclidean norms: sqrt of each row's dot product with itself.
    magnitude = np.sqrt(np.einsum('ij, ij -> i', data, data))
    dots = np.dot(movie_row, data.T)
    denominator = magnitude[index] * magnitude

    # A zero-length row has no direction, so its cosine is undefined (0/0).
    # Give such rows -inf instead of NaN so they sort to the end without
    # triggering a floating-point warning.
    similarity = np.full(dots.shape, -np.inf, dtype=float)
    defined = denominator > 0
    similarity[defined] = dots[defined] / denominator[defined]

    # Stable sort keeps equal similarities in row order (deterministic ties).
    sort_indexes = np.argsort(-similarity, kind='stable')
    return sort_indexes[:top_n]

# Helper function to print top N similar movies
def print_similar_movies(movie_data, movie_id, top_indexes):
    """Print the title of ``movie_id`` and then the titles of its neighbours.

    ``top_indexes`` holds zero-based row positions; each is shifted by one to
    recover the movie ID before its title is looked up in ``movie_data``.
    """
    def title_of(wanted_id):
        # First title among the rows whose movie_id equals wanted_id.
        matches = movie_data[movie_data.movie_id == wanted_id]
        return matches.title.values[0]

    header = 'Recommendations for {0}: \n'.format(title_of(movie_id))
    print(header)
    for similar_id in top_indexes + 1:
        print(title_of(similar_id))
In [36]:
# Movie ID input here
movie_id = 1
top_n = 10

# Rank every movie against the chosen one, then print the matching titles.
indexes = top_cosine_similarity(sliced, movie_id=movie_id, top_n=top_n)
print_similar_movies(movie_data, movie_id=movie_id, top_indexes=indexes)

Recommendations for Toy Story (1995): 

Toy Story (1995)
Georgia (1995)
Lamerica (1994)
Catwalk (1995)
400 Blows, The (Les Quatre cents coups) (1959)
Whatever It Takes (2000)
Chushingura (1962)
Way of the Gun, The (2000)
Grandfather, The (El Abuelo) (1998)
Teaching Mrs. Tingle (1999)


# Project Report - 2. For part 2, take any 3 random movie IDs. Compute and report the top 10 recommended movies. Describe whether the movies are actually similar.

In [49]:
# Draw 3 distinct IDs from the movies that actually appear in the ratings.
# randint's `high` bound is exclusive, so randint(1, 3952) could never return
# 3952, and the raw ID range is not guaranteed to contain only rated movies.
np.random.choice(data.movie_id.unique(), size=3, replace=False)

array([1742,  178, 1266])

In [46]:
# Movie ID input here
movie_id = 3261
top_n = 10

# Rank every movie against the chosen one, then print the matching titles.
indexes = top_cosine_similarity(sliced, movie_id=movie_id, top_n=top_n)
print_similar_movies(movie_data, movie_id=movie_id, top_indexes=indexes)

Recommendations for Singles (1992): 

Singles (1992)
Illuminata (1998)
King of Marvin Gardens, The (1972)
Hidden, The (1987)
Night to Remember, A (1958)
Light It Up (1999)
Patriot Games (1992)
Nine 1/2 Weeks (1986)
Me, Myself and Irene (2000)
Flying Tigers (1942)


In [48]:
# Movie ID input here
movie_id = 3349
top_n = 10

# Rank every movie against the chosen one, then print the matching titles.
indexes = top_cosine_similarity(sliced, movie_id=movie_id, top_n=top_n)
print_similar_movies(movie_data, movie_id=movie_id, top_indexes=indexes)

Recommendations for Perils of Pauline, The (1947): 

Perils of Pauline, The (1947)
Brown's Requiem (1998)
Torso (Corpi Presentano Tracce di Violenza Carnale) (1973)
Portraits Chinois (1996)
Holy Smoke (1999)
Date with an Angel (1987)
Faraway, So Close (In Weiter Ferne, So Nah!) (1993)
Champ, The (1979)
Shaft (1971)
Replacements, The (2000)


In [50]:
# Movie ID input here
movie_id = 1742
top_n = 10

# Rank every movie against the chosen one, then print the matching titles.
indexes = top_cosine_similarity(sliced, movie_id=movie_id, top_n=top_n)
print_similar_movies(movie_data, movie_id=movie_id, top_indexes=indexes)

Recommendations for Caught Up (1998): 

Caught Up (1998)
Meatballs (1979)
Golden Child, The (1986)
Wild Man Blues (1998)
Pleasure Garden, The (1925)
Men Cry Bullets (1997)
Kiss Me, Guido (1997)
Out-of-Towners, The (1999)
Contempt (Le Mépris) (1963)
Substance of Fire, The (1996)


In [51]:
# Movie ID input here
movie_id = 178
top_n = 10

# Rank every movie against the chosen one, then print the matching titles.
indexes = top_cosine_similarity(sliced, movie_id=movie_id, top_n=top_n)
print_similar_movies(movie_data, movie_id=movie_id, top_indexes=indexes)

Recommendations for Love & Human Remains (1993): 

Love & Human Remains (1993)
S.F.W. (1994)
Total Eclipse (1995)
Nadja (1994)
Stars Fell on Henrietta, The (1995)
Tom & Viv (1994)
Safe Passage (1994)
Blue in the Face (1995)
Awfully Big Adventure, An (1995)
Little Odessa (1994)
