### Using the recommendation library

In [1]:
import os

# The `src` package imported by the following cells must be reachable from the
# working directory, so step up one level (presumably from the notebook folder
# to the project root -- confirm against the repository layout).
# The guard makes the cell idempotent: re-running it no longer keeps climbing
# the directory tree once `src` is already visible.
if not os.path.isdir('src'):
    os.chdir('..')

In [2]:
# Import all the packages we need to generate recommendations
import numpy as np
import pandas as pd
import src.utils as utils
import src.recommenders as recommenders
import src.similarity as similarity

# Turn on log output in the notebook: lower the root logger's threshold to
# DEBUG (so messages of level DEBUG and above are handled) and keep a handle
# to the root logger for the cells below.
import logging
logging.getLogger().setLevel(logging.DEBUG)
logger = logging.getLogger()

In [3]:
# imports necessary for plotting
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  

In [4]:
# Load the dataset (assumes the data has already been downloaded).
# Paths are built with os.path.join so that separators are never doubled
# (plain concatenation used to yield '.../data//ml-latest-small').
# The trailing '' keeps the final separator on `dataset_folder`, so its value
# is unchanged for any later code that appends a file name to it.
dataset_folder = os.path.join(os.getcwd(), 'data', '')
my_ratings_file = os.path.join(dataset_folder, 'ratings-imdb.csv')
dataset_folder_ready = os.path.join(dataset_folder, 'ml-latest-small')

# merge_datasets returns two values; presumably the combined ratings table and
# the customer id under which the personal ratings were added -- see src/utils.py.
ratings, my_customer_number = utils.merge_datasets(dataset_folder_ready, my_ratings_file)

INFO:root:loaded 44 personal ratings
INFO:root:loaded 9125 movies
INFO:root:loaded 100048 ratings in total


In [5]:
# `ratings` is a long pandas dataframe.
# Pivot it into a customer x movie table of ratings (missing entries become 0),
# then transpose it, so the final matrix has one row per movie and one column
# per customer.
ratings_matrix = (
    ratings
    .pivot_table(index='customer', columns='movie', values='rating', fill_value=0)
    .transpose()
)

#### Understanding Movie Similarity
1. Try with different movies 
2. Try with different types of **similarity metrics** (look in /src/similarity.py)
3. Which **similarity metric** works the best? 

In [None]:
# Find the movies most similar to a given title.
# Swap in other movie titles (or another similarity type) and compare the results.
similarity_type = "intersection"
movie_title = 'Star Wars: Episode VI - Return of the Jedi (1983)'
logger.info(
    'top-10 movies similar to %s, using %s similarity',
    movie_title,
    similarity_type,
)
# Only the first ten entries of the returned neighbours are printed.
print(
    similarity.compute_nearest_neighbours(
        movie_title, ratings_matrix, similarity_type
    )[0:10]
)

In [None]:
# Find the movies most similar to a given title, this time with "pearson" similarity.
# Swap in other movie titles and compare the results.
similarity_type = "pearson"
movie_title = 'All About My Mother (Todo sobre mi madre) (1999)'
logger.info(
    'top-10 movies similar to: %s, using %s similarity',
    movie_title,
    similarity_type,
)
# Only the first ten entries of the returned neighbours are printed.
print(
    similarity.compute_nearest_neighbours(
        movie_title, ratings_matrix, similarity_type
    )[0:10]
)

#### Creating recommendations for your personal ratings
1. Try with different **similarity metrics** (look in /src/similarity.py)
2. Try with different values of **K** (K is the number of neighbours to consider when generating the recommendations)
3. Which combination of **K** and **similarity metric** works best? Discuss it with others.

In [6]:
# Recommendations for a single user (our own customer number) via
# recommenders.recommend_uknn, with K=200 neighbours and cosine similarity.
# N=10 is presumably the number of recommendations to return -- TODO confirm
# in src/recommenders.py.
recommendations = recommenders.recommend_uknn(
    ratings,
    my_customer_number,
    K=200,
    similarity_metric='cosine',
    N=10,
)
# Bare last expression so the notebook renders the result.
recommendations

INFO:root:computed nearest neighbours using cosine
  movie, recommendations in recommendations.items()]


Unnamed: 0,rating,movie
0,,Fireproof (2008)
1,4.5,Working Girl (1988)
2,4.41406,Howl's Moving Castle (Hauru no ugoku shiro) (2...
3,4.413406,12 Angry Men (1957)
4,4.300721,Léon: The Professional (a.k.a. The Professiona...
5,4.253711,Raging Bull (1980)
6,4.192425,Memento (2000)
7,4.180205,Once (2006)
8,4.165266,"Dark Knight, The (2008)"
9,4.162363,The Hateful Eight (2015)


In [7]:
# Recommendations for the same single user via recommenders.recommend_iknn
# (presumably the item-based counterpart of recommend_uknn -- confirm in
# src/recommenders.py), with K=100 neighbours and cosine similarity.
recommendations = recommenders.recommend_iknn(
    ratings,
    my_customer_number,
    K=100,
    similarity_metric='cosine',
)
# Bare last expression so the notebook renders the result.
recommendations

Unnamed: 0,rating,movie
0,4.747497,"Blind Side, The (2009)"
1,4.706659,Harry Potter and the Half-Blood Prince (2009)
2,4.682608,"Grand Budapest Hotel, The (2014)"
3,4.671003,Ratatouille (2007)
4,4.670487,Star Trek Into Darkness (2013)
5,4.663013,Captain America: The First Avenger (2011)
6,4.662324,Mad Max: Fury Road (2015)
7,4.641638,Dallas Buyers Club (2013)
8,4.58398,Hugo (2011)
9,4.570574,Harry Potter and the Deathly Hallows: Part 1 (...
