### Using the recommendation library

In [1]:
import os

# Later cells import the `src` package and read from `data/`, both resolved
# relative to the working directory, so step up one level to the directory
# that holds them. The guard keeps the cell idempotent: re-running it (or
# starting the kernel from that directory already) must not climb further up.
if not os.path.isdir('src'):
    os.chdir('..')

In [2]:
# Import all the packages we need to generate recommendations
import numpy as np
import pandas as pd
import src.utils as utils
import src.recommenders as recommenders
import src.similarity as similarity

# Make log records visible in the notebook: lower the root logger's threshold
# to DEBUG and keep a handle on it for the cells below.
import logging
logging.getLogger().setLevel(logging.DEBUG)
logger = logging.getLogger()

In [3]:
# imports necessary for plotting
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  

In [4]:
# Load the dataset (assumes the data has already been downloaded into ./data).
# Paths are assembled with os.path.join so that no doubled separators
# ("data//ratings_humberto.csv") end up in the file names. The trailing ''
# keeps `dataset_folder` ending in a separator, exactly as before.
dataset_folder = os.path.join(os.getcwd(), 'data', '')
my_ratings_file = os.path.join(dataset_folder, 'ratings_humberto.csv')
dataset_folder_ready = os.path.join(dataset_folder, 'ml-latest-small')
# NOTE(review): merge_datasets presumably combines the personal ratings file
# with the downloaded ratings and returns the id assigned to the personal
# ratings -- confirm against src/utils.py.
ratings, my_customer_number = utils.merge_datasets(dataset_folder_ready, my_ratings_file)

INFO:root:loaded 12 personal ratings
INFO:root:loaded 9125 movies
INFO:root:loaded 100016 ratings in total


In [5]:
# `ratings` holds one rating per row (long format). Pivot it into a
# customer x movie table, with 0 where a customer has not rated a movie,
# then transpose so the final matrix has movies as rows and customers as columns.
ratings_matrix = (
    ratings
    .pivot_table(index='customer', columns='movie', values='rating', fill_value=0)
    .transpose()
)

#### Understanding Movie Similarity
1. Try with different movies 
2. Try with different types of **similarity metrics** (look in /src/similarity.py)
3. Which **similarity metric** works the best? 

In [6]:
# Look up the movies most similar to a chosen title.
# Swap in other titles or similarity types and compare the results.
movie_title = 'Star Wars: Episode VI - Return of the Jedi (1983)'
similarity_type = "intersection"
logger.info('top-10 movies similar to %s, using %s similarity', movie_title, similarity_type)
# Keep the full neighbour list in a variable and show only its first 10 entries.
neighbours = similarity.compute_nearest_neighbours(movie_title, ratings_matrix, similarity_type)
print(neighbours[:10])

INFO:root:top-10 movies similar to Star Wars: Episode VI - Return of the Jedi (1983), using intersection similarity


                                                   item  similarity
7490  Star Wars: Episode VI - Return of the Jedi (1983)         217
7488          Star Wars: Episode IV - A New Hope (1977)         187
7489  Star Wars: Episode V - The Empire Strikes Back...         169
2933                                Forrest Gump (1994)         148
5102                                 Matrix, The (1999)         144
6460  Raiders of the Lost Ark (Indiana Jones and the...         139
657                           Back to the Future (1985)         138
4307                               Jurassic Park (1993)         131
5164                   Men in Black (a.k.a. MIB) (1997)         127
8248                                   Toy Story (1995)         127


#### Creating recommendations for your personal ratings
1. Try with different **similarity metrics** (look in /src/similarity.py)
2. Try with different values of **K** (K is the number of neighbours to consider when generating the recommendations)
3. Which combination of **K** and **similarity metric** works best? Discuss it with others.

In [7]:
# Produce recommendations for one user (the personal ratings loaded earlier).
# K and similarity_metric are the two knobs to experiment with.
recommendations = recommenders.recommend_iknn(
    ratings,
    my_customer_number,
    K=50,
    similarity_metric='pearson',
)
recommendations

Unnamed: 0,rating,movie
0,5.0,We Don't Live Here Anymore (2004)
1,5.0,The Skeleton Twins (2014)
2,5.0,Keep the River on Your Right: A Modern Canniba...
3,5.0,Dragon Ball Z: Battle of Gods (2013)
4,5.0,À nous la liberté (Freedom for Us) (1931)
5,5.0,"Wild Angels, The (1966)"
6,5.0,Whiplash (2014)
7,5.0,Where the Wild Things Are (2009)
8,5.0,"Wedding Banquet, The (Xi yan) (1993)"
9,5.0,"Way, Way Back, The (2013)"
