# Recommendation System

#### This program builds a content-based recommendation system: it computes the cosine similarity between movie plot overviews and lists the top 10 movies most similar to "Toy Story".

In [None]:
# Import libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [None]:
# Load the movies metadata into a DataFrame.
# The path uses a forward slash so it resolves on every platform; the previous
# 'dataset\movies_metadata.csv' depended on the invalid escape sequence '\m'
# (a warning on current Python versions) and only worked on Windows.
metadata = pd.read_csv('dataset/movies_metadata.csv', low_memory=False)
# Print the plot overviews of the first 5 movies (before NaN replacement).
print(metadata['overview'].head())
# Replace missing overviews with empty strings so every entry handed to the
# vectorizer is a str.
metadata['overview'] = metadata['overview'].fillna('')

In [None]:
# Build a TF-IDF vectorizer that drops English stop words.
tfidf = TfidfVectorizer(stop_words='english')
# Learn the vocabulary from the overviews and encode each overview as one row
# of the TF-IDF matrix.
tfidf_matrix = tfidf.fit_transform(metadata['overview'])
# Shape of the matrix returned by fit_transform.
print(tfidf_matrix.shape)
# Mapping from feature integer indices to feature names.
# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# get_feature_names_out() and fall back only on releases that predate it.
if hasattr(tfidf, 'get_feature_names_out'):
    feature_names = tfidf.get_feature_names_out()
else:
    feature_names = tfidf.get_feature_names()
# Convert to plain str so the printed list looks the same whichever API was used.
print([str(name) for name in feature_names[5000:5010]])

In [None]:
# Determine the pairwise similarity matrix between all overviews.
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel equals the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
print('cosine similiarity matrix shape:', cosine_sim.shape)
# Reverse map from movie title to its row label in `metadata`.
indices = pd.Series(metadata.index, index=metadata['title'])
# Series.drop_duplicates() compares the *values* (unique row labels here), so it
# never removed repeated titles. De-duplicate on the index instead, keeping the
# first movie for each title, so that indices[title] always yields one scalar.
indices = indices[~indices.index.duplicated(keep='first')]

In [None]:
# Function that takes a movie title as input and outputs the most similar movies.
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Args:
        title: Movie title to look up in the module-level ``indices`` mapping.
        cosine_sim: Square pairwise similarity matrix; row ``i`` holds the
            similarity of movie ``i`` to every movie.
        top_n: Number of recommendations to return (default 10).

    Returns:
        A slice of ``metadata['title']`` holding the ``top_n`` most similar
        movies, ordered from most to least similar.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    # Row position of the movie matching the title.
    idx = indices[title]
    # A title shared by several movies makes the lookup return a Series of
    # positions; use the first match so the row selection below stays 1-D.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)
    # Pair every other movie with its similarity to the input movie. The input
    # movie is excluded by position rather than by assuming it sorts first:
    # a movie with an empty overview has zero self-similarity, and an earlier
    # row with an identical overview would tie with it.
    scored = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # Highest similarity first; the sort is stable, so ties keep row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    # Row positions of the top matches.
    movie_indices = [position for position, _ in scored[:top_n]]
    # Return the titles of the most similar movies.
    return metadata['title'].iloc[movie_indices]
get_recommendations('Toy Story')