# Movie Recommendation using SentenceBERT

In [1]:
#Imports
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

In [2]:
#Load Dataset
movies = pd.read_csv("movies_metadata.csv", usecols = [5,9,20], nrows = 5000)
movies.head()

Unnamed: 0,id,overview,title
0,862,"Led by Woody, Andy's toys live happily in his ...",Toy Story
1,8844,When siblings Judy and Peter discover an encha...,Jumanji
2,15602,A family wedding reignites the ancient feud be...,Grumpier Old Men
3,31357,"Cheated on, mistreated and stepped on, the wom...",Waiting to Exhale
4,11862,Just when George Banks has recovered from his ...,Father of the Bride Part II


In [3]:
#See if any null values in dataset
movies[movies.isnull().any(axis=1)]

Unnamed: 0,id,overview,title
32,78802,,Wings of Courage
300,161495,,Roommates
634,287305,,Peanuts – Die Bank zahlt alles
635,339428,,Happy Weekend
641,10801,,The Superwife
644,278978,,Und keiner weint mir nach
679,117730,,Under The Domim Tree
792,281085,,A Boy Called Hate
802,282919,,Diebinnen
821,48144,,The Day the Sun Turned Cold


In [4]:
#Assign indexes
movies['index'] = [i for i in range(0, len(movies))]
#Drop null values
movies = movies.dropna()
movies.head()

Unnamed: 0,id,overview,title,index
0,862,"Led by Woody, Andy's toys live happily in his ...",Toy Story,0
1,8844,When siblings Judy and Peter discover an encha...,Jumanji,1
2,15602,A family wedding reignites the ancient feud be...,Grumpier Old Men,2
3,31357,"Cheated on, mistreated and stepped on, the wom...",Waiting to Exhale,3
4,11862,Just when George Banks has recovered from his ...,Father of the Bride Part II,4


In [5]:
# Helper Functions

#Get title of movie
def get_title(index):
    return movies[movies.index == index]["title"].values[0]

#Get index of movie
def get_index(title):
    return movies[movies.title == title]["index"].values[0]

In [6]:
#Load pre-trained SentenceBERT model
bert = SentenceTransformer('bert-base-nli-mean-tokens')

#Get Embeddings for movie overviews
sentence_embeddings = bert.encode(movies['overview'].tolist())

#Compute similarity between movie overviews
similarity = cosine_similarity(sentence_embeddings)

In [8]:
#User Input
notOver = True
while(notOver):
    user_movie = input("Enter the movie for which you want recommendations: ")

# Generate Recommendations
    recommendations = sorted(list(enumerate(similarity[get_index(user_movie)])), key = lambda x:x[1], reverse = True)
    print("The top 3 recommendations for" + " " + user_movie + " " + "are: ")
    print(get_title(recommendations[1][0]), get_title(recommendations[2][0]), get_title(recommendations[3][0]), sep = "\n")
    decision = input("Press 1 to enter another movie, 0 to exit")
    if(int(decision) == 0):
        print("Bye")
        notOver = False 

Enter the movie for which you want recommendations: Toy Story
The top 3 recommendations for Toy Story are: 
Candleshoe
Scenes from the Class Struggle in Beverly Hills
Snow White and the Seven Dwarfs
Press 1 to enter another movie, 0 to exit0
Bye
