# Movie Search Engine with Okapi BM25 Ranking

In [1]:
# import required libraries
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Requires the rank_bm25 package; install it once with: %pip install rank_bm25
from rank_bm25 import BM25Okapi
import sqlite3


In [2]:
# Load movie dataset
conn = sqlite3.connect("datasets/netflix_titles.db")
try:
    df = pd.read_sql("select * from netflix_titles", conn)
finally:
    # sqlite3 connections are not closed automatically; release the database
    # file as soon as the table has been read into memory.
    conn.close()
# Keep an unmodified copy: `df` is rewritten by the preprocessing step, while
# `storys` keeps the original columns for displaying search results.
storys = df.copy()

In [3]:
# Preview the copy of the dataset (pandas abbreviates the middle rows).
storys

Unnamed: 0,index,title,actors,release_year,rating,description,duration,ranking
0,0,Dick Johnson Is Dead,empty,2020,PG-13,"As her father nears the end of his life, filmm...",90 min,0.0
1,1,Blood & Water,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",2021,TV-MA,"After crossing paths at a party, a Cape Town t...",2 Seasons,0.0
2,2,Ganglands,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",2021,TV-MA,To protect his family from a powerful drug lor...,1 Season,0.0
3,3,Jailbirds New Orleans,empty,2021,TV-MA,"Feuds, flirtations and toilet talk go down amo...",1 Season,0.0
4,4,Kota Factory,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",2021,TV-MA,In a city of coaching centers known to train I...,2 Seasons,0.0
...,...,...,...,...,...,...,...,...
8802,8802,Zodiac,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",2007,R,"A political cartoonist, a crime reporter and a...",158 min,0.0
8803,8803,Zombie Dumb,empty,2018,TV-Y7,"While living alone in a spooky town, a young g...",2 Seasons,0.0
8804,8804,Zombieland,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",2009,R,Looking to survive in a world taken over by zo...,88 min,0.0
8805,8805,Zoom,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",2006,PG,"Dragged from civilian life, a former superhero...",88 min,0.0


In [22]:
# Preprocess data
# Build the searchable text from the unmodified copy (`storys`) instead of from
# df['title'] itself, so re-running this cell does not try to concatenate
# strings onto the token lists written by a previous run.
stop_words = set(stopwords.words('english'))
search_text = (
    storys['title'].fillna('') + ' '
    + storys['description'].fillna('') + ' '
    + storys['release_year'].astype(str)
).str.lower()
# NOTE: df['title'] is overwritten with a list of tokens per row (lower-cased
# title + description + release year, English stop words removed).
df['title'] = search_text.apply(
    lambda text: [word for word in word_tokenize(text) if word not in stop_words]
)

In [4]:
# Inspect the title column.
# NOTE(review): the saved output below shows plain title strings, so this cell
# appears to have been run (In [4]) before the preprocessing cell (In [22]);
# after preprocessing, df['title'] holds token lists instead.
df['title']

0        Dick Johnson Is Dead
1               Blood & Water
2                   Ganglands
3       Jailbirds New Orleans
4                Kota Factory
                ...          
8802                   Zodiac
8803              Zombie Dumb
8804               Zombieland
8805                     Zoom
8806                   Zubaan
Name: title, Length: 8807, dtype: object

In [23]:
# Build the Okapi BM25 index.
# Each entry of df['title'] is expected to be a list of tokens at this point.
tokenized_corpus = df['title'].tolist()
bm25 = BM25Okapi(tokenized_corpus)

In [24]:
# save the model to disk
import pickle
filename = 'bm25_model.sav'
# Use a context manager so the file is flushed and closed even if dump() fails.
with open(filename, 'wb') as model_file:
    pickle.dump(bm25, model_file)

In [25]:
# load the model from disk
# NOTE: pickle.load can execute arbitrary code; only load files that this
# notebook (or another trusted source) produced.
with open(filename, 'rb') as model_file:
    bm25 = pickle.load(model_file)

In [26]:
def search(query, top_n=5):
    """Return the best-matching titles for a free-text query.

    The query is lower-cased and tokenized, scored against every document
    with the module-level ``bm25`` model, and the scores are attached as a
    ``scores`` column on a copy of ``storys``.

    Args:
        query: Search text.
        top_n: Number of rows to return; defaults to 5.

    Returns:
        DataFrame with the ``top_n`` highest-scoring rows of ``storys`` plus a
        ``scores`` column, ordered from highest to lowest score.
    """
    tokenized_query = word_tokenize(query.lower())
    doc_scores = bm25.get_scores(tokenized_query)
    # assign() returns a new frame, so repeated searches do not add or
    # overwrite a column on the shared `storys` DataFrame.
    return storys.assign(scores=doc_scores).nlargest(top_n, 'scores')

In [27]:
# Example query: the five highest-scoring titles for "zombie".
search("zombie")

Unnamed: 0,index,title,actors,release_year,rating,description,duration,ranking,scores
8803,8803,Zombie Dumb,empty,2018,TV-Y7,"While living alone in a spooky town, a young g...",2 Seasons,0.0,9.277093
7871,7871,Rise of the Zombie,"Luke Kenny, Kirti Kulhari, Ashwin Mushran, Ben...",2013,TV-MA,A heartbroken wildlife photographer throws him...,83 min,0.0,7.283041
390,390,The Walking Dead,"Andrew Lincoln, Steven Yeun, Norman Reedus, Ch...",2019,TV-MA,"In the wake of a zombie apocalypse, survivors ...",10 Seasons,0.0,6.766935
6961,6961,Here Alone,"Lucy Walters, Gina Piersanti, Adam David Thomp...",2016,TV-MA,When she finally encounters two other survivor...,97 min,0.0,6.61078
7216,7216,KL Zombi,"Zizan Razak, Siti Saleha, Zain Hamid, Izara Ai...",2013,TV-14,A field hockey-playing pizza delivery boy find...,92 min,0.0,6.61078
