# Portfolio Project - Build a Movie Recommendation System in Python

### Reading in Our Movie Data in Pandas

In [1]:
import pandas as pd

In [2]:
# Load the movie table; expects movies.csv to be in the current working directory.
movies = pd.read_csv("movies.csv")

The .csv file is included in the MovieLens 25M archive, which you can download as a .zip file from [here](https://files.grouplens.org/datasets/movielens/ml-25m.zip)

In [3]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


### Cleaning Movie Titles Using Regex

In [4]:
import re

In [5]:
def cleaning_title(title):
    """Return ``title`` stripped of everything except ASCII letters, digits, and whitespace.

    Punctuation, symbols, and non-ASCII letters (for example accented
    characters) are removed; whitespace is kept exactly as it appears, so
    words stay separated.

    Parameters
    ----------
    title : str
        The raw title, e.g. ``"Toy Story (1995)"``.

    Returns
    -------
    str
        The cleaned title, e.g. ``"Toy Story 1995"``.
    """
    # `+` instead of `*`: with `*` the pattern also matches the empty string at
    # every position, so re.sub performs a pointless empty replacement for each
    # character. The resulting text is the same either way.
    return re.sub(r"[^a-zA-Z0-9\s]+", "", title)

In [6]:
# Add a "clean_title" column holding each title after it has been passed through cleaning_title.
movies["clean_title"] = movies["title"].apply(cleaning_title)

In [7]:
movies.head()

Unnamed: 0,movieId,title,genres,clean_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,Jumanji 1995
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II 1995


### Creating a TFIDF Matrix

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [9]:
# ngram_range=(1, 2): build features from single words and from pairs of consecutive words.
vectorizer = TfidfVectorizer(ngram_range=(1, 2))

In [10]:
# Fit the vectorizer on the cleaned titles and encode them; the result is what search() compares queries against.
tfidf_matrix = vectorizer.fit_transform(movies["clean_title"])

### Creating a Search Function

In [11]:
import numpy as np

In [12]:
from sklearn.metrics.pairwise import cosine_similarity

In [13]:
def search(term, top_n=5):
    """Return the titles of the movies that best match ``term``.

    The term is cleaned with ``cleaning_title``, encoded with the fitted
    ``vectorizer``, and compared against every row of ``tfidf_matrix`` using
    cosine similarity.

    Parameters
    ----------
    term : str
        The text to search for, e.g. ``"Toy Story"``.
    top_n : int, default 5
        Maximum number of titles to return. Values below zero are treated
        as zero.

    Returns
    -------
    pandas.Series
        Values of the ``"title"`` column, ordered from most to least similar.
        Contains fewer than ``top_n`` entries (possibly none) when fewer
        titles have a non-zero similarity to the query.
    """
    cleaned_term = cleaning_title(term)
    term_vector = vectorizer.transform([cleaned_term])

    # One similarity score per movie; [0] selects the row for our single query.
    scores = cosine_similarity(term_vector, tfidf_matrix)[0]

    # Highest scores first, then keep at most top_n of them.
    ranked = np.argsort(scores)[::-1][:max(top_n, 0)]

    # A score of zero means the title shares no term with the query; without
    # this filter a query with few or no matches is padded with arbitrary movies.
    ranked = ranked[scores[ranked] > 0]

    # Select the column by label rather than by position so the lookup does
    # not depend on the column order of the DataFrame.
    return movies["title"].iloc[ranked]

In [28]:
toy_story = search("Toy Story") # Testing the function

In [15]:
toy_story

14813            Toy Story 3 (2010)
3021             Toy Story 2 (1999)
0                  Toy Story (1995)
59767            Toy Story 4 (2019)
20497    Toy Story of Terror (2013)
Name: title, dtype: object

### Building an Interactive Search Box in Jupyter

In [16]:
import ipywidgets as widgets
from IPython.display import display

In [17]:
# Text box whose value is read as the search term.
input_widget = widgets.Text(placeholder="Please type the title here")
# Button whose click triggers the search.
search_button = widgets.Button(description="Search")
# HTML area whose value is set to the list of matching titles.
output_widget = widgets.HTML()

In [24]:
def on_search_clicked(e):
    """Click handler: search for the typed title and show the matches as an HTML list.

    Reads the search term from ``input_widget``, looks it up with ``search``,
    and writes a ``<ul>`` with one ``<li>`` per matching title into
    ``output_widget``.

    Parameters
    ----------
    e
        The argument supplied by ``search_button.on_click``; not used.
    """
    # Imported here so this cell works without changes to the import cells.
    from html import escape

    recommendation = search(input_widget.value)

    # Titles are data, not markup: escape them so characters such as "&" or
    # "<" in a title are displayed literally instead of being parsed as HTML.
    items = "".join(f"<li>{escape(str(title))} </li>" for title in recommendation)

    output_widget.value = f"<ul>{items}</ul>"

In [25]:
# Register on_search_clicked as the handler for clicks on the Search button.
search_button.on_click(on_search_clicked)

#### The search function in action

Note: It is usable in real time - feel free to try it.

In [29]:
# Show the text box, the Search button, and the results area as this cell's output.
display(input_widget, search_button, output_widget)

Text(value='Matrix', placeholder='Please type the title here')

Button(description='Search', style=ButtonStyle())

HTML(value='<ul><li>Matrix, The (1999) </li><li>The Living Matrix (2009) </li><li>Matrix of Evil (2003) </li><…