In [14]:
#install needed packages for panada and widgets
%pip install ipywidgets
%pip install pandas

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [15]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import ipywidgets as widgets #widgets
from ipywidgets import Button, HBox, VBox, Layout #layout
from IPython.display import display #displays
import matplotlib.pyplot as plt

In [16]:
# Load the movie data, drop exact duplicate rows and renumber the rows 0..n-1.
# Renumbering matters: the similarity matrix and the .iloc lookups further down
# address movies by row position, so index labels must equal positions.
df = (
    pd.read_csv('netfix_cleaned.csv')
    .drop_duplicates()
    .reset_index(drop=True)
)
# Persist the de-duplicated data back to the same file.
df.to_csv('netfix_cleaned.csv', index=False)

In [17]:
def get_genre(movie_ind, g, data=None):
    """Keep only the movies whose genre list contains the genre ``g``.

    Parameters
    ----------
    movie_ind : iterable of int
        Row positions (not index labels) of the candidate movies.
    g : str
        Genre to look for.
    data : pandas.DataFrame, optional
        Frame holding a comma-separated ``'genre'`` column. Defaults to the
        notebook-level ``df``.

    Returns
    -------
    list of int
        The entries of ``movie_ind`` that carry genre ``g``, in input order.
    """
    if not isinstance(g, str):
        return []  # no genre selected -> nothing can match

    frame = df if data is None else data
    genre_column = frame['genre']
    wanted = g.strip()

    matches = []
    for position in movie_ind:
        # Positional lookup: callers pass row numbers, which differ from index
        # labels whenever rows have been dropped from the frame.
        cell = genre_column.iloc[position]
        if not isinstance(cell, str):
            continue  # missing genre (NaN) never matches
        # Compare whole genre names so that e.g. 'Comedies' does not also
        # select movies that are only tagged 'Romantic Comedies'.
        if wanted in (part.strip() for part in cell.split(',')):
            matches.append(position)
    return matches

In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# TF-IDF vectorizer that ignores english stop words
tfidf = TfidfVectorizer(stop_words='english')

# Fit and transform the descriptions into a (movies x terms) matrix.
# Missing descriptions are replaced by '' because the vectorizer rejects NaN.
tfidf_matrix = tfidf.fit_transform(df['description'].fillna(''))

# Pairwise similarity of every movie with every other movie. TF-IDF rows are
# L2-normalised by default, so the plain dot product is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse map: movie title -> row position in tfidf_matrix / cosine_sim.
# Positions (not index labels) are stored because the matrix is addressed by
# position; when a title occurs more than once only its first row is kept.
indices = pd.Series(range(len(df)), index=df['names'])
indices = indices[~indices.index.duplicated(keep='first')]

#takes in movie title as input and outputs most similar movies
def get_recommendations(title, genre, cosine_sim=cosine_sim, pool_size=30, top_n=5):
    """Recommend the movies most similar to ``title`` within ``genre``.

    Parameters
    ----------
    title : str
        Movie name to look up in ``indices``.
    genre : str
        Genre the recommendations must carry (filtered by ``get_genre``).
    cosine_sim : 2-D array
        Pairwise similarity matrix; row ``i`` holds the scores of movie ``i``.
    pool_size : int, optional
        How many of the most similar movies are considered before the genre
        filter is applied.
    top_n : int, optional
        Maximum number of recommendations returned.

    Returns
    -------
    pandas.Series
        Names of up to ``top_n`` movies, most similar first. Empty when the
        title is unknown or nothing in the pool has the requested genre.
    """
    # Unknown or empty title: return an empty result instead of raising KeyError
    if title not in indices.index:
        return df['names'].iloc[[]]

    # Get the index of the movie that matches the title
    idx = indices[title]
    if isinstance(idx, pd.Series):
        # the title occurs several times; use its first occurrence
        idx = idx.iloc[0]
    idx = int(idx)

    # Similarity of every movie with the selected one
    scores = cosine_sim[idx]

    # Rank all *other* movies from most to least similar. The selected movie
    # is excluded explicitly rather than by assuming it sorts first, which
    # fails when another movie has an identical description.
    ranked = sorted(
        (pos for pos in range(len(scores)) if pos != idx),
        key=lambda pos: scores[pos],
        reverse=True,
    )

    # Keep the most similar movies, then only those with the selected genre
    candidates = get_genre(ranked[:pool_size], genre)

    # Return the names of the best matches, best first
    return df['names'].iloc[candidates[:top_n]]

In [19]:
genres = df['genre'].tolist()  # one comma-separated genre string per movie

# flatten the per-movie genre strings into one list of raw genre names
genre_options = []
for inner_list in genres:
    if isinstance(inner_list, str):  # skip missing values
        genre_options += inner_list.split(",")

# Reduce to the unique genre names, in order of first appearance.
# The name is stripped BEFORE the membership test; testing the unstripped
# text (e.g. ' Action') against the stripped entries never matches and
# appends the same genre again for every occurrence.
unique_genre = []
for element in genre_options:
    cleaned = element.strip()
    if cleaned and cleaned not in unique_genre:
        unique_genre.append(cleaned)

In [20]:
# Dropdown of the unique genres listed in the data.
# 'Spy' is pre-selected when the data contains it; otherwise the first genre
# (or nothing, for an empty list) is used, because a Dropdown rejects a value
# that is not among its options.
genre_rb = widgets.Dropdown(
    value='Spy' if 'Spy' in unique_genre else next(iter(unique_genre), None),
    options=unique_genre,
    layout=Layout(align_items='center', width='250px', vertical_align='middle'),
    disabled=False
)

# list of movie titles offered as suggestions
names = df['names'].tolist()

# Combobox for movie titles: the user starts typing and picks a suggestion.
# ensure_option=True restricts the value to one of the listed titles.
movie_cb = widgets.Combobox(
    placeholder='Start typing for suggestions',
    options=names,
    description='Enter a movie name:',
    ensure_option=True,
    disabled=False,
    layout=Layout(display='flex', flex_flow='column', align_items='center',
                  width='250px', height='auto',
                  padding='10px', vertical_align='middle', border='hidden'),
    style={'description_width': 'initial'})

In [21]:
# Image widget used as the title of the app.
# The file is read inside a `with` block so the handle is closed right away.
with open("kadabra.png", "rb") as file:
    image = file.read()
title = widgets.Image(value=image, format='png', width=275, height=450)

# Label shown above the genre dropdown
title_genre = widgets.Label(
    value='Movie Genres',
    font_weight='bold',
    layout=Layout(display='flex', flex_flow='column', align_items='center',
                  height='auto', width='250px', padding='5px', vertical_align='middle'),
    background_color='#7b88db')

In [22]:
# Blank labels that only add spacing between widgets. Both share the same
# layout options and differ in padding alone (small: 2px, large: 40px).
_spacer_layout = dict(display='flex', flex_flow='column', align_items='center',
                      width='5px', height='auto', vertical_align='middle')

empty_label_small, empty_label_large = (
    widgets.Label(value='', font_weight='bold',
                  layout=Layout(padding=pad, **_spacer_layout))
    for pad in ('2px', '40px')
)

In [23]:
# Function to handle the selection event
def on_movie_change(change):
    """Show the details of the movie selected in the suggestion list.

    ``change`` is the change dictionary delivered by ``observe``; only value
    changes are acted upon. The new value is looked up by name in ``df`` and
    the movie's details are written to the ``summary`` text area. A value
    that is not a movie name (``None``, or one of the placeholder entries
    shown when nothing matched) clears the summary instead of raising.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return

    # Look the row up by name directly; when a title occurs more than once,
    # its first row is shown.
    matches = df[df['names'] == change['new']]
    if matches.empty:
        summary.value = ''
        return
    row = matches.iloc[0]

    summary.value = "\n".join([
        f"{row['names']} ",
        f"{row['release_year']}   {row['maturity_rating']}   {row['duration']}",
        "",
        f"{row['description']}",
        "",
        "Genre ",
        f"{row['genre']}",
        "",
        "Cast ",
        f"{row['cast']}",
        "",
        "Subtitles",
        f"{row['subtitles']} ",
        "",
        "Audio",
        f"{row['audio']}",
    ])
        

In [24]:
# let descriptions take the width they need instead of being truncated
style = {'description_width': 'initial'}

# read-only fields echoing the inputs used for the last search
s_movie = widgets.Text(value='', description='Selected Movie:', style=style, disabled=True)
s_genre = widgets.Text(value='', description='Selected Genre:', style=style, disabled=True)

# list of recommended movies; selecting one fills the summary below
# NOTE(review): background_color is passed straight to the constructor -
# confirm it has any effect with the installed ipywidgets version.
out = widgets.Select(options=[], description='Movie Suggestions:', rows=5, style=style,
                     disabled=False, background_color='pink')

# headings for the information panel
title_summary = widgets.Label(value='Movie Information', font_weight='bold',
                              layout=Layout(display='flex', flex_flow='column', align_items='center',
                                            height='auto', width='auto', padding='1px', vertical_align='middle'))
title_summary_select = widgets.Label(value='Select a Movie to Show More Information', font_weight='bold',
                                     layout=Layout(display='flex', flex_flow='column', align_items='center',
                                                   height='auto', width='auto', padding='1px', vertical_align='middle'))

# Read-only text area with the details of the selected movie. Border and size
# go through Layout; given as plain constructor keywords they are not applied.
summary = widgets.Textarea(value='', style=style, disabled=True, rows=10,
                           layout=Layout(border='solid 2px', height='auto', width='250px', padding='5px'))

# Attach the event handler to the recommended movies
out.observe(on_movie_change, names='value')

In [25]:
def update_button():
    """Run the recommender for the current inputs and refresh the output widgets.

    Reads the genre from ``genre_rb`` and the movie from ``movie_cb``, fills
    the ``out`` list with the recommendations (or with a two-line notice when
    there are none) and echoes both inputs in ``s_movie`` / ``s_genre``.
    """
    #values from dropdown and combobox are gathered
    g = genre_rb.value
    movie_selection = movie_cb.value

    #uses movie selection to get movie recommendations
    # With no movie (or no genre) chosen there is nothing to look up; treat it
    # as "no results" rather than letting the lookup fail.
    if movie_selection and g is not None:
        x = list(get_recommendations(movie_selection, g))
    else:
        x = []

    #output
    if len(x) == 0:
        out.options = ['No movies match your search', 'Try Again!']
    else:
        out.options = x
    s_movie.value = movie_selection
    s_genre.value = '' if g is None else g

In [26]:
# Button to run the recommender. Border and size go through Layout; given as
# plain constructor keywords they are not applied.
button = widgets.Button(
    description='Find A Movie',
    layout=Layout(border='solid 2px', height='auto', width='250px', padding='5px'))

# action when button is pressed
def on_button_clicked(b):
    """Click handler: refresh the recommendations (the button argument is unused)."""
    update_button()

button.on_click(on_button_clicked)

# left column: title image, echoed inputs and the suggestion list
left_box = widgets.VBox([title, s_movie, s_genre, out, title_summary_select])
# narrow spacer column, repeated below to separate the two main columns
center_box = widgets.VBox([empty_label_small])
# right column: movie combobox, genre dropdown, button and movie information
right_box = widgets.VBox([movie_cb, empty_label_small, title_genre, genre_rb,
                          empty_label_small, button, empty_label_small, empty_label_small,
                          title_summary, summary])
left_box.layout.align_items = 'center'
right_box.layout.align_items = 'center'
# widget box to hold other boxes; as the last expression it is displayed
widgets.Box([left_box, center_box, center_box, center_box, right_box])


Box(children=(VBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01/\x00\x00\x00\xfa\x0…