In [1]:
#imports and installs - will run automatically when imported into the Final_Program.ipynb
!pip install nltk
import nltk #for language processing
nltk.download() #download all corpora and packages
import nltk #import the new downloads
from nltk.corpus import stopwords #import stopwords to remove from plot synopsis
from nltk.tokenize import word_tokenize #import tokenize function to break up the plot
import pandas as pd #for importing the csv into dataframes
import requests #for later
from IPython.display import display, HTML #for multiple displays and formatting

NLTK Downloader
---------------------------------------------------------------------------
    d) Download   l) List    u) Update   c) Config   h) Help   q) Quit
---------------------------------------------------------------------------
Downloader> q


In [2]:
def callimdblist(csv_path="IMDB_Top250Engmovies2_OMDB_Detailed.csv"): #pull data from csv into dataframe
    '''Import data from a local .csv into a dataframe.

    csv_path: path of the .csv file to read. Defaults to the file name this
        function originally hard-coded, resolved against the working directory.
    Returns the dataframe produced by pd.read_csv.
    '''
    return pd.read_csv(csv_path) #read .csv into a dataframe and return it



def _tokens_from_plot(plot, stop_words): #helper: turn one plot synopsis into its list of search tokens
    '''Tokenize one plot and keep only alphabetic tokens that are not in stop_words.
    Returns an empty list when the plot is not a string (for example a missing value).'''
    if not isinstance(plot, str): #word_tokenize needs text; a missing csv cell is not a string
        return []
    #isalpha must be CALLED - the bare method object is always truthy and filters nothing
    return [word for word in word_tokenize(plot) if word.isalpha() and word not in stop_words]


def create_table_250(): #create a refined table and cut down the plot synopses
    '''Picks certain columns to form a new dataframe.
    Tokenizes each plot, drops punctuation/non-alphabetic tokens and stopwords,
    and stores the remaining words as a list in a new 'Tokens' column.
    Returns the refined dataframe.'''
    data = callimdblist() #call function to create the dataframe
    #copy the column subset so adding 'Tokens' writes to an independent frame
    data = data[['Rank', 'Title', 'Year', 'Rated', 'Genre', 'Director', 'Actors', 'Plot']].copy() #refine the dataset
    stop_words = set(stopwords.words('english')) #build the stopword set once instead of once per row
    data['Tokens'] = data['Plot'].apply(lambda plot: _tokens_from_plot(plot, stop_words)) #one token list per row
    return data #return dataframe
    
    
    
def search_params(data): #user selects search category and term - passes those to search function
    '''Requires a dataframe as an input. Asks the user for a search category and a search term,
    passes them to search_func, then asks continuation_func what to do next.
    Loops until the user chooses to return to the menu; returns nothing.'''
    #category -> (dataframe column to search, prompt for the term, optional normalizer for the term)
    categories = {
        "plot": ("Tokens", "Please enter a single word to search for: ", None),
        "actor": ("Actors", "Please enter a single name to search for: ", None),
        "director": ("Director", "Please enter a single name to search for: ", None),
        "genre": ("Genre", "Please enter a single word to search for: ", str.capitalize),
    }
    while True: #loop until the user asks to go back to the menu
        choice = input("Would you like to search by: plot, actor, director, or genre? ") #taking user category selection
        choice = choice.strip().lower() #normalize user text; stray spaces should not invalidate the choice
        if choice not in categories: #if user doesn't enter a valid selection
            print("\nYou did not make a valid selection. Please enter 'plot', 'actor', 'director', or 'genre'. \n") #error
            continue
        column, prompt, normalize = categories[choice] #look up how to handle the chosen category
        term = input(prompt) #take user search term
        if normalize is not None: #only genre terms are normalized
            term = normalize(term)
        search_func(term, column, data) #call search_func with given search term, category, and dataframe
        if continuation_func(data): #True means the user wants the menu -- otherwise re-loop
            break
    return #return to the menu

            
            
def search_func(search, column, table): #finds matching titles to search criteria
    '''Finds matching titles to search criteria and displays them.

    search: the search term (a string).
    column: name of the dataframe column to search in.
    table: dataframe holding that column and a 'Title' column.

    Matching ignores letter case. A text cell matches when it contains the term;
    a list-like cell (such as a tokenized plot) matches when one of its items equals the term.
    Cells of any other type, for example missing values, never match.
    Returns nothing.
    '''
    print("\nSearch Results: ") #print list header
    needle = search.lower() #compare in lowercase so 'Sci-fi' can still find 'Sci-Fi'
    for _, row in table.iterrows(): #check every film against the search term
        cell = row[column]
        if isinstance(cell, str): #plain text column: substring match
            found = needle in cell.lower()
        elif isinstance(cell, (list, tuple, set)): #token column: whole-word match
            found = any(needle == str(item).lower() for item in cell)
        else: #missing or unexpected value - the original 'in' test would raise TypeError here
            found = False
        if found: #if there is a match then show the film title
            display(f"{row['Title']}")
    return #exit out of this function
                    

def continuation_func(data): #ask the user what to do after a search
    '''Requires a dataframe as input (it is not read here). Keeps prompting until the user
    enters '1' or '2'. Returns False for '1' (search again) and True for '2' (return to menu).'''
    routes = {'1': False, '2': True} #valid answer -> "go back to the menu?" flag
    while True: #keep asking until a valid answer arrives
        print("Would you like to: ") #beginning of query
        print("    Search the top 250 again? Enter '1' to search again. ") #option 1 offered to user
        print("    Return to the main menu? Enter '2' to return. ") #option 2 offered to user
        answer = input() #take user input
        if answer in routes: #valid selection: hand back the matching flag
            return routes[answer]
        print("Please make a valid selection. Choose '1' to search again, or '2' to return. ") #invalid: ask again