In [1]:
import random
import webbrowser # enables transition from python to webbrowser
from urllib.parse import quote_plus # URL-encodes movie titles for the search link

import pandas as pd
from IPython.display import clear_output # enables clearing of output

In [2]:
# data cleaning
# Build the working frame in one pipeline: load the CSV, discard unused columns,
# remove incomplete rows, then keep only well-rated movies from 1980 onwards.

df = (
    pd.read_csv("movies_streaming.csv")  # load the raw catalogue
    .pipe(lambda raw: raw.drop(columns=raw.columns[[0, 1, 4, 6, 7, 8, 9, 10, 11]]))  # unused columns, selected by position
    .dropna()  # rows with any missing value are removed to avoid errors later
    .astype({"Runtime": int})  # runtime is converted from float to int
    .loc[lambda frame: frame["IMDb"] >= 6.0]  # discard IMDb ratings below 6
    .loc[lambda frame: frame["Year"] >= 1980]  # discard movies released before 1980
)

In [3]:
# To enumerate the available genres, keep only the first genre listed for each movie.

# Split [Genres] once at the first comma; column 0 of the result is the leading genre.
new = df["Genres"].str.split(",", n=1, expand=True)

# Replace the original [Genres] column with a single-valued [Extracted_Genre] column.
df_isolated_genre = df.drop(columns=["Genres"]).assign(Extracted_Genre=new[0])

df_isolated_genre.sample(n=3)  # preview three random rows

Unnamed: 0,Title,Year,IMDb,Directors,Country,Language,Runtime,Extracted_Genre
4707,Magic Mike,2012,6.1,Steven Soderbergh,United States,English,110,Comedy
16218,Up,2009,8.2,"Pete Docter,Bob Peterson",United States,English,96,Animation
12562,Zedd: True Colors,2016,7.2,"Susan Bonds,Alex Lieu",United States,English,88,Documentary


In [4]:
# A movie can belong to several genres, so turn the comma-separated [Genres]
# string into a list of genre names (one list per movie).

df_listed_genre = df.assign(Genres=df["Genres"].str.split(","))  # new frame; df itself is untouched

df_listed_genre.sample(n=3)  # preview three random rows

Unnamed: 0,Title,Year,IMDb,Directors,Genres,Country,Language,Runtime
443,Mississippi Grind,2015,6.4,"Anna Boden,Ryan Fleck","[Comedy, Drama]",United States,English,108
8409,Art Show Bingo,2017,6.1,Matthew Fine,[Romance],United States,English,90
4262,Eve's Christmas,2004,6.0,Timothy Bond,"[Comedy, Family]","Canada,United States",English,96


In [5]:
# from here, we can create the user interaction
# STAGE 1 = user choose a genre
#
# The answer is matched case-insensitively against the listed genres and mapped
# back to the exact spelling used in the data. (Normalising with str.title()
# would turn "Reality-TV" into "Reality-Tv", which matches no movie.)
# The prompt repeats until a listed genre is entered, so later stages always
# receive a genre that exists in the catalogue.

genre_list = df_isolated_genre['Extracted_Genre'].unique().tolist() # Returns list of unique genre names for user to select

print('Select one of the following genres:\n')
for genre in genre_list:
    print(genre)

# lower-cased genre -> canonical genre name as it appears in the data
genre_lookup = {genre.lower(): genre for genre in genre_list}

user_selected_genre = None
while user_selected_genre is None:
    answer = input("Which genre are you in the mood to watch today?: ").strip().lower()
    user_selected_genre = genre_lookup.get(answer)
    if user_selected_genre is None:
        print("Sorry, that genre is not in the list above. Please try again.")

Select one of the following genres:

Action
Adventure
Animation
Biography
Drama
Comedy
Crime
Documentary
Horror
Thriller
Mystery
Fantasy
Romance
Short
Sport
Musical
Music
Family
Reality-TV
Sci-Fi
War
History
Western
Which genre are you in the mood to watch today?: comedy


In [6]:
# STAGE 2 = finding a match
#
# Narrow the catalogue to movies tagged with the chosen genre first, then offer
# random picks from that pool until the user accepts one. Filtering up front
# means the loop cannot spin forever when no movie carries the genre, and every
# random index is guaranteed to be a valid row position.

# rows whose genre list contains the user's genre
genre_pool = df_listed_genre[
    df_listed_genre["Genres"].apply(lambda genres: user_selected_genre in genres).astype(bool)
]

genre_match = False

if genre_pool.empty:
    print(f'Sorry, no movie in the catalogue is tagged "{user_selected_genre}".')

while not genre_match and not genre_pool.empty:

    # randrange excludes the upper bound, so the position is always in range for iloc
    random_movie = genre_pool.iloc[random.randrange(len(genre_pool))] # get hold of a random movie and all its data

    print(
        'Hooray! This is your randomly selected movie:\n\n'
        f'Title: {random_movie.Title}\n'
        f'Year: {random_movie.Year}\n'
        f'IMDb Rating: {random_movie.IMDb}\n'
        f'Genre: {random_movie.Genres}\n'
        f'Director: {random_movie.Directors}\n'
        f'Language: {random_movie.Language}\n'
        f'Country: {random_movie.Country}\n'
        f'Runtime: {random_movie.Runtime} minutes\n'
    )
    user_selection = input('Would you like to proceed with this? (Yes / No): ').strip().title()

    if user_selection == 'Yes':
        genre_match = True
        # JustWatch website will be opened showing the result when user says "Yes".
        # The title is URL-encoded so spaces, "&", "#" etc. do not break the query string.
        webbrowser.open(f"https://www.justwatch.com/us/search?q={quote_plus(str(random_movie.Title))}")
    else:
        clear_output(wait=False) # old output will be cleared and new output will be shown


Hooray! This is your randomly selected movie:

Title: Hank and Asha
Year: 2014
IMDb Rating: 6.5
Genre: ['Comedy', 'Romance']
Director: James E. Duff
Language: English
Country: United States,Czech Republic
Runtime: 73 minutes

Would you like to proceed with this? (Yes / No): yes
