In [1]:
import random
import webbrowser
from urllib.parse import quote_plus

import pandas as pd
from IPython.display import clear_output

In [2]:
# Data cleaning: load the catalogue, then trim it down to the columns and rows
# the recommender actually uses.
unused_column_positions = [0, 1, 4, 6, 7, 8, 9, 10, 11]  # positional indices of the columns to discard

df = pd.read_csv("movies_streaming.csv")  # raw catalogue
df = df.drop(columns=df.columns[unused_column_positions])  # discard the columns not needed below
df = df.dropna()  # rows with any missing value are removed so later steps cannot hit NaN
df = df.astype({"Runtime": int})  # cast Runtime to int (done after NaN rows are gone)
df = df.drop(index=df.index[df["IMDb"] < 6.0])  # remove titles rated below 6.0 on IMDb
df = df.drop(index=df.index[df["Year"] < 1980])  # remove titles from before 1980

In [3]:
# Work out which genres are available by pulling the first genre out of the
# comma-separated [Genres] column.

# Split only at the first comma: column 0 holds the leading genre, column 1 the remainder.
new = df["Genres"].str.split(",", n=1, expand=True)

# Build a separate frame (df itself stays untouched) carrying the extra [Extracted_Genre] column.
df_separated_genres = df.assign(Extracted_Genre=new[0])

df_separated_genres.sample(n=3)  # preview three random rows

Unnamed: 0,Title,Year,IMDb,Directors,Genres,Country,Language,Runtime,Extracted_Genre
1901,Düğün Dernek 2: Sünnet,2015,6.4,Selçuk Aydemir,Comedy,Turkey,Turkish,112,Comedy
841,My Big Night,2015,6.0,Álex de la Iglesia,Comedy,Spain,Spanish,100,Comedy
4824,Faults,2014,6.7,Riley Stearns,"Comedy,Crime,Drama",United States,English,89,Comedy


In [4]:
# A movie can belong to several genres, so turn the comma-separated [Genres]
# string into a list of genre names per row.

# assign() returns a new frame with [Genres] replaced; df itself is left as-is.
# split(",") with its defaults splits on every comma and yields a list per row.
df_listed_genre = df.assign(Genres=df["Genres"].str.split(","))

df_listed_genre.sample(n=3)  # preview three random rows

Unnamed: 0,Title,Year,IMDb,Directors,Genres,Country,Language,Runtime
1766,Naam Shabana,2017,6.3,Shivam Nair,"[Action, Thriller]",India,Hindi,147
2682,A 3 Minute Hug,2018,6.4,Everardo González,"[Documentary, Short]","Mexico,United States",Spanish,28
4476,Temple Grandin,2010,8.3,Mick Jackson,"[Biography, Drama]",United States,English,107


In [5]:
# from here, we can create the user interaction
# STAGE 1
# Show the available genres and ask the user to pick one of them.
# Result: `user_selected_genre` holds a genre spelled exactly as it appears in the data.
genre_list = df_separated_genres['Extracted_Genre'].unique().tolist() # Returns list of unique genre names

print('Select one of the following genres:\n')
for x in genre_list:
    print(x)

# Map a case-folded spelling to the canonical one. Using str.title() on the
# answer is not enough: "reality-tv".title() gives "Reality-Tv", which matches
# no genre in the data and would leave the next stage searching forever.
genre_lookup = {genre.casefold(): genre for genre in genre_list}

# Keep asking until the answer is one of the listed genres (case-insensitive).
user_selected_genre = None
while user_selected_genre is None:
    answer = input("Which genre are you in the mood to watch today?: ").strip()
    user_selected_genre = genre_lookup.get(answer.casefold())
    if user_selected_genre is None:
        print(f"'{answer}' is not one of the listed genres. Please try again.\n")

Select one of the following genres:

Action
Adventure
Animation
Biography
Drama
Comedy
Crime
Documentary
Horror
Thriller
Mystery
Fantasy
Romance
Short
Sport
Musical
Music
Family
Reality-TV
Sci-Fi
War
History
Western
Which genre are you in the mood to watch today?: Comedy


In [6]:
# STAGE 2
# Suggest random movies of the selected genre until the user accepts one, then
# open a JustWatch search for the accepted title in the web browser.

# Narrow the catalogue to movies whose genre list contains the selection first.
# Drawing from this subset is still a uniform pick among matching movies, but
# it cannot spin forever when nothing matches.
genre_mask = df_listed_genre["Genres"].apply(lambda genres: user_selected_genre in genres)
matching_movies = df_listed_genre[genre_mask]

genre_match = False

if matching_movies.empty:
    print(f"Sorry, no movies were found for the genre '{user_selected_genre}'.")

while not genre_match and not matching_movies.empty:

    # randrange excludes the upper bound; random.randint(0, len(...)) includes it
    # and would occasionally index one past the last row (IndexError).
    random_number = random.randrange(len(matching_movies)) # generate random index no.
    random_movie = matching_movies.iloc[random_number] # get hold of a random movie

    print(f'Hooray! This is your randomly selected movie:\n\nTitle: {random_movie.Title}\nYear: {random_movie.Year}\nIMDb Rating: {random_movie.IMDb}\nGenre: {random_movie.Genres}\nDirector: {random_movie.Directors}\nLanguage: {random_movie.Language}\nCountry: {random_movie.Country}\nRuntime: {random_movie.Runtime} minutes\n')
    user_selection = input('Would you like to proceed with this? (Yes / No): ').strip().title()

    if user_selection == 'Yes':
        genre_match = True
        # URL-encode the title so spaces and characters such as '&' or '#'
        # stay inside the search query instead of breaking the URL.
        webbrowser.open(f"https://www.justwatch.com/us/search?q={quote_plus(str(random_movie.Title))}")
    else:
        # Any other answer: wipe the previous suggestion and draw again.
        clear_output(wait=False)


Hooray! This is your randomly selected movie:

Title: Bolt
Year: 2008
IMDb Rating: 6.8
Genre: ['Animation', 'Adventure', 'Comedy', 'Drama', 'Family']
Director: Byron Howard,Chris Williams
Language: English
Country: United States
Runtime: 96 minutes

Would you like to proceed with this? (Yes / No): yes
