# Installing packages

In [1]:
# After comparing differen models - we saw the best results with BERtopic
!pip install bertopic

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bertopic
  Downloading bertopic-0.14.1-py2.py3-none-any.whl (120 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m120.7/120.7 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting hdbscan>=0.8.29 (from bertopic)
  Downloading hdbscan-0.8.29.tar.gz (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting umap-learn>=0.5.0 (from bertopic)
  Downloading umap-learn-0.5.3.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.2/88.2 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentence-transformers>=0.4.1 (from

In [2]:
# Packages
from bertopic import BERTopic
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from tabulate import tabulate
import joblib
import random
import urllib.parse


In [3]:
# Mount Google Drive at /content/drive (Colab only); the dataset and model paths
# used in the following cells live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Open movies dataset after EDA

In [4]:
# Read the movies dataset produced by the EDA / preprocessing step into movies_df.
# NOTE(review): the original comment says stopwords were removed and words/numbers were
# normalized, but the synopsis column displayed below still contains stopwords — confirm
# which columns (if any) were actually preprocessed in this file.
path = "/content/drive/MyDrive/Colab Notebooks/Movie recommender/Datasets/data_movies_complete.csv"
movies_df = pd.read_csv(path)
movies_df

Unnamed: 0.1,Unnamed: 0,title,year,runtime,genre,rating,synopsis,director,votes,cast1,cast2
0,0,The Dark Knight,2008,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,Christopher Nolan,2669470,Christian Bale,Heath Ledger
1,1,The Lord of the Rings: The Return of the King,2003,201 min,"Action, Adventure, Drama",9.0,Gandalf and Aragorn lead the World of Men agai...,Peter Jackson,1856911,Elijah Wood,Viggo Mortensen
2,2,Inception,2010,148 min,"Action, Adventure, Sci-Fi",8.8,A thief who steals corporate secrets through t...,Christopher Nolan,2368139,Leonardo DiCaprio,Joseph Gordon-Levitt
3,3,The Lord of the Rings: The Fellowship of the Ring,2001,178 min,"Action, Adventure, Drama",8.8,A meek Hobbit from the Shire and eight compani...,Peter Jackson,1886353,Elijah Wood,Ian McKellen
4,4,The Lord of the Rings: The Two Towers,2002,179 min,"Action, Adventure, Drama",8.8,While Frodo and Sam edge closer to Mordor with...,Peter Jackson,1676766,Elijah Wood,Ian McKellen
...,...,...,...,...,...,...,...,...,...,...,...
10062,10062,Dudley Do-Right,1999,77 min,"Comedy, Family, Romance",3.9,Inept Canadian mountie Dudley Do-Right chases ...,Hugh Wilson,10928,Brendan Fraser,Sarah Jessica Parker
10063,10063,Tubelight,2017,136 min,"Drama, War",3.9,A story of two brothers set during the Sino-In...,Kabir Khan,20743,Salman Khan,Sohail Khan
10064,10064,The Disappointments Room,2016,91 min,"Drama, Horror, Thriller",3.9,A mother and her young son release unimaginabl...,D.J. Caruso,10081,Kate Beckinsale,Mel Raido
10065,10065,Material Girls,2006,98 min,"Comedy, Family, Romance",3.9,"Two wealthy sisters, both heiresses to their f...",Martha Coolidge,22415,Hilary Duff,Haylie Duff


# Importing model with joblib

In [5]:
# Load the saved topic model (presumably a fitted BERTopic instance — its .topics_ and
# .find_topics() are used in later cells).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only load
# model files from a source you trust.
loaded_model = joblib.load('/content/drive/MyDrive/Colab Notebooks/Movie recommender/final_model.joblib')

In [6]:
# Topic ids stored on the loaded model; presumably one per synopsis, in the same
# row order as movies_df — TODO confirm before relying on the alignment.
topics_list = loaded_model.topics_

# Add the model's topic assignments to the movies dataframe

---



In [7]:
# Attach the topic ids as a new 'topics' column and drop the "Unnamed: 0" column
# (presumably a leftover index column from an earlier CSV export).
# assign() builds a new frame instead of mutating in place, and errors="ignore"
# tolerates the column already being gone, so re-running this cell no longer
# raises a KeyError.
movies_df = (
    movies_df
    .assign(topics=topics_list)
    .drop(columns=["Unnamed: 0"], errors="ignore")
)
movies_df

Unnamed: 0,title,year,runtime,genre,rating,synopsis,director,votes,cast1,cast2,topics
0,The Dark Knight,2008,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,Christopher Nolan,2669470,Christian Bale,Heath Ledger,79
1,The Lord of the Rings: The Return of the King,2003,201 min,"Action, Adventure, Drama",9.0,Gandalf and Aragorn lead the World of Men agai...,Peter Jackson,1856911,Elijah Wood,Viggo Mortensen,-1
2,Inception,2010,148 min,"Action, Adventure, Sci-Fi",8.8,A thief who steals corporate secrets through t...,Christopher Nolan,2368139,Leonardo DiCaprio,Joseph Gordon-Levitt,22
3,The Lord of the Rings: The Fellowship of the Ring,2001,178 min,"Action, Adventure, Drama",8.8,A meek Hobbit from the Shire and eight compani...,Peter Jackson,1886353,Elijah Wood,Ian McKellen,-1
4,The Lord of the Rings: The Two Towers,2002,179 min,"Action, Adventure, Drama",8.8,While Frodo and Sam edge closer to Mordor with...,Peter Jackson,1676766,Elijah Wood,Ian McKellen,-1
...,...,...,...,...,...,...,...,...,...,...,...
10062,Dudley Do-Right,1999,77 min,"Comedy, Family, Romance",3.9,Inept Canadian mountie Dudley Do-Right chases ...,Hugh Wilson,10928,Brendan Fraser,Sarah Jessica Parker,152
10063,Tubelight,2017,136 min,"Drama, War",3.9,A story of two brothers set during the Sino-In...,Kabir Khan,20743,Salman Khan,Sohail Khan,-1
10064,The Disappointments Room,2016,91 min,"Drama, Horror, Thriller",3.9,A mother and her young son release unimaginabl...,D.J. Caruso,10081,Kate Beckinsale,Mel Raido,170
10065,Material Girls,2006,98 min,"Comedy, Family, Romance",3.9,"Two wealthy sisters, both heiresses to their f...",Martha Coolidge,22415,Hilary Duff,Haylie Duff,-1


In [8]:
############################
# Save the new movies_df   #
############################
# Disabled by default: uncomment the two statements below to write movies_df
# (including its 'topics' column) to a CSV file on Drive.
# Specify the file path where you want to save the DataFrame
#file_path = '/content/drive/MyDrive/Colab Notebooks/Movie recommender/Datasets/topic_movies.csv'

# Save the DataFrame as a CSV file
#movies_df.to_csv(file_path, index=False)

# Our function :)

In [9]:
def movie_recommendation(user_input, loaded_model, movies_cluster_df, num_of_topics=3):
    """Recommend one random movie for each topic most similar to the user's text.

    Parameters
    ----------
    user_input : str
        Free-text search term passed to ``loaded_model.find_topics``.
    loaded_model : object
        Topic model exposing ``find_topics(text, top_n=...)`` that returns a
        ``(topic_ids, similarities)`` pair.
    movies_cluster_df : pandas.DataFrame
        Movies table with a ``topics`` column and the display columns listed
        in ``display_columns`` below.
    num_of_topics : int, default 3
        Number of similar topics to request, i.e. the maximum number of
        movies that can be returned.

    Returns
    -------
    list of pandas.Series
        One row (restricted to the display columns) per matched topic, in the
        order the topics were returned by the model. Empty when none of the
        topics has a movie in ``movies_cluster_df``.
    """
    display_columns = ['title', 'year', 'runtime', 'genre', 'rating', 'synopsis',
                       'director', 'votes', 'cast1', 'cast2']

    # The similarity scores are returned by the model but not used here.
    similar_topics, _similarity = loaded_model.find_topics(user_input, top_n=num_of_topics)

    selected_movies = []
    for topic in similar_topics:
        # Filter the dataframe that was passed in, not the notebook-global movies_df,
        # so the function works with whatever table the caller supplies.
        topic_movies = movies_cluster_df[movies_cluster_df['topics'] == topic]
        if topic_movies.empty:
            continue

        # Pick one index label at random; a plain list keeps random.choice
        # independent of how it treats pandas Index objects.
        random_row = random.choice(list(topic_movies.index))
        selected_movies.append(topic_movies.loc[random_row, display_columns])

    # Without this guard, building the table from selected_movies[0] would
    # raise IndexError when nothing matched.
    if not selected_movies:
        print("No movies found for this input.")
        return selected_movies

    # Print the selected movies in a tabular format
    rows = [movie.tolist() for movie in selected_movies]
    print(tabulate(rows, headers=display_columns, tablefmt="pretty"))

    return selected_movies

In [10]:
# Prompt for a free-text description, then print a table of recommended movies
# (one randomly chosen movie per matching topic) and keep the rows in `recommendation`.
user_input = input("Enter your input: ")
recommendation = movie_recommendation(user_input, loaded_model, movies_df)
#print("Suggested Movie:")
#print(recommendation)

Enter your input: classic musical broadway
+--------------------+------+---------+-----------------------+--------+-------------------------------------------------------------------------------------------------------------------------------------+----------------+-------+------------------+--------------------+
|       title        | year | runtime |         genre         | rating |                                                              synopsis                                                               |    director    | votes |      cast1       |       cast2        |
+--------------------+------+---------+-----------------------+--------+-------------------------------------------------------------------------------------------------------------------------------------+----------------+-------+------------------+--------------------+
|      Beaches       | 1988 | 123 min | Comedy, Drama, Music  |  6.9   | A privileged rich debutante and a cynical struggling entertainer sha

# Movie recommendation2 with YouTube link

In [2]:
def movie_recommendation2(user_input, loaded_model, movies_cluster_df, num_of_topics=3):
    """Recommend one random movie per similar topic and print a YouTube search link for each.

    Parameters
    ----------
    user_input : str
        Free-text search term passed to ``loaded_model.find_topics``.
    loaded_model : object
        Topic model exposing ``find_topics(text, top_n=...)`` that returns a
        ``(topic_ids, similarities)`` pair.
    movies_cluster_df : pandas.DataFrame
        Movies table with a ``topics`` column and the display columns listed
        in ``display_columns`` below.
    num_of_topics : int, default 3
        Number of similar topics to request, i.e. the maximum number of
        movies that can be returned.

    Returns
    -------
    list of pandas.Series
        One row (restricted to the display columns) per matched topic, in the
        order the topics were returned by the model. Empty when none of the
        topics has a movie in ``movies_cluster_df``.
    """
    display_columns = ['title', 'year', 'runtime', 'genre', 'rating', 'synopsis',
                       'director', 'votes', 'cast1', 'cast2']

    # The similarity scores are returned by the model but not used here.
    similar_topics, _similarity = loaded_model.find_topics(user_input, top_n=num_of_topics)

    selected_movies = []
    for topic in similar_topics:
        # Filter the dataframe that was passed in, not the notebook-global movies_df,
        # so the function works with whatever table the caller supplies.
        topic_movies = movies_cluster_df[movies_cluster_df['topics'] == topic]
        if topic_movies.empty:
            continue

        # Pick one index label at random; a plain list keeps random.choice
        # independent of how it treats pandas Index objects.
        random_row = random.choice(list(topic_movies.index))
        selected_movies.append(topic_movies.loc[random_row, display_columns])

    # Without this guard, building the table from selected_movies[0] would
    # raise IndexError when nothing matched.
    if not selected_movies:
        print("No movies found for this input.")
        return selected_movies

    # Print the selected movies in a tabular format
    rows = [movie.tolist() for movie in selected_movies]
    print(tabulate(rows, headers=display_columns, tablefmt="pretty"))

    # Generate YouTube query links for each selected movie
    for movie in selected_movies:
        movie_title = movie['title']
        # str() guards against non-string titles (e.g. a missing value read as NaN),
        # which urllib.parse.quote would reject.
        youtube_query = urllib.parse.quote(str(movie_title))
        youtube_link = f"https://www.youtube.com/results?search_query={youtube_query}"
        print(f"Movie Title: {movie_title}")
        print(f"YouTube Query Link: {youtube_link}")
        print()

    return selected_movies

In [None]:
# Prompt for a free-text description, then print the recommended movies together with
# a YouTube search link per title; the selected rows are kept in `recommendation`.
user_input = input("Enter your input: ")
recommendation = movie_recommendation2(user_input, loaded_model, movies_df)
#print("Suggested Movie:")
#print(recommendation)