In [2]:
# Import libraries
import os
import re

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
# The directory holding the three CSV files is defined once so it can be
# changed in a single place. Set the MOVIELENS_DATA_DIR environment variable
# to point at another location; the default is the original hard-coded folder.
DATA_DIR = os.environ.get("MOVIELENS_DATA_DIR", "C:/Users/anhng/Documents/Masters/630")

movies = pd.read_csv(f"{DATA_DIR}/movies.csv")
ratings = pd.read_csv(f"{DATA_DIR}/ratings.csv")
tags = pd.read_csv(f"{DATA_DIR}/tags.csv")

# Create a function to clean the dataframes and compute similarity matrix
def preprocess_data(movies, ratings, tags):
    """Enrich the movie table and build a movie-to-movie similarity matrix.

    Parameters
    ----------
    movies : pandas.DataFrame
        Must contain the columns 'movieId' and 'genres'.
    ratings : pandas.DataFrame
        Must contain the columns 'movieId' and 'rating'.
    tags : pandas.DataFrame
        Must contain the columns 'movieId' and 'tag'. Rows whose tag is
        missing are ignored; remaining tags are converted to strings.

    Returns
    -------
    tuple
        ``(movies, final_similarity)`` where ``movies`` is a new DataFrame
        (the input is not modified) with two extra columns -- 'rating' (mean
        rating per movie, movies without ratings get the overall mean of the
        per-movie means) and 'tag' (all tags of the movie joined by spaces,
        '' when there are none) -- and ``final_similarity`` is the
        element-wise average of the genre and tag cosine-similarity matrices.
        Row/column ``i`` of the matrix corresponds to row position ``i`` of
        the returned ``movies``.
    """
    # Compute TF-IDF matrix for genres
    # NOTE(review): the vectorizer's default tokenizer is used, so a
    # hyphenated genre such as "Sci-Fi" is presumably split into two tokens;
    # left unchanged here to keep the similarity scores as they were.
    genre_vectorizer = TfidfVectorizer(stop_words='english')
    genre_matrix = genre_vectorizer.fit_transform(movies['genres'].fillna(''))
    genre_similarity = cosine_similarity(genre_matrix, genre_matrix)

    # Compute average ratings for each movie
    avg_ratings = ratings.groupby('movieId')['rating'].mean()
    movies = movies.merge(avg_ratings, on='movieId', how='left')
    movies['rating'] = movies['rating'].fillna(movies['rating'].mean())

    # Combine all tags for each movie into a single string.
    # Missing tags are dropped and the rest coerced to str first, because
    # ' '.join raises TypeError on NaN / non-string items.
    valid_tags = tags.dropna(subset=['tag'])
    tag_data = (
        valid_tags.assign(tag=valid_tags['tag'].astype(str))
        .groupby('movieId')['tag']
        .agg(' '.join)
        .reset_index()
    )
    movies = movies.merge(tag_data, on='movieId', how='left')
    movies['tag'] = movies['tag'].fillna('')

    # Compute TF-IDF matrix for tags
    tag_vectorizer = TfidfVectorizer(stop_words='english')
    tag_matrix = tag_vectorizer.fit_transform(movies['tag'])
    tag_similarity = cosine_similarity(tag_matrix, tag_matrix)

    # Compute similarity matrix as the average of genre and tag similarities.
    # Done in place on the genre matrix so no third n-by-n array is allocated;
    # the resulting values are the same as (genre + tag) / 2.
    final_similarity = genre_similarity
    final_similarity += tag_similarity
    final_similarity /= 2
    return movies, final_similarity

# Create a function to clean movie titles
def clean_title(title):
    """Normalize a movie title for case-insensitive matching.

    Surrounding whitespace is removed, a trailing four-digit year in
    parentheses (e.g. " (1995)") is dropped, and the result is lower-cased.
    Parenthesised text that is not a trailing four-digit year is kept.

    Parameters
    ----------
    title : str
        Raw title, either from the dataset or typed by the user.

    Returns
    -------
    str
        The normalized title.
    """
    # Strip *before* the substitution: the pattern is anchored with `$`, so a
    # title with trailing whitespace would otherwise keep its year suffix.
    without_year = re.sub(r'\s*\(\d{4}\)$', '', title.strip())
    return without_year.strip().lower()

# Create a function to retrieve recommendations based on similarity scores
def get_recommendations(user_input, movies, similarity_matrix, top_n=10):
    """Return the titles most similar to the movie named by ``user_input``.

    The user's text and every movie title are normalized with
    ``clean_title`` and compared for exact equality; when several movies
    match, the first one in ``movies`` is used.

    Parameters
    ----------
    user_input : str
        Movie title as typed by the user.
    movies : pandas.DataFrame
        Must contain a 'title' column. It is not modified.
    similarity_matrix : array-like
        Indexed by row position; row ``i`` is read as the similarity scores
        of the movie at row position ``i`` of ``movies`` against every movie
        (assumed to be aligned with ``movies``, as returned by
        ``preprocess_data``).
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.DataFrame
        A one-column ('title') frame with up to ``top_n`` rows ordered from
        most to least similar, excluding the queried movie itself. An empty
        DataFrame is returned (and a message printed) when no title matches.
    """
    cleaned_title = clean_title(user_input)
    # Keep the normalized titles in a local Series instead of writing a new
    # column into the caller's DataFrame on every call.
    cleaned_titles = movies['title'].apply(clean_title)

    # Find the row *position* of the user's input. Positions (not index
    # labels) are what the similarity matrix and .iloc below expect, so this
    # stays correct even if `movies` does not have a default RangeIndex.
    match_positions = np.flatnonzero((cleaned_titles == cleaned_title).to_numpy())
    if len(match_positions) == 0:
        print(f"Sorry, '{user_input}' is not in our list of movies. Therefore, we cannot provide a recommendation based on this. Please try another movie:")
        return pd.DataFrame()
    position = int(match_positions[0])

    # Rank all movies by similarity to the selected one, highest first.
    # A stable sort keeps equally-scored movies in their original row order.
    scores = np.asarray(similarity_matrix[position], dtype=float)
    ranked_positions = np.argsort(-scores, kind='stable')
    # Drop the queried movie itself, then keep the best `top_n`.
    ranked_positions = ranked_positions[ranked_positions != position][:top_n]
    return movies.iloc[ranked_positions][['title']]

# Build the enriched movie table and the movie-to-movie similarity matrix
(movies, similarity_matrix) = preprocess_data(
    movies=movies,
    ratings=ratings,
    tags=tags,
)

In [3]:
# Create a loop to request user to input movie titles and provide recommendations
# The greeting is only used for the first prompt; every later prompt asks for
# "another" movie.
prompt = "Hello! Please input a movie you love: "

while True:
    user_input = input(prompt)
    prompt = "Please input another movie you love: "

    recommendations = get_recommendations(user_input, movies, similarity_matrix)
    if recommendations.empty:
        # get_recommendations has already printed the "not in our list"
        # message, so just ask for a different title. No menu choice is read
        # on this path, which avoids referencing an unset `user_choice`.
        continue

    # Print recommendations on top 10 movies for user to watch next
    print(f"Because you love {user_input}, we recommend these movies as your next watch!")
    for i, row in enumerate(recommendations.itertuples(), 1):
        print(f"{i}. {row.title}")

    # Ask the user for next action, repeating until the answer is 1 or 2
    user_choice = None
    while user_choice not in ('1', '2'):
        user_choice = input("Would you like to (1) input another movie or (2) exit? Please enter 1 or 2: ").strip()
        if user_choice not in ('1', '2'):
            print("Invalid input. Please enter 1 or 2.")

    if user_choice == '2':
        print("Thank you for using our movie recommendation system! Have a great day!")
        break

Hello! Please input a movie you love:  Shrek


Because you love Shrek, we recommend these movies as your next watch!
1. Into the Woods (1991)
2. Gnomeo & Juliet (2011)
3. Puss in Boots (Nagagutsu o haita neko) (1969)
4. Toy Story (1995)
5. Antz (1998)
6. Toy Story 2 (1999)
7. Adventures of Rocky and Bullwinkle, The (2000)
8. Emperor's New Groove, The (2000)
9. Monsters, Inc. (2001)
10. Wild, The (2006)


Would you like to (1) input another movie or (2) exit? Please enter 1 or 2:  Toy Story


Invalid input. Please enter 1 or 2.


Would you like to (1) input another movie or (2) exit? Please enter 1 or 2:  1
Please input another movie you love:  Toy Story


Because you love Toy Story, we recommend these movies as your next watch!
1. Bug's Life, A (1998)
2. Toy Story 2 (1999)
3. The Lego Movie (2014)
4. Up (2009)
5. Antz (1998)
6. Adventures of Rocky and Bullwinkle, The (2000)
7. Emperor's New Groove, The (2000)
8. Monsters, Inc. (2001)
9. Wild, The (2006)
10. Shrek the Third (2007)


Would you like to (1) input another movie or (2) exit? Please enter 1 or 2:  Monsters


Invalid input. Please enter 1 or 2.


Would you like to (1) input another movie or (2) exit? Please enter 1 or 2:  1
Please input another movie you love:  Monsters


Because you love Monsters, we recommend these movies as your next watch!
1. Powder (1995)
2. Forbidden Planet (1956)
3. Contact (1997)
4. Metropolis (1927)
5. 1984 (Nineteen Eighty-Four) (1984)
6. Last Night (1998)
7. Brother from Another Planet, The (1984)
8. Unbreakable (2000)
9. Altered States (1980)
10. Silent Running (1972)


Would you like to (1) input another movie or (2) exit? Please enter 1 or 2:  1
Please input another movie you love:  Sandlot


Sorry, 'Sandlot' is not in our list of movies. Therefore, we cannot provide a recommendation based on this. Please try another movie:


Please input another movie you love:  Sandlot, The


Because you love Sandlot, The, we recommend these movies as your next watch!
1. Pollyanna (1960)
2. Wide Awake (1998)
3. Jack Frost (1998)
4. Secondhand Lions (2003)
5. Beethoven (1992)
6. Because of Winn-Dixie (2005)
7. Ice Princess (2005)
8. Material Girls (2006)
9. Son of Rambow (2007)
10. Monsieur Lazhar (2011)


Would you like to (1) input another movie or (2) exit? Please enter 1 or 2:  2


Thank you for using our movie recommendation system! Have a great day!
