In [7]:
pip install scikit-surprise

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



In [8]:
pip install scikit-surprise --no-warn-script-location

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os

In [4]:
directory_to_add = r'C:\Users\cnhay\AppData\Roaming\Python\Python39\Scripts'
os.environ['PATH'] += os.pathsep + directory_to_add

In [5]:
print(os.environ['PATH'])

C:\Program Files\ArcGIS\Pro\bin;C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3;C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\Library\mingw-w64\bin;C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\Library\usr\bin;C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\Library\bin;C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\Scripts;C:\Program Files (x86)\VMware\VMware Player\bin;C:\Python311\Scripts;C:\Python311;C:\Program Files\Common Files\Oracle\Java\javapath;C:\windows\system32;C:\windows;C:\windows\System32\Wbem;C:\windows\System32\WindowsPowerShell\v1.0;C:\windows\System32\OpenSSH;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR;C:\MinGW\bin;C:\Program Files\PuTTY;C:\Program Files\MATLAB\R2022a\runtime\win64;C:\Program Files\MATLAB\R2022a\bin;C:\Program Files\dotnet;C:\Program Files\Microsoft SQL Server\150\Tools\Binn;C:\Program Files\Microsoft SQL Server\Client SDK\ODBC\170\Too

In [9]:
import pandas as pd
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import SVD
from surprise import accuracy

movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")
links = pd.read_csv("links.csv")
tags = pd.read_csv("tags.csv") 


# Merge tags with movies based on movieId
movies_tags = pd.merge(movies, tags, on='movieId', how='left')

# Create a Surprise dataset from ratings data
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)


# Build a full trainset
trainset = data.build_full_trainset()

# Use Singular Value Decomposition (SVD) for collaborative filtering
model = SVD()

# Train the model on the full dataset
model.fit(trainset)

# Function to get recommendations for a given user and genre
def get_recommendations(user_id, genre, seen_movies=None, n=10):
    # Filter movies by genre
    genre_movies = movies_tags[movies_tags['genres'].str.contains(genre, case=False)]
    
    # Get movie IDs for the filtered genre movies
    genre_movie_ids = set(genre_movies['movieId'])
    
    # Filter genre movies that the user has not seen yet
    if seen_movies:
        genre_movie_ids -= set(seen_movies)
    
    # Make predictions for the filtered genre movies
    predictions = [(item, round(model.predict(user_id, item).est, 2)) for item in genre_movie_ids]
    
    # Sort the predictions by estimated rating
    predictions.sort(key=lambda x: x[1], reverse=True)
    
    # Get the top n recommendations
    top_n = predictions[:n]
    
    return top_n

# Function to get all tags for a given movie
def get_tags_for_movie(movie_id):
    tags_list = tags[tags['movieId'] == movie_id]['tag'].tolist()
    return tags_list

# Function to rate a movie and update ratings.csv
def rate_movie(user_id, movie_id, rating):
    new_rating = pd.DataFrame([[user_id, movie_id, rating]], columns=['userId', 'movieId', 'rating'])
    updated_ratings = pd.concat([ratings, new_rating], ignore_index=True)
    updated_ratings.to_csv('ratings.csv', index=False)
    print(f"Rating {rating} for movie ID {movie_id} saved.")

# Function to search for a movie by title and return its ID
def search_movie_by_title(title):
    movie = movies[movies['title'].str.contains(title, case=False, regex=False)]
    if not movie.empty:
        movie_id = movie.iloc[0]['movieId']
        return movie_id
    else:
        return None

# Example usage
user_id = 9999  # Your own user ID (you may need to adjust this)
desired_genre = input("Enter the genre you want to watch: ")
seen_movies = []  # List of movie IDs that you have seen

while True:
    recommendations = get_recommendations(user_id, desired_genre, seen_movies)
    print(f"\nTop {len(recommendations)} recommendations in the genre {desired_genre}:")
    for rank, (item, rating) in enumerate(recommendations, 1):
        movie_title = movies[movies['movieId'] == item]['title'].values[0]
        tags_list = get_tags_for_movie(item)
        print(f"{rank}. Title: {movie_title} | Rating: {rating}")
        print("   Descriptions:", ', '.join(tags_list))
    
    choice = input("\nEnter 's' if you have seen any of the above movies, 'n' for a new list of recommendations, 'r' to rate a movie, or 'q' to quit: ")
    if choice.lower() == 'q':
        break
    elif choice.lower() == 'n':
        # Add previously recommended movies to seen_movies list
        seen_movies += [item for item, _ in recommendations]
        continue
    elif choice.lower() == 's':
        # List movies for user to choose from when marking seen movies
        print("\nList of movies:")
        for rank, (item, _) in enumerate(recommendations, 1):
            movie_title = movies[movies['movieId'] == item]['title'].values[0]
            print(f"{rank}. Title: {movie_title}")

        choice = input("Enter the number of the movie you have seen (1-10), or type 'q' to quit: ")
        if choice.lower() == 'q':
            break

        try:
            # Convert the user's choice to the corresponding movie ID
            movie_index = int(choice) - 1
            movie_id = recommendations[movie_index][0]
            seen_movies.append(movie_id)
        except (ValueError, IndexError):
            print("Invalid input. Please enter a number between 1 and 10.")
    elif choice.lower() == 'r':
        movie_title = input("Enter the movie title you want to rate: ")
        movie_id = search_movie_by_title(movie_title)
        if movie_id:
            rating = float(input("Enter your rating (1-5) for the movie: "))
            rate_movie(user_id, movie_id, rating)
            print("Rating has been saved.")
        else:
            print("Movie not found. Please enter a valid movie title.")
            continue


print("Thank you for using the recommendation system!")

Enter the genre you want to watch: Romance

Top 10 recommendations in the genre Romance:
1. Title: Casablanca (1942) | Rating: 4.72
   Descriptions: start of a beautiful friendship
2. Title: Bound (1996) | Rating: 4.65
   Descriptions: 
3. Title: Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001) | Rating: 4.63
   Descriptions: whimsical
4. Title: Philadelphia Story, The (1940) | Rating: 4.6
   Descriptions: divorce
5. Title: Life Is Beautiful (La Vita è bella) (1997) | Rating: 4.57
   Descriptions: Holocaust, bittersweet, emotional, Heartwarming, poignant, sentimental, tear jerker, tearjerking
6. Title: Singin' in the Rain (1952) | Rating: 4.52
   Descriptions: movie business
7. Title: North by Northwest (1959) | Rating: 4.49
   Descriptions: Mount Rushmore, Alfred Hitchcock, imdb top 250
8. Title: His Girl Friday (1940) | Rating: 4.47
   Descriptions: Screwball
9. Title: Sound of Music, The (1965) | Rating: 4.47
   Descriptions: Rogers and Hammerstein
10. Title: Forrest Gump (1994)