In [1]:
# Import libraries
import pandas as pd
import numpy as np
from collections import defaultdict, Counter
import sklearn.model_selection
import surprise.model_selection
from surprise import accuracy
from surprise import dump
from surprise import Dataset, Reader, SVD, NormalPredictor
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate

%load_ext lab_black

# Load data and trained model

In [2]:
# Load rating and movie data
# Both paths are relative, so they resolve against the kernel's working directory.
# data_df is read below through its "userId" and "movieId" columns.
data_df = pd.read_csv("ml-latest-small/ratings.csv")

# movies_df is read below through its "movieId" and "title" columns.
movies_df = pd.read_csv("ml-latest-small/movies.csv")

# Load tuned SVD
# dump.load returns a pair; only the second item (the fitted algorithm) is kept.
# NOTE(review): Surprise's dump module is documented as pickle-based, so only
# load a "best_svd" file that comes from a trusted source.
# NOTE(review): binding `_` here may shadow IPython's last-output variable for
# the rest of the session -- confirm nothing later relies on `_`.
_, svdtuned = dump.load("best_svd")

# Make recommendations

## For an existing group
(See the Cambridge Spark [tutorial](https://blog.cambridgespark.com/tutorial-practical-introduction-to-recommender-systems-dbe22848392b) for a practical introduction to recommender systems.)

In [3]:
# Keep asking until the reply parses as an integer
def _ask_int(prompt):
    """Prompt with `prompt` until the reply can be converted by int().

    A reply that int() rejects prints a short notice and asks again, so a
    single typo does not abort the whole entry session.
    """
    while True:
        reply = input(prompt)
        try:
            return int(reply)
        except ValueError:
            print(f"'{reply}' is not a whole number, please try again.")


# Get a list of users who want to watch a movie together
def user_list():
    """Interactively collect the IDs of the users in the group.

    First asks how many users are watching, then asks for that many IDs.

    Returns
    -------
    list of int
        The IDs in the order they were typed. Repeated IDs are kept as
        entered. A count of zero or less yields an empty list, because
        range() over a non-positive count is empty.
    """
    # Get number of users
    n_user = _ask_int("How many users are watching together: ")

    # Ask for each user's ID
    return [_ask_int("User #:") for _ in range(n_user)]

In [4]:
# Recommend a list of movies that may be liked by most (if any)
def movie_rec(uids=None, n_top=20):
    """Print the movies that appear in the most group members' top picks.

    For every user in the group, the tuned model (`svdtuned`) predicts a
    rating for each movie in `data_df` that nobody in the group has rated.
    Each user's `n_top` highest predictions are kept, and the movies kept
    for the largest number of users are printed.

    Parameters
    ----------
    uids : iterable of int, optional
        User IDs of the group. When omitted, the IDs are collected
        interactively through `user_list()`. Repeated IDs are counted once.
    n_top : int, default 20
        How many top-rated movies to keep per user. If fewer unseen movies
        are available, all of them are kept.

    Returns
    -------
    None
        The result is printed, not returned.

    Raises
    ------
    ValueError
        If `n_top` is smaller than 1.

    Notes
    -----
    IDs with no rows in `data_df` are still passed to the model.
    NOTE(review): Surprise's SVD documents that an unknown user's bias and
    factors are taken as zero, so such users would not get personalised
    picks -- confirm whether they should be rejected instead.
    """
    if n_top < 1:
        raise ValueError("n_top must be at least 1")

    # Get user IDs (prompt only when the caller did not supply them)
    if uids is None:
        uids = user_list()

    # Count each user once; a repeated ID would otherwise inflate the overlap
    uids = list(dict.fromkeys(uids))
    if not uids:
        print("No users were given, so there is nothing to recommend.")
        return

    # All movie IDs that have at least one rating
    iids = data_df["movieId"].unique()

    # Movies already rated by anyone in the group
    iids_seen = data_df.loc[data_df["userId"].isin(uids), "movieId"]

    # Remove these movies
    iids_to_pred = np.setdiff1d(iids, iids_seen)
    if len(iids_to_pred) == 0:
        print("This group has already rated every movie in the data set.")
        return

    # argpartition raises when asked for more items than exist, so cap it
    n_keep = min(n_top, len(iids_to_pred))

    # Number of users for whom each movie ID made the top picks
    rec_counts = Counter()

    for uid in uids:

        # Predict how this user would rate each movie; the 5.0 is only a
        # placeholder "true" rating, just the estimate is read back
        testset = [[uid, iid, 5.0] for iid in iids_to_pred]
        predictions = svdtuned.test(testset)

        # Find this user's top movies
        pred_ratings = np.array([pred.est for pred in predictions])
        top_idx = np.argpartition(pred_ratings, -n_keep)[-n_keep:]  # Unordered
        top_iids = iids_to_pred[top_idx]  # Corresponding movie IDs

        # Keep only movies listed in movies_df, once per user. Counting by
        # movie ID (not title) keeps same-titled movies separate.
        listed = movies_df.loc[movies_df["movieId"].isin(top_iids), "movieId"]
        rec_counts.update(listed.unique().tolist())

    if not rec_counts:
        print("None of the predicted movies could be found in the movie list.")
        return

    # Find overlap (computed once instead of on every loop iteration)
    best = max(rec_counts.values())

    # With several users, a best count of 1 means no movie was shared at all.
    # A single user, or a group whose picks are all identical, is not a miss.
    if len(uids) > 1 and best == 1:
        print("Oops, these users don't seem to share movie taste!")
        return

    # Make recommendations
    title_by_id = dict(zip(movies_df["movieId"].tolist(), movies_df["title"].tolist()))
    winner_list = [iid for iid, count in rec_counts.items() if count == best]
    print(f"Recommended for the group (liked by {best} out of {len(uids)}):")
    for winner in winner_list:
        print(f"- {title_by_id[winner]}")

In [5]:
# Try an arbitrary group
movie_rec()

How many users are watching together:  7
User #: 17
User #: 3
User #: 201
User #: 303
User #: 5000
User #: 80000
User #: 1


Recommended for the group (liked by 7 out of 7):
- Streetcar Named Desire, A (1951)
- Lawrence of Arabia (1962)
- Harold and Maude (1971)
- Great Escape, The (1963)
- Animal House (1978)


## For a new group