# Step 0.1: Data Loading

In [1]:
import os
import pandas as pd
from openai import OpenAI
from func import get_user_watch_history
from sklearn.model_selection import train_test_split

client = OpenAI()

data = pd.read_csv("data/merged_df.csv")

train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
system_prompt = "You are a movie recommender system that will compare user previous watch history and ratings."
unique_movie_titles = set(data['title'].unique().tolist())

user_id = 123
candidate_size = 100

In [2]:
train_user_df = train_data[train_data["userId"] == user_id]
test_user_df = test_data[test_data["userId"] == user_id]

train_title, train_rating = train_user_df["title"], train_user_df["rating"]
test_title, test_rating = test_user_df["title"], test_user_df["rating"]

# Step 0.2: User Filtering

This principle operates on the premise that candidate movies should be appealing to users who share similarities with the target user. To accomplish this, we begin by encoding each user's watched movies into a multi-hot vector representation. Similar users are identified by comparing the target user's vector with other users' vectors using cosine similarity. We then choose the 𝑚 most similar users and create a candidate movie set of size 𝑠 by selecting the most popular movies from the films that these similar users have interacted with.

We set the candidate set size as 100, since we found out that this size aligns with the GPT model's availability in processing long list of items.

In [3]:
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

def get_similar_users(user_id, data):
    user_movie_matrix = data.pivot_table(index='userId', columns='title', values='rating')
    user_movie_matrix = user_movie_matrix.fillna(0)
    similarity_matrix = cosine_similarity(user_movie_matrix)
    similarity_df = pd.DataFrame(similarity_matrix, index=user_movie_matrix.index, columns=user_movie_matrix.index)
    similar_users = similarity_df[user_id].sort_values(ascending=False)
    return similar_users

similar_users = list(get_similar_users(user_id, train_data).iloc[:candidate_size].index)
train_similar_df = train_data[train_data["userId"].isin(similar_users)]
movie_popularity = train_similar_df.groupby('title').size().sort_values(ascending=False)
candidate1 = list(movie_popularity.head(candidate_size).head(candidate_size).index)

# Step 0.3: Item Filtering

In a manner akin to user filtering, we depict each movie using a multi-hot vector determined by the users who have engaged with it. By measuring cosine similarity between pairs of movies, we identify the 100 most closely related movies for each movie within the target user's interaction history. Subsequently, we assemble a candidate set comprising 100 items, taking into account the "popularity" of these akin movies relative to the ones within the target user's interaction history.

In [4]:
train_movie = train_user_df["movieId"]
watched = pd.unique(train_movie).tolist()

In [5]:
user_item_matrix = data.pivot_table(index='userId', columns='movieId', values='rating')
user_item_matrix = user_item_matrix.fillna(0)
item_similarity = cosine_similarity(user_item_matrix.T)
def find_similar_movies(target_item_id, data):
    target_item_index = user_item_matrix.columns.get_loc(target_item_id)
    similarities = item_similarity[target_item_index]
    similar_items_df = pd.DataFrame({'movieId': user_item_matrix.columns, 'similarity_score': similarities})
    similar_items_df = similar_items_df.sort_values(by='similarity_score', ascending=False)
    N = candidate_size
    top_similar_items = similar_items_df.head(N)
    return top_similar_items

similar_movies = []
for movie in watched:
    similar_movies.append(find_similar_movies(movie, train_data))
similar_df = pd.concat(similar_movies)

movie_popularity = similar_df.groupby('movieId').size().sort_values(ascending=False)
candidate2 = list(movie_popularity.head(candidate_size).head(candidate_size).index)
candidate2 = train_data.loc[train_data['movieId'].isin(candidate2), 'title'].tolist()
candidate = list(set(candidate1) | set(candidate2))

In [6]:
def clean_candidate(candidate, train_title, test_title):
    to_remove = []
    
    for movie in candidate:
        if movie in train_title:
            ro_remove.append(movie)
            print(f"to remove: {movie} ")
    count1 = 0
    count2 = 0
    for movie in to_remove:
        candidate.append(movie)
    for movie in test_title:
        if movie not in candidate:
            # print(f"not in candidate: {movie} ")
            count1 += 1
        else:
            # print(movie)
            count2 += 1
    print(count1, count2)
            
    return candidate
            
candidate = clean_candidate(candidate, train_title, test_title)

2 4


In [7]:
print(f"The length of the training set: {len(train_title)}")
print(f"The length of the testing set: {len(test_title)}")
print(f"The length of the candidate set: {len(candidate)}")

The length of the training set: 50
The length of the testing set: 6
The length of the candidate set: 128


# Step 1: Summarize User Preferences

During this step, we implemented the following enhancements:
1. We eliminated the need for candidate sets in answer1. In fact, the inclusion of candidate sets tends to create confusion for the model due to the extended context.
2. We incorporated movie ratings into the list of watched movies. Additionally, we positioned the ratings alongside the movie titles to facilitate easier identification and matching by the GPT model.

In [8]:
import numpy as np
import random
movie_df = list(pd.unique(data["title"]))

In [9]:

movie_rating = ""
for i in range(len(train_title)):
    movie_rating += f"{train_title.iloc[i]}: {train_rating.iloc[i]} \n"

if user_id in data['userId'].values:
    titles_list, ratings_list = get_user_watch_history(user_id, data)
    messages=[
        # {"role": "user", "content": f"Candidate Set(candidate movies): "},
        {"role": "user", "content": f"The movies I have watched(watched movies): {movie_rating}"},
        {"role": "user", "content": f"Step 1: What features are most important to me when selecting movies? "},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages
    )

answer1 = (completion.choices[0].message.content)
answer1


"Based on the list of movies you've watched and their ratings, it seems like you prioritize a mix of genres, including action, thriller, science fiction, and drama. You also seem to appreciate movies with strong storytelling, character development, and plot twists. Additionally, it looks like you enjoy visually stunning and immersive films with engaging narratives. These aspects seem to be important to you when selecting movies."

# Step 2: User's Favorite Movies

In the second phase, this prompt encompasses the preceding prompt content, followed by the incorporation of the response from Step 1. It also introduces the directive: "Step 2: You will select the movies ... that appeal to me the most ... presented in descending order of preference (...)". This step aims to identify the movies previously interacted with that most accurately capture the preferences of the target user.

In [10]:
# movie_rating = ""
# for i in range(len(train_title)):
#     movie_rating += f"{train_title.iloc[i]}: {train_rating.iloc[i]} \n"

messages=[
    {"role": "user", "content": f"The movies I have watched(watched movies) and their ratings: {movie_rating}"},
    {"role": "user", "content": f"Step 1: What features are most important to me when selecting movies? "},
]

messages.append({"role": "assistant", "content": answer1})
                
step2 = "You will select the movies (at most 10 movies) that appeal to me the most from the list of movies \
    I have watched, based on my personal preferences. The selected movies will be presented in descending \
    order of preference. (Format: no. a watched movie)."
    
messages.append({"role": "user", "content": step2})
                
completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages
    )

answer2 = (completion.choices[0].message.content)
print(answer2)

Certainly! Based on your ratings, the top 10 movies that seem to appeal to you the most are as follows:

1. Star Wars: Episode IV - A New Hope (1977)
2. The Imitation Game (2014)
3. Nightcrawler (2014)
4. Memento (2000)
5. American History X (1998)
6. Edge of Tomorrow (2014)
7. Inglourious Basterds (2009)
8. Fight Club (1999)
9. Source Code (2011)
10. Seven (a.k.a. Se7en) (1995)

These films appear to align most closely with your ratings and genre preferences.


# Step3: Recommend From Candidate

Again, this prompt includes the previous text appended with the answers of Step 2. It then includes the instruction “Step 3: Can you recommend 10 movies from the Candidate Set similar to ...”.

We enhanced this prompt in the following manner:
1. We relocated the candidate set to be within step3. Through experimentation, we observed that when placing the candidate set at the very beginning of the conversation, the recommender often provided recommendations that fell outside the boundaries of the candidate set. To rectify this issue, we positioned these elements closer together.

In [12]:

messages.append({"role": "assistant", "content": answer2})

# messages.append({"role": "user", "content": f"Candidate Set(candidate movies): {', '.join(candidate)}"},)
                
step3 = "Can you recommend 10 different movies only from the Candidate Set similar to the selected \
    movies I've watched (Format: [<n>. <a watched movie> : <a candidate movie>])?" + f"Candidate Set\
    (candidate movies): {', '.join(candidate)}"
    
messages.append({"role": "user", "content": step3})
                
completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages
    )

answer3 = (completion.choices[0].message.content)
print(answer3)

Sure! Based on your preferences, here are 10 recommended movies from the candidate set that you might enjoy:

1. Nightcrawler (2014) : Shutter Island (2010) 
2. Memento (2000) : Inception (2010) 
3. American History X (1998) : Fight Club (1999) 
4. Edge of Tomorrow (2014) : Minority Report (2002) 
5. Inglourious Basterds (2009) : Django Unchained (2012) 
6. Seven (a.k.a. Se7en) (1995) : Zodiac (2007) 
7. Fight Club (1999) : The Matrix (1999) 
8. Source Code (2011) : Inception (2010) 
9. Lord of the Rings: The Return of the King, The (2003) : Gladiator (2000) 
10. Star Wars: Episode IV - A New Hope (1977) : Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)  

These recommendations are based on the similarity to the movies you've watched and enjoyed. Enjoy your movie time!


# Step 4: Evaluation

For evaluation purposes, we set aside 20% of the dataset to create a testing set. We utilize the training set for prompting and fine-tuning our Language Model (LLM). After obtaining the recommendation results, we calculate accuracy by measuring the proportion of recommended movies that match those in the testing dataset. If a recommended movie also exists in the testing set, we consider it correct, as it indicates that the user actually watched the recommended movie.

Consequently, the "3-step" prompting model achieves an accuracy of 20%, while the "most-popular" model achieves an accuracy of 10%. This 20% accuracy is noteworthy, considering the large movie dataset of 10,000 items, with only approximately 10 items in the testing set.

It's worth noting that the LLM model's responses may not remain consistent even when using the same prompt. To address this, we rigorously format the prompting to ensure that the GPT's responses are parseable.

In [13]:
import re

def parse_answer3(answer3):
    lines = answer3.split('\n')
    pattern = r': (.*?) \((\d+)\)'
    movie_pred = []
    for line in lines:
        match = re.search(pattern, line)
        if match:
            title = match.group(1)
            year = match.group(2)
            movie_pred.append((title, year))
    return movie_pred

def accuracy(movie_pred, test_title):
    correct = 0
    test_title_list = list(test_title)
    for title, year in movie_pred:
        if "The" in title:
            title = title[4:]
        find_candidate = 0
        for movie in candidate:
            if title in movie:
                find_candidate = 1
                break
        if not find_candidate:
            # print(f"{title} not in candidate")
            pass
        for movie in test_title_list:
            if title in movie:
                # print(f"{title} in test")
                correct += 1
                break
    return correct / len(movie_pred)

def accuracy_baseline(movie_pred, test_title):
    correct = 0
    test_title_list = list(test_title)
    for movie in movie_pred:
        if movie in test_title_list:
            # print(f"{movie} in test")
            correct += 1
            break
    return correct / len(movie_pred)

In [14]:
movie_pred = parse_answer3(answer3)
print(accuracy(movie_pred, test_title))

0.2


In [15]:
movie_popularity = train_data.groupby('title').size().sort_values(ascending=False)
baseline_pred = []
for movie in movie_popularity.index:
    if movie in candidate:
        baseline_pred.append(movie)
    if len(baseline_pred) >= 10:
        break
accuracy_baseline(baseline_pred, test_title)
print(accuracy_baseline(baseline_pred, test_title))

0.1
