# Load Data

In [1]:
import os
import pandas as pd
from openai import OpenAI
from func import get_user_watch_history
from sklearn.model_selection import train_test_split
from src.func import *
import re

client = OpenAI()

merged_df = pd.read_csv("data/merged_df.csv")
unique_movie_titles = merged_df['title'].unique().tolist()
data = merged_df
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
system_prompt = "You are a movie recommender system that will compare user previous watch history and ratings."

USER_ID = 23
USER_ID2 = 445

# Method1: Prompt with User History

In [2]:
user_id = USER_ID
if user_id in data['userId'].values:
    train_data_user = train_data[train_data["userId"] == user_id]
    titles_list, ratings_list = train_data_user["title"], train_data_user["rating"]
    messages=[
        {"role": "system", "content": f"{system_prompt} Based on the genre and ratings of \
            those movies that this particular user gave, please recommend 10 movie that this \
            user would enjoy watching and predict the rating of these 10 movies given by this user"},
        {"role": "user", "content": f"User {user_id}'s previously watch movies include {titles_list}\
            and the corresponded ratings are {ratings_list}. Please suggest 10 movie based on the \
            watch history and predict ratings of those 10 movies based on user preferences. \
            Format: [n. <Movie Name> (<Year>) - Predicted Rating: <Rating>]"}
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages
    )
    response = completion.choices[0].message.content
    recommendations1 = extract_string1(response)
    print(response)
else:
    print(f"{user_id} is not a valid user ID, please try again")


Based on User 23's watch history and ratings, I recommend the following 10 movies that they may enjoy:

1. The Insider (1999) - Predicted Rating: 4.2
2. Amadeus (1984) - Predicted Rating: 4.0
3. Cinema Paradiso (Nuovo Cinema Paradiso) (1989) - Predicted Rating: 4.5
4. The Lives of Others (Das Leben der Anderen) (2006) - Predicted Rating: 4.3
5. Umberto D. (1952) - Predicted Rating: 4.1
6. Rififi (Du rififi chez les hommes) (1955) - Predicted Rating: 4.4
7. My Neighbor Totoro (Tonari no Totoro) (1988) - Predicted Rating: 4.2
8. Central Station (Central do Brasil) (1998) - Predicted Rating: 4.0
9. The Secret in Their Eyes (El secreto de sus ojos) (2009) - Predicted Rating: 4.3
10. Leon: The Professional (Léon) (1994) - Predicted Rating: 4.5


# Watch party with 2 userID
user_A = 23

user_B = 445

In [3]:
userA, userB = USER_ID, USER_ID2
if userA and userB in data['userId'].values:
    train_data_userA = train_data[train_data["userId"] == userA]
    titles_list_A, ratings_list_A = train_data_userA["title"], train_data_userA["rating"]
    train_data_userB = train_data[train_data["userId"] == userB]
    titles_list_B, ratings_list_B = train_data_userB["title"], train_data_userB["rating"]
    messages=[
        {"role": "system", "content": f"{system_prompt} Based on the genre and ratings of movies from \
            2 users, please recommend 10 movie that these 2 users would enjoy watching together and \
            predict the rating of these 10 movies"},
        {"role": "user", "content": f"User {userA}'s previously watch movies include {titles_list_A} \
            and the corresponded ratings are {ratings_list_A}. User {userB}'s previously watch movies \
            include {titles_list_B} and the corresponded ratings are {ratings_list_B} Please suggest \
            10 movie that both users would enjoy watching based on the watch histories and predict \
            specific ratings of those 10 movies based on user preferences. Format: \
            [n. <Movie Name> (<Year>) - Predicted Rating: <Rating>]"}
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages
    )
    response = completion.choices[0].message.content
    print(response)
    recommendations2 = extract_string1(response)
else:
    print(f"{userA} or {userB} is not a valid user ID, please try again")

Based on the watch history and ratings of User 23 and User 445, here are 10 movie recommendations that both users would enjoy watching together, along with the predicted ratings based on their preferences:

1. Dr. Strangelove or: How I Learned to Stop Worr... (1964) - Predicted Rating: 4.0
2. Grand Budapest Hotel, The (2014) - Predicted Rating: 4.5
3. Sixth Sense, The (1999) - Predicted Rating: 4.0
4. Shutter Island (2010) - Predicted Rating: 4.5
5. Fargo (1996) - Predicted Rating: 4.0
6. How to Train Your Dragon (2010) - Predicted Rating: 4.0
7. Casablanca (1942) - Predicted Rating: 4.0
8. Star Wars: Episode IV - A New Hope (1977) - Predicted Rating: 4.5
9. Matrix, The (1999) - Predicted Rating: 4.0
10. Inception (2010) - Predicted Rating: 4.5


# Recommend based on genre

In [4]:
if user_id in data['userId'].values:
    user_data = train_data[train_data["userId"] == user_id]
    user_data['genres'] = user_data['genres'].str.split('|')
    genres_expanded = user_data.explode('genres')
    genre_ratings = genres_expanded.groupby('genres')['rating'].mean()
    top_3_genres = genre_ratings.sort_values(ascending=False).head(3).index.tolist()
    messages=[
        {"role": "system", "content": "You are a movie recommender system that will recommend \
            10 movie in this user's favorite genres and predict the rating of these 10 movies \
            given by this user"},
        {"role": "user", "content": f"User {user_id}'s favorite genre include {top_3_genres}. \
            Please suggest 10 movie in these genre for the user. \
            Format: [n. <Movie Name> (<Year>) - Predicted Rating: <Rating>]"}
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages
    )
    response = completion.choices[0].message.content
    recommendations3 = extract_string1(response)
    print(response)
else:
    print(f"{userId} is not a valid user ID, please try again")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_data['genres'] = user_data['genres'].str.split('|')


Sure, based on User 23's favorite genres, here are 10 movie recommendations:

1. The Shining (1980) - Predicted Rating: 4.5
2. Get Out (2017) - Predicted Rating: 4.4
3. The Matrix (1999) - Predicted Rating: 4.3
4. A Quiet Place (2018) - Predicted Rating: 4.2
5. The Sixth Sense (1999) - Predicted Rating: 4.4
6. Aliens (1986) - Predicted Rating: 4.3
7. The Babadook (2014) - Predicted Rating: 4.1
8. Inception (2010) - Predicted Rating: 4.5
9. The Others (2001) - Predicted Rating: 4.3
10. The Cabin in the Woods (2011) - Predicted Rating: 4.2

I hope User 23 enjoys these suggestions!


# Recommend based on similar user

In [5]:
if user_id in data['userId'].values:
    most_similar = get_similar_users(user_id, train_data)
    train_data_user = train_data[train_data["userId"] == most_similar]
    titles_list, ratings_list = train_data_user["title"], train_data_user["rating"]
    messages=[
        {"role": "system", "content": "You are a movie recommender system that will suggest movies \
            that the user may also like based on similar user's watch histories. From the most similar \
            user's watch histories, please recommend 10 movies that this user would enjoy watching and \
            predict the rating of these 10 movies given by this user"},
        {"role": "user", "content": f"User {user_id}'s most similar user is user {most_similar}, \
            which has previously watched {titles_list} and the corresponded ratings are {ratings_list}. \
            Please suggest 10 movies that user {user_id} may also like and provide rating prediction of \
            those 10 movies based on user preferences. \
            Format: [n. <Movie Name> (<Year>) - Predicted Rating: <Rating>]"}
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages
    )
    response = completion.choices[0].message.content
    print(response)
    recommendations4 = extract_string1(response)
else:
    print(f"{userId} is not a valid user ID, please try again")

1. Goodfellas (1990) - Predicted Rating: 4.2
2. Fight Club (1999) - Predicted Rating: 4.3
3. The Departed (2006) - Predicted Rating: 4.5
4. Inglourious Basterds (2009) - Predicted Rating: 4.1
5. Seven (Se7en) (1995) - Predicted Rating: 4.4
6. The Prestige (2006) - Predicted Rating: 4.3
7. The Grand Budapest Hotel (2014) - Predicted Rating: 4.0
8. Amadeus (1984) - Predicted Rating: 4.6
9. Snatch (2000) - Predicted Rating: 4.2
10. Oldboy (2003) - Predicted Rating: 4.0


# Rating prediction for evaluation metric

In [6]:
if user_id in data['userId'].values:
    train_data_user = train_data[train_data["userId"] == user_id]
    train_titles_list, train_ratings_list = train_data_user["title"], train_data_user["rating"]
    test_data_user = test_data[test_data["userId"] == user_id]
    test_title_list, true_ratings = test_data_user["title"], test_data_user["rating"]
    messages=[
        {"role": "system", "content": "You are a movie rating prediction system that will predict \
            rating with a list of titles given"},
        {"role": "user", "content": f"User {user_id}'s previously watch movies include {train_titles_list} \
            and the corresponded ratings are {train_ratings_list}. Based on these ratings, \
            please provide a list of prediction rating corresponded to {test_title_list}. \
            Format: [n. <Movie Name> (<Year>) - Predicted Rating: <Rating>]"}
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages
    )
    response = completion.choices[0].message.content
    print(response)
    recommendations5 = extract_string1(response)
    # rms = evaluation(recommendations, user_id)
    # print(f"Recommendation Rating RMSE: {rms}")
    # return messages, response, recommendations
else:
    print(f"{userId} is not a valid user ID, please try again")
    # return None, None, None

Sure! Here are the predicted ratings for the list of movies:

1. Iron Giant, The (1999) - Predicted Rating: 3.8
2. Carlito's Way (1993) - Predicted Rating: 3.9
3. Fistful of Dollars, A (Per un pugno di dollari) (1964) - Predicted Rating: 3.7
4. 2001: A Space Odyssey (1968) - Predicted Rating: 4.1
5. Killer, The (Die xue shuang xiong) (1989) - Predicted Rating: 3.4
6. Lock, Stock & Two Smoking Barrels (1998) - Predicted Rating: 3.9
7. Postman, The (Postino, Il) (1994) - Predicted Rating: 3.8
8. Third Man, The (1949) - Predicted Rating: 3.9
9. Looking for Richard (1996) - Predicted Rating: 3.7
10. Fear and Loathing in Las Vegas (1998) - Predicted Rating: 3.8
11. Gosford Park (2001) - Predicted Rating: 3.8
12. Clockwork Orange, A (1971) - Predicted Rating: 4.0
13. Die Hard (1988) - Predicted Rating: 3.9
14. Dark City (1998) - Predicted Rating: 3.9
15. Chinatown (1974) - Predicted Rating: 4.0
16. Seventh Seal, The (Sjunde inseglet, Det) (1957) - Predicted Rating: 3.7
17. Titus (1999) - Pre

# Evaluation metric (RMSE of the prediction)

In [7]:
from func import extract_ratings
predicted_ratings = []
for movie, rating in recommendations5:
    predicted_ratings.append(float(rating))

In [8]:
import math
from func import calculate_rmse
rmse_score = calculate_rmse(predicted_ratings, true_ratings)
print(f"RMSE: {rmse_score}")

RMSE: 0.5813060008865095
