<a href="https://colab.research.google.com/github/akessela/deep-learning-code-examples/blob/main/personalization_wacthedx.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Toy interaction log. The three lists are parallel: position i describes one
# (user, movie) pair and how many times that user watched that movie.
# Entries are laid out one user per line for readability.
data = {
    'user_id': [
        1, 1, 1,
        2, 2, 2,
        3, 3,
        4, 4, 4, 4,
    ],
    'movie_id': [
        101, 102, 103,
        101, 104, 105,
        102, 103,
        101, 102, 104, 105,
    ],
    'watch_count': [
        5, 4, 3,
        5, 3, 4,
        4, 5,
        3, 5, 4, 5,
    ],
}

# Long-format frame with columns user_id, movie_id, watch_count
df = pd.DataFrame(data=data)
df


Unnamed: 0,user_id,movie_id,watch_count
0,1,101,5
1,1,102,4
2,1,103,3
3,2,101,5
4,2,104,3
5,2,105,4
6,3,102,4
7,3,103,5
8,4,101,3
9,4,102,5


In [5]:
# Reshape the long interaction log into a user x movie matrix:
# one row per user_id, one column per movie_id, cells hold watch_count.
# aggfunc='mean' is pivot_table's default, spelled out here so it is visible
# that repeated (user, movie) rows would be averaged.
# Pairs with no interaction come out as NaN and are replaced by 0.
movie_matrix = (
    df
    .pivot_table(
        values='watch_count',
        index='user_id',
        columns='movie_id',
        aggfunc='mean',
    )
    .fillna(0)
)
movie_matrix


movie_id,101,102,103,104,105
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,5.0,4.0,3.0,0.0,0.0
2,5.0,0.0,0.0,3.0,4.0
3,0.0,4.0,5.0,0.0,0.0
4,3.0,5.0,0.0,4.0,5.0


In [6]:

# Item-item similarity: transpose so each row is one movie's vector of
# per-user watch counts, then take pairwise cosine similarity between rows.
movie_similarity = cosine_similarity(movie_matrix.T)

# Wrap the raw array in a frame labelled by movie_id on both axes so that
# similarities can be looked up by movie label.
movie_similarity_df = pd.DataFrame(
    data=movie_similarity,
    index=movie_matrix.columns,
    columns=movie_matrix.columns,
)
movie_similarity_df

movie_id,101,102,103,104,105
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
101,1.0,0.603538,0.334908,0.70302,0.711623
102,0.603538,1.0,0.726897,0.529813,0.517143
103,0.334908,0.726897,1.0,0.0,0.0
104,0.70302,0.529813,0.0,1.0,0.999512
105,0.711623,0.517143,0.0,0.999512,1.0


In [9]:
# Choose a user and pull their row (watch count per movie) from the matrix
user_id = 1
user_movies = movie_matrix.loc[user_id]
print(user_movies)

# A positive watch count means the user has seen the movie
watched_movies = user_movies.loc[user_movies > 0].index.tolist()
print(watched_movies)

# Empty float accumulator for the per-movie recommendation scores
movie_scores = pd.Series(dtype='float64')



movie_id
101    5.0
102    4.0
103    3.0
104    0.0
105    0.0
Name: 1, dtype: float64
[101, 102, 103]


In [11]:
top_n = 3

# Reset the accumulator inside this cell. The loop below adds into
# movie_scores, so without a fresh start every re-run of the cell would stack
# another round of weighted similarities on top of the previous result.
movie_scores = pd.Series(dtype=float)

for movie in watched_movies:
    # Similarity of this watched movie to every movie (one column of the matrix)
    similar_movies = movie_similarity_df[movie]
    # Weight the similarity by how much the user has watched this movie
    # (or could be ratings); .loc makes the label-based lookup explicit
    weighted_similarity = similar_movies * user_movies.loc[movie]
    # Accumulate scores; fill_value=0 lets the first iteration add onto the
    # empty accumulator without producing NaN
    movie_scores = movie_scores.add(weighted_similarity, fill_value=0)

# Remove the movies that the user has already watched
movie_scores = movie_scores.drop(watched_movies)

# Sort the movies by accumulated score and keep the top N
recommended_movies = movie_scores.sort_values(ascending=False).head(top_n)
recommended_movies

In [12]:
def recommend_movies_for_user(user_id, movie_matrix, similarity_df, top_n=3):
    """Recommend unwatched movies for one user from item-item similarities.

    Every movie the user has watched (value > 0 in their row) contributes its
    column of ``similarity_df``, scaled by the user's value for that movie.
    The contributions are summed per candidate movie, movies the user has
    already watched are removed, and the highest-scoring labels are returned.

    Args:
        user_id: Row label of the user in ``movie_matrix``.
        movie_matrix: DataFrame indexed by user with one column per movie.
            A value > 0 marks the movie as watched and is used as its weight.
        similarity_df: DataFrame of movie-to-movie similarity scores, read
            column-wise for each watched movie. Watched movies that have no
            column here are skipped, and watched movies that have no row are
            simply not candidates; neither case raises ``KeyError``.
        top_n: Number of recommendations to keep (passed to ``Series.head``).

    Returns:
        A list of movie labels ordered by descending score. It may be shorter
        than ``top_n`` when fewer unwatched candidates exist. Instead of a
        list, the string "User not found." is returned when ``user_id`` is not
        in ``movie_matrix.index``, and "No movies watched by the user." when
        the user's row has no positive entry.
    """
    if user_id not in movie_matrix.index:
        return "User not found."

    # Row of per-movie values for this user; positive entries are "watched"
    user_movies = movie_matrix.loc[user_id]
    watched_movies = user_movies[user_movies > 0].index.tolist()

    if not watched_movies:
        return "No movies watched by the user."

    # Only watched movies that actually have a similarity column can
    # contribute; looking up a missing one would raise KeyError.
    scorable_movies = [
        movie for movie in watched_movies if movie in similarity_df.columns
    ]

    # Accumulate the weighted similarity columns into one score per movie
    movie_scores = pd.Series(dtype=float)
    for movie in scorable_movies:
        # .loc makes the label-based lookup of the weight explicit
        weighted_similarity = similarity_df[movie] * user_movies.loc[movie]
        # fill_value=0 treats labels missing on either side as a zero score
        movie_scores = movie_scores.add(weighted_similarity, fill_value=0)

    # Remove the movies that the user has already watched. errors="ignore"
    # tolerates watched movies that are not rows of similarity_df.
    movie_scores = movie_scores.drop(watched_movies, errors="ignore")

    # Sort by score, best first, and keep the top N labels
    recommended_movies = movie_scores.sort_values(ascending=False).head(top_n)

    return recommended_movies.index.tolist()

# Demo: ask for recommendations for user 1, leaving top_n at its default
user_id = 1
personalized_recommendations = recommend_movies_for_user(
    user_id=user_id,
    movie_matrix=movie_matrix,
    similarity_df=movie_similarity_df,
)

print(f"Personalized movie recommendations for user {user_id}: {personalized_recommendations}")

Personalized movie recommendations for user 1: [104, 105]
