In [1]:

# Collaborative Filtering:
# Description: Collaborative Filtering recommends items to users based on the preferences and behaviors of similar users. It can be user-based or item-based, and it relies on the idea that users who liked similar items in the past will also like similar items in the future.
# Typical Use Cases:
#  Movie recommendations.
#  Product recommendations in e-commerce.
#  Music recommendations in streaming services.

# Content-Based Filtering:
# Description: Content-Based Filtering recommends items to users based on the attributes and features of the items and the user's preferences. It focuses on item characteristics rather than user behavior.
# Typical Use Cases:
#  Movie recommendations based on genres, actors, directors, etc.
#  Article recommendations based on content.
#  Product recommendations based on product features.
        

In [1]:

# Objective: Collaborative Filtering aims to recommend items (products, movies, etc.) to users based on the 
# preferences and behaviors of similar users.

# User Behavior: It relies on the idea that users who have similar preferences in the past are likely to 
# have similar preferences in the future.

# Two Types:
# User-Based Collaborative Filtering: Recommends items by identifying users with similar preferences and 
# suggesting items liked by those similar users.

# Item-Based Collaborative Filtering: Recommends items by identifying similar items and suggesting items 
# that are similar to those the user has liked.
# Similarity Measures: Common similarity measures include cosine similarity and Pearson correlation 
# coefficient. These measures quantify the similarity between users or items based on their ratings.


# USER BASED COLLABORATIVE FILTERING:
# Recommends items by identifying USERS WITH SIMILAR PREFERENCES and suggesting items liked by those similar users.
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Generate a synthetic dataset with names
np.random.seed(42)

# Create a list of common names for users and general household items
user_names = ["Bob", "Tim", "Tom", "Rachel", "Brad", "Susie", "Doug", "Alice", "Charlie", "Eva"]
product_names = ["Detergent", "Soap", "Plates", "Cups", "Toilet Paper", "Kitchen Towels", "Shampoo", "Conditioner", "Toothpaste", "Mouthwash"]

# Generate random data
users = np.random.choice(user_names, size=200, replace=True)
products = np.random.choice(product_names, size=200, replace=True)
ratings = np.random.choice(range(1, 6), size=200, replace=True)

df = pd.DataFrame({'user_name': users, 'product_name': products, 'rating': ratings})

# Create a user-product matrix
user_product_matrix = df.pivot_table(index='user_name', columns='product_name', values='rating', fill_value=0)

# Calculate cosine similarity between users
user_similarity = cosine_similarity(user_product_matrix)

# Convert the similarity matrix to a DataFrame for better indexing
user_similarity_df = pd.DataFrame(user_similarity, index=user_product_matrix.index, columns=user_product_matrix.index)

# Function to get user-based recommendations
def get_user_recommendations(user_name, top_n=5):
    # Get the user's ratings
    user_ratings = user_product_matrix.loc[user_name]

    # Calculate the weighted average of ratings based on user similarity
    weighted_ratings = user_similarity_df.loc[user_name] @ user_product_matrix

    # Exclude products the user has already rated
    unrated_products = weighted_ratings.index[weighted_ratings == 0]

    # If all products are rated by the user, consider recommendations from other users
    if len(unrated_products) == 0:
        other_users = user_product_matrix.index[user_product_matrix.index != user_name]
        unrated_products = user_product_matrix.columns[user_product_matrix.loc[other_users].sum() > 0]

    # Get top N recommendations
    recommendations = unrated_products[np.argsort(weighted_ratings[unrated_products])[-top_n:]][::-1]

    return recommendations

# Example: Get recommendations for a random user
random_user = np.random.choice(user_names)
recommendations = get_user_recommendations(random_user, top_n=3)

print(f"Top recommendations for user {random_user}: {recommendations}")


Top recommendations for user Alice: Index(['Conditioner', 'Kitchen Towels', 'Mouthwash'], dtype='object', name='product_name')


In [2]:

# The recommendations for user Alice are based on the collaborative filtering approach using cosine 
# similarity between users. The recommendations are determined by considering the products that users 
# similar to Alice have liked. If many users who are similar to Alice have rated 'Conditioner', 
# 'Kitchen Towels', and 'Mouthwash' highly, these products are likely to be recommended to Alice as well.

# In the context of collaborative filtering, the recommendations are driven by user similarities and 
# the products that similar users have shown a preference for. If Alice and users with similar 
# preferences have rated those specific products highly, they are more likely to be recommended to Alice.
# It's important to note that collaborative filtering is a personalized approach, and the recommendations 
# are tailored to the individual user based on their historical ratings and the preferences of similar 
# users in the dataset.


In [3]:

# ITEM BASED COLLABORATIVE FILTERING:
# The focus would shift from comparing users to comparing items directly. The recommendations would be 
# generated BASED ON SIMILARITY BETWEEN ITEMSs rather than users.

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Generate a synthetic dataset with names
np.random.seed(42)

# Create a list of common names for users and general household items
user_names = ["Bob", "Tim", "Tom", "Rachel", "Brad", "Susie", "Doug", "Alice", "Charlie", "Eva"]
product_names = ["Detergent", "Soap", "Plates", "Cups", "Toilet Paper", "Kitchen Towels", "Shampoo", "Conditioner", "Toothpaste", "Mouthwash"]

# Generate random data
users = np.random.choice(user_names, size=200, replace=True)
products = np.random.choice(product_names, size=200, replace=True)
ratings = np.random.choice(range(1, 6), size=200, replace=True)

df = pd.DataFrame({'user_name': users, 'product_name': products, 'rating': ratings})

# Create a user-product matrix
user_product_matrix = df.pivot_table(index='user_name', columns='product_name', values='rating', fill_value=0)

# Calculate cosine similarity between items (products)
item_similarity = cosine_similarity(user_product_matrix.T)  # Transpose to get item-item similarity

# Convert the similarity matrix to a DataFrame for better indexing
item_similarity_df = pd.DataFrame(item_similarity, index=product_names, columns=product_names)

# Function to get item-based recommendations
def get_item_recommendations(user_name, top_n=5):
    # Get the user's ratings
    user_ratings = user_product_matrix.loc[user_name]

    # Calculate the weighted average of ratings based on item similarity
    weighted_ratings = user_ratings @ item_similarity_df

    # Exclude items the user has already interacted with
    unrated_items = user_ratings.index[user_ratings == 0]

    # If all items are rated by the user, consider recommendations from other users
    if len(unrated_items) == 0:
        other_users = user_product_matrix.index[user_product_matrix.index != user_name]
        unrated_items = user_product_matrix.columns[user_product_matrix.loc[other_users].sum() > 0]

    # Get top N recommendations
    recommendations = unrated_items[np.argsort(weighted_ratings[unrated_items])[-top_n:]][::-1]

    return recommendations

# Example: Get item-based recommendations for a random user
random_user = np.random.choice(user_names)
item_recommendations = get_item_recommendations(random_user, top_n=3)

print(f"Top item-based recommendations for user {random_user}: {item_recommendations}")


Top item-based recommendations for user Alice: Index(['Soap'], dtype='object', name='product_name')


In [None]:

# In collaborative filtering, recommendations are made based on the similarity between items. If 'Soap' was 
# recommended to Alice, it indicates that 'Soap' is considered similar to other items that Alice has 
# interacted with or liked.

# The recommendation is derived from the weighted average of ratings given by other users to 'Soap' and 
# the items that Alice has interacted with. In other words, users who liked items similar to the ones 
# Alice liked have also liked 'Soap' in the past.

# The specific reasons for the recommendation may vary depending on the patterns in the data. 'Soap' 
# could be similar to other personal care or household items that Alice has shown a preference for based 
# on the user-item interactions in the dataset.
 

In [4]:

# Content-Based Filtering recommends items to users based on the attributes or features of the 
# items and the user's preferences. It's different from collaborative filtering in that it focuses on the characteristics of items and the user's past interactions with items.


import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Sample movie dataset with genres
movies = pd.DataFrame({
    'movie_id': [1, 2, 3, 4, 5],
    'title': ['Movie A', 'Movie B', 'Movie C', 'Movie D', 'Movie E'],
    'genres': ['Action, Adventure', 'Drama, Romance', 'Action, Sci-Fi', 'Comedy', 'Drama, Sci-Fi']
})

# User preferences
user_preferences = {'Action': 5, 'Adventure': 4, 'Drama': 3, 'Sci-Fi': 4, 'Comedy': 2, 'Romance': 1}

# TF-IDF Vectorizer to convert genres into numerical vectors
tfidf_vectorizer = TfidfVectorizer()
genres_matrix = tfidf_vectorizer.fit_transform(movies['genres'])

# Calculate cosine similarity between movies based on genres
cosine_similarity_matrix = linear_kernel(genres_matrix, genres_matrix)

# Function to get content-based recommendations
def get_content_based_recommendations(movie_title, top_n=3):
    movie_index = movies[movies['title'] == movie_title].index[0]
    cosine_similarity_scores = list(enumerate(cosine_similarity_matrix[movie_index]))

    # Sort movies based on similarity scores
    sorted_movies = sorted(cosine_similarity_scores, key=lambda x: x[1], reverse=True)[1:]

    # Get top N recommendations
    top_recommendations = [(movies.iloc[idx]['title'], score) for idx, score in sorted_movies[:top_n]]

    return top_recommendations

# Example: Get content-based recommendations for a movie
selected_movie = 'Movie A'
content_based_recommendations = get_content_based_recommendations(selected_movie, top_n=3)

print(f"Top content-based recommendations for {selected_movie}: {content_based_recommendations}")


Top content-based recommendations for Movie A: [('Movie C', 0.3625261793170715), ('Movie B', 0.0), ('Movie D', 0.0)]


In [None]:

# Movies are represented by their titles and associated genres.

# TF-IDF Vectorization:
# The genres of each movie are converted into numerical vectors using the TF-IDF (Term Frequency-Inverse 
# Document Frequency) vectorizer. This process transforms the genres into a numerical representation that 
# reflects their importance in the entire dataset.

# Cosine Similarity Calculation:
# Cosine similarity is then calculated between the TF-IDF vectors of movies. Cosine similarity measures 
# the cosine of the angle between two non-zero vectors and provides a similarity score between 0 and 1. 
# A higher score indicates greater similarity.

#Recommendation Function:
# The get_content_based_recommendations function takes a movie title ('Movie A' in this case) and finds 
# its index in the movie dataset.
# It then retrieves the cosine similarity scores between 'Movie A' and all other movies.
# The movies are sorted based on their similarity scores in descending order.
# The top N recommendations are selected, excluding the input movie itself.

# Result Explanation:
# The result ('Movie C', 0.3625261793170715) indicates that 'Movie C' is the most similar to 'Movie A' 
# with a similarity score of approximately 0.36.
# The scores for 'Movie B' and 'Movie D' are both 0.0, suggesting that they are not similar to 'Movie A' 
# based on their genres.
    

In [None]:

# A few other types of recommendation systems include:
# Matrix Factorization
# Typical Use Cases:
#  Movie recommendations.
#  Book recommendations.
#  Recommendation in a sparse user-item interaction matrix.

# Deep Learning-based Recommenders
#  Personalized content recommendations.
#  E-commerce product recommendations.
#  Video recommendations in streaming platforms.

# Association Rule Mining 
# Typical Use Cases:
#  Recommending complementary products in e-commerce.
#  Menu suggestions in restaurants.
#  Cross-selling recommendations.

# Hybrid Recommender Systems
# Typical Use Cases:
#  Enhanced movie recommendations using both user preferences and movie attributes.
#  Hybrid e-commerce recommendations combining user behavior and product features.

# Context-Aware Recommenders
# Typical Use Cases:
#  Location-based recommendations.
#  Time-sensitive recommendations.
#  Device-specific recommendations.

# Bandit Algorithms
#  Online advertising recommendations.
#  Personalized news feed recommendations.
#  Dynamic pricing recommendations.
