In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

# Load product data
products = pd.read_csv('products.csv')
print("Products:\n", products.head())

# One-hot encode the categorical product attributes: every distinct value of
# every listed column becomes its own indicator column.
categorical_features = ['Brand', 'Category', 'Type', 'Color', 'Size']
encoder = OneHotEncoder()
encoded_features = encoder.fit_transform(products[categorical_features])

# Convert to sparse matrix (CSR) before handing it to the SVD
encoded_sparse = csr_matrix(encoded_features)

# Project the one-hot matrix with truncated SVD.
# NOTE(review): n_components is set to the full width of the one-hot matrix,
# so this step re-expresses the features without shrinking them; pick a
# smaller value if an actual dimensionality reduction is intended.
n_components = len(encoder.get_feature_names_out(categorical_features))
# TruncatedSVD's default solver is randomized; fixing random_state keeps the
# product vectors (and therefore the recommendations) reproducible across runs.
svd = TruncatedSVD(n_components=n_components, random_state=42)
reduced_features = svd.fit_transform(encoded_sparse)

# Combine with product ID for reference (positional alignment, hence the index reset)
product_vectors_reduced = pd.concat([products[['Id']].reset_index(drop=True), pd.DataFrame(reduced_features)], axis=1)
print("Product Vectors Reduced:\n", product_vectors_reduced.head())

# Read every recorded user/product interaction
user_interactions_df = pd.read_csv('user_interactions.csv')
print("User Interactions Data:\n", user_interactions_df)

# Distinct products the target user has already interacted with
user_id = 1  # Assuming we're working with user ID 1
interacted_product_ids = user_interactions_df.loc[
    user_interactions_df['userID'] == user_id, 'productID'
].unique()
print("Products User has Interacted With:\n", interacted_product_ids)

# The user's category and price filters
user_category = "Men's Fashion"
price_lower, price_upper = 30, 60

# Keep products in the requested category (case-insensitive match) whose price
# lies in the inclusive [price_lower, price_upper] band, and drop anything the
# user has already interacted with.
filtered_products = products.loc[
    products['Category'].str.lower().eq(user_category.lower())
    & products['Price'].between(price_lower, price_upper)
    & ~products['Id'].isin(interacted_product_ids)
]

print("Filtered Products (excluding interacted):\n", filtered_products.head())

# Reset index for proper concatenation
filtered_products_reset = filtered_products.reset_index(drop=True)

# Project the filtered products into the same vector space as the full
# catalogue by reusing the already-fitted encoder and SVD.
if filtered_products_reset.empty:
    # scikit-learn's input validation rejects zero-row input, so when no
    # product survives the filters build the empty matrices directly.
    # svd.components_ has shape (n_components, n_one_hot_columns).
    encoded_filtered_features = csr_matrix((0, svd.components_.shape[1]))
    reduced_filtered_features = np.empty((0, svd.components_.shape[0]))
else:
    encoded_filtered_features = encoder.transform(filtered_products_reset[categorical_features])
    reduced_filtered_features = svd.transform(encoded_filtered_features)
print("Reduced Filtered Features Shape:", reduced_filtered_features.shape)

# Combine with filtered product ID (index was already reset above, so rows align positionally)
filtered_product_vectors_reduced = pd.concat(
    [filtered_products_reset[['Id']], pd.DataFrame(reduced_filtered_features)],
    axis=1
)
print("Filtered Product Vectors Reduced:\n", filtered_product_vectors_reduced.head())

# Function to create user preference vector
def get_preference_vector(user_id, interactions, product_vectors):
    """Build a user's preference vector as a weighted sum of product vectors.

    Parameters
    ----------
    user_id
        Value matched against the ``userID`` column of ``interactions``.
    interactions : pandas.DataFrame
        Must contain the columns ``userID``, ``productID`` and ``interaction``.
    product_vectors : pandas.DataFrame
        One row per product: an ``Id`` column plus the numeric feature columns.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per non-``Id`` column of
        ``product_vectors``. All zeros when the user has no interactions or
        none of them refer to a known product.

    Notes
    -----
    Interaction types missing from the weight table contribute nothing.
    Interactions whose ``productID`` is not present in ``product_vectors`` are
    skipped instead of raising a shape-mismatch error. If an ``Id`` occurs
    more than once in ``product_vectors``, its first row is used.
    """
    interaction_weights = {
        'like': 2,
        'dislike': -1,
        'add to cart': 4,
        'add to collection': 3,
        'bought': 5
    }
    preference_vector = np.zeros(product_vectors.shape[1] - 1)  # Exclude 'Id'

    user_interactions = interactions[interactions['userID'] == user_id]
    if user_interactions.empty:
        return preference_vector

    # Unknown interaction types map to NaN and are treated as weight 0.
    weights = user_interactions['interaction'].map(interaction_weights).fillna(0)
    # Several interactions with the same product simply add up.
    weight_per_product = weights.groupby(user_interactions['productID']).sum()

    vectors_by_id = product_vectors.drop_duplicates(subset='Id').set_index('Id')
    known_ids = weight_per_product.index.intersection(vectors_by_id.index)
    if len(known_ids) == 0:
        return preference_vector

    # (m,) weights @ (m, k) vectors -> (k,) weighted sum; both sides are
    # selected with the same id list so rows and weights stay aligned.
    product_weights = weight_per_product.loc[known_ids].to_numpy(dtype=float)
    product_matrix = vectors_by_id.loc[known_ids].to_numpy(dtype=float)
    return product_weights @ product_matrix

# Build the preference vector for user 1; with no interaction data at all
# there is nothing to build it from, so fall back to None.
if user_interactions_df.empty:
    user_preference_vector = None
else:
    user_preference_vector = get_preference_vector(user_id, user_interactions_df, product_vectors_reduced)
    if not user_preference_vector.size:
        print("User Preference Vector is empty.")

# Debugging: show the preference vector when one was computed
if user_preference_vector is not None:
    print("User Preference Vector Shape:", user_preference_vector.shape)
    print("User Preference Vector:\n", user_preference_vector)

# Function to recommend products based on cosine similarity
def recommend_products(user_preference_vector, filtered_product_vectors, top_n=10):
    """Pick the ``top_n`` candidate products most similar to the user's preferences.

    Parameters
    ----------
    user_preference_vector : numpy.ndarray or None
        1-D preference vector, or ``None`` when no interaction data exists.
    filtered_product_vectors : pandas.DataFrame
        Candidate products: an ``Id`` column plus the numeric feature columns.
    top_n : int, default 10
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``filtered_product_vectors``, ordered by
        descending cosine similarity (ties keep their original row order).
        When there is nothing to rank against — ``None`` or all-zero
        preference vector, or no candidates — the first ``top_n`` candidates
        are returned unchanged.

    Raises
    ------
    ValueError
        If the preference vector length differs from the number of feature
        columns in ``filtered_product_vectors``.
    """
    if user_preference_vector is None:
        # Return top_n filtered products directly if no user interactions
        return filtered_product_vectors.head(top_n)

    product_matrix = filtered_product_vectors.drop(columns=['Id']).values

    # Check dimensions before cosine similarity
    if user_preference_vector.size != product_matrix.shape[1]:
        raise ValueError("User preference vector size does not match the product matrix.")

    # No candidates: cosine_similarity would reject a zero-row matrix, and
    # there is nothing to rank anyway.
    if product_matrix.shape[0] == 0:
        return filtered_product_vectors.head(top_n)

    # An all-zero preference vector carries no signal: every similarity would
    # be 0 and the resulting order would be arbitrary, so use the same
    # fallback as the "no interactions" case.
    if not np.any(user_preference_vector):
        return filtered_product_vectors.head(top_n)

    similarities = cosine_similarity(user_preference_vector.reshape(1, -1), product_matrix)
    similarity_scores = similarities.flatten()
    # Stable sort on the negated scores: descending similarity, with equal
    # scores kept in their original row order for deterministic output.
    product_indices = np.argsort(-similarity_scores, kind='stable')
    recommended_products = filtered_product_vectors.iloc[product_indices[:top_n]]
    return recommended_products

# Recommend top 10 products for user 1
recommended_products = recommend_products(user_preference_vector, filtered_product_vectors_reduced, top_n=10)
print("Recommended Products Vectors:\n", recommended_products)

# Convert recommended_products back to original format
recommended_products_ids = recommended_products['Id']
# Selecting with an isin() mask returns rows in catalogue order, which would
# throw away the ranking; re-order the matched rows by each Id's position in
# the recommendation list so the best match is shown first.
rank_by_id = {product_id: rank for rank, product_id in enumerate(recommended_products_ids)}
matched_products = products[products['Id'].isin(recommended_products_ids)]
ranking_order = matched_products['Id'].map(rank_by_id).to_numpy().argsort(kind='stable')
recommended_products_original = matched_products.iloc[ranking_order]

print("Recommended Products in Original Format:\n", recommended_products_original)

Products:
    Id   Brand         Category     Type   Color Size  Price
0   1  Adidas    Men's Fashion    Dress   Black   XL     40
1   2     H&M  Women's Fashion    Shoes   Black    L     82
2   3  Adidas  Women's Fashion    Dress  Yellow   XL     44
3   4    Zara    Men's Fashion    Shoes   White    S     23
4   5  Adidas    Men's Fashion  T-shirt   Black    M     79
Product Vectors Reduced:
    Id         0         1         2         3         4         5         6  \
0   1  1.016298 -0.438497  0.699128  0.596664 -0.166325 -0.215168 -0.472892   
1   2  1.066204 -0.332802 -0.652976 -0.145217  0.185796  0.754863 -0.609598   
2   3  1.045318 -0.761346 -0.736471  0.413838 -0.186159 -0.420028 -0.433022   
3   4  1.108539 -0.293838  0.966454 -0.317575  0.434091 -0.057442 -0.800971   
4   5  1.033990 -0.227593  0.925274 -0.825005 -0.423071 -0.138410  0.137063   

          7         8  ...        13        14        15        16        17  \
0  0.134314 -0.488986  ...  0.238188 -0.004564  