In [156]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import TruncatedSVD
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

In [143]:
products = pd.read_csv('products.csv')

In [144]:
print(products.head())

   Id   Brand         Category     Type   Color Size  Price
0   1  Adidas    Men's Fashion    Dress   Black   XL     40
1   2     H&M  Women's Fashion    Shoes   Black    L     82
2   3  Adidas  Women's Fashion    Dress  Yellow   XL     44
3   4    Zara    Men's Fashion    Shoes   White    S     23
4   5  Adidas    Men's Fashion  T-shirt   Black    M     79


In [161]:
# One-Hot Encoding of categorical data
categorical_features = ['Brand', 'Category', 'Type', 'Color', 'Size']
encoder = OneHotEncoder()
encoded_features = encoder.fit_transform(products[categorical_features])
encoded_df = pd.DataFrame(encoded_features.toarray(), columns=encoder.get_feature_names_out(categorical_features))

# Combine with product ID and Price
product_vectors = pd.concat([products[['Id', 'Price']], encoded_df], axis=1)

print(product_vectors)

       Id         0         1         2         3         4         5  \
0       1  1.016298 -0.438497  0.699128  0.596664 -0.166325 -0.215168   
1       2  1.066204 -0.332802 -0.652976 -0.145217  0.185796  0.754863   
2       3  1.045318 -0.761346 -0.736471  0.413838 -0.186159 -0.420028   
3       4  1.108539 -0.293838  0.966454 -0.317575  0.434091 -0.057442   
4       5  1.033990 -0.227593  0.925274 -0.825005 -0.423071 -0.138410   
..    ...       ...       ...       ...       ...       ...       ...   
995   996  1.070255 -0.542435 -0.372960 -0.937580 -0.547990 -0.398467   
996   997  1.065053  0.926152 -0.246278  0.168690  0.065477  0.753570   
997   998  1.054590 -0.206431  0.832007  0.046202 -0.315185  1.093893   
998   999  1.067339 -0.493632 -0.336372 -0.464436  0.638075 -0.322161   
999  1000  1.113347 -0.480316 -0.687854 -0.359417 -0.159307  0.876721   

            6         7         8  ...        13        14        15  \
0   -0.472892  0.134314 -0.488986  ...  0.238188 -0

In [158]:
# Load the empty user interactions file
user_interactions_df = pd.read_csv('user_interactions.csv')

print("User Interactions Data (Initially Empty):")
print(user_interactions_df)

User Interactions Data (Initially Empty):
   userID  productID interaction
0       1          1     dislike
1       1          4     dislike
2       1          5     dislike
3       1          6        like
4       1          7     dislike
5       1          9        like
6       1         11        like


In [162]:
# Identify products user has interacted with
user_id = 1  # Assuming we're working with user ID 1
interacted_product_ids = user_interactions_df[user_interactions_df['userID'] == user_id]['productID'].unique()
print("Products User has Interacted With:\n", interacted_product_ids)

# Define user's category and price filters and exclude interacted products
user_category = "Men's Fashion"
price_lower = 30
price_upper = 60

filtered_products = products[
    (products['Category'].str.lower() == user_category.lower()) &
    (products['Price'] >= price_lower) &
    (products['Price'] <= price_upper) &
    (~products['Id'].isin(interacted_product_ids))  # Exclude interacted products
]

print("Filtered Products (excluding interacted):\n", filtered_products)

Products User has Interacted With:
 [ 1  4  5  6  7  9 11]
Filtered Products (excluding interacted):
       Id   Brand       Category     Type   Color Size  Price
14    15    Zara  Men's Fashion    Shoes  Yellow    M     54
19    20   Gucci  Men's Fashion    Dress   Green    L     53
20    21     H&M  Men's Fashion    Jeans  Yellow    S     56
42    43  Adidas  Men's Fashion  T-shirt   White    M     39
45    46    Zara  Men's Fashion    Dress  Yellow   XL     38
..   ...     ...            ...      ...     ...  ...    ...
964  965    Nike  Men's Fashion  Sweater  Yellow    S     55
970  971     H&M  Men's Fashion    Shoes   Green   XL     45
982  983    Zara  Men's Fashion    Dress     Red   XL     50
987  988     H&M  Men's Fashion  T-shirt    Blue   XL     56
997  998    Zara  Men's Fashion  Sweater   White    L     47

[110 rows x 7 columns]


In [164]:

# Reset index for proper concatenation
filtered_products_reset = filtered_products.reset_index(drop=True)

# Apply dimensionality reduction on filtered products
encoded_filtered_features = encoder.transform(filtered_products_reset[categorical_features])
reduced_filtered_features = svd.transform(encoded_filtered_features)
# print("Reduced Filtered Features Shape:", reduced_filtered_features.shape)

# Combine with filtered product ID
filtered_product_vectors_reduced = pd.concat(
    [filtered_products_reset[['Id']].reset_index(drop=True), pd.DataFrame(reduced_filtered_features)],
    axis=1
)
print(filtered_product_vectors_reduced)

      Id         0         1         2         3         4         5  \
0     15  1.085719 -0.343884  0.867678 -0.828459 -0.676487 -0.274307   
1     20  1.007616 -0.290483  0.470443 -0.013464  0.065301  0.979147   
2     21  1.088271 -0.284661  0.751642  0.231882  0.894288 -0.122524   
3     43  1.076649 -0.300025  0.906436 -0.767102 -0.625636  0.016953   
4     46  1.044272 -0.521553  0.705886  0.604566 -0.395126 -0.271297   
..   ...       ...       ...       ...       ...       ...       ...   
105  965  1.043617 -0.176747  0.826580  0.070319  0.830998 -0.180733   
106  971  1.064988 -0.318605  0.744719  0.582500 -0.077383 -0.190515   
107  983  1.017798 -0.500535  0.741792  0.644438 -0.241930 -0.176804   
108  988  1.057656 -0.274700  0.818139  0.720151 -0.162199 -0.187210   
109  998  1.054590 -0.206431  0.832007  0.046202 -0.315185  1.093893   

            6         7         8  ...        13        14        15  \
0   -0.244076 -0.523676  0.291518  ... -0.471031 -0.011577  0.4

In [149]:
# Function to create user preference vector
def get_preference_vector(user_id, interactions, product_vectors):
    user_interactions = interactions[interactions['userID'] == user_id]
    preference_vector = np.zeros(product_vectors.shape[1] - 1)  # Exclude 'Id'
    interaction_weights = {
        'like': 2,
        'dislike': -1,
        'add to cart': 4,
        'add to collection': 3,
        'bought': 5
    }
    for _, interaction in user_interactions.iterrows():
        product_id = interaction['productID']
        interaction_type = interaction['interaction']
        weight = interaction_weights.get(interaction_type, 0)
        product_vector = product_vectors[product_vectors['Id'] == product_id].drop(columns=['Id']).values.flatten()
        preference_vector += weight * product_vector
    return preference_vector

In [150]:
# Get user preference vector for user 1
if not user_interactions_df.empty:
    user_preference_vector = get_preference_vector(user_id, user_interactions_df, product_vectors_reduced)
    if user_preference_vector.size == 0:
        print("User Preference Vector is empty.")
else:
    user_preference_vector = None

# Debugging: Check the user preference vector
if user_preference_vector is not None:
    print("User Preference Vector Shape:", user_preference_vector.shape)
    print("User Preference Vector:\n", user_preference_vector)

[ 2. -1.  2.  0. -1.  0.  2.  0.  1. -1. -1.  2.  1. -2.  0.  2.  2. -2.
  2.  2. -1.  1.  0.]
User Preference Vector Shape: (23,)
User Preference Vector:
 [ 2. -1.  2.  0. -1.  0.  2.  0.  1. -1. -1.  2.  1. -2.  0.  2.  2. -2.
  2.  2. -1.  1.  0.]


In [151]:
# Function to recommend products based on cosine similarity
def recommend_products(user_preference_vector, filtered_product_vectors, top_n=10):
    if user_preference_vector is None:
        # Return top_n filtered products directly if no user interactions
        return filtered_product_vectors.head(top_n)
    
    product_matrix = filtered_product_vectors.drop(columns=['Id']).values
    
    # Check dimensions before cosine similarity
    if user_preference_vector.size != product_matrix.shape[1]:
        raise ValueError("User preference vector size does not match the product matrix.")
    
    similarities = cosine_similarity(user_preference_vector.reshape(1, -1), product_matrix)
    similarity_scores = similarities.flatten()
    product_indices = np.argsort(similarity_scores)[::-1]
    recommended_products = filtered_product_vectors.iloc[product_indices[:top_n]]
    return recommended_products

In [165]:
# Recommend top 10 products for user 1
recommended_products = recommend_products(user_preference_vector, filtered_product_vectors_reduced, top_n=10)
print("Recommended Products Vectors:\n", recommended_products)

# Convert recommended_products back to original format
recommended_products_ids = recommended_products['Id']
recommended_products_original = products[products['Id'].isin(recommended_products_ids)]

print("Recommended Products in Original Format:\n", recommended_products_original)

KeyError: "['Price'] not found in axis"