In [18]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model

# Load pre-trained ResNet50 model (ImageNet weights, classification head removed,
# global average pooling on the output).
# NOTE(review): neither `base_model` nor `model` is used in the visible part of
# this cell — presumably consumed by image-feature code elsewhere; confirm.
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
model = Model(inputs=base_model.input, outputs=base_model.output)

# Load product data. The code below reads the columns 'Id', 'Brand',
# 'Category', 'Type', 'Color', 'Size' and 'Price' from this file.
products = pd.read_csv('products.csv')
print("Products:\n", products.head())

# One-Hot Encoding of categorical data
categorical_features = ['Brand', 'Category', 'Type', 'Color', 'Size']
encoder = OneHotEncoder()
encoded_features = encoder.fit_transform(products[categorical_features])

# Convert to sparse matrix (CSR) before handing it to TruncatedSVD
encoded_sparse = csr_matrix(encoded_features)

# Dimensionality reduction
# NOTE(review): n_components is set to the full number of one-hot columns, so
# the SVD output has as many columns as its input — no actual reduction happens.
n_components = len(encoder.get_feature_names_out(categorical_features))  # number of components equal to columns in one-hot encoded matrix
svd = TruncatedSVD(n_components=n_components)
reduced_features = svd.fit_transform(encoded_sparse)

# Combine with product ID for reference: first column is 'Id', the remaining
# columns (labelled 0..n-1) are the SVD components.
product_vectors_reduced = pd.concat([products[['Id']].reset_index(drop=True), pd.DataFrame(reduced_features)], axis=1)
print("Product Vectors Reduced:\n", product_vectors_reduced.head())

# Load the user interactions file. The code below reads the columns
# 'userID', 'productID' and 'interaction' from it.
user_interactions_df = pd.read_csv('user_interactions.csv')
print("User Interactions Data:\n", user_interactions_df)

# Mapping interaction types to numeric values (higher = stronger positive signal;
# 'dislike' is the only negative weight).
interaction_weights = {
    'like': 2,
    'dislike': -1,
    'add to cart': 4,
    'add to collection': 3,
    'bought': 5
}

# Interaction labels missing from the mapping become NaN in 'interaction_numeric'.
user_interactions_df['interaction_numeric'] = user_interactions_df['interaction'].map(interaction_weights)
print("User Interactions with Numeric Mapping:\n", user_interactions_df)

# Identify products user has interacted with
user_id = 1  # the user recommendations are generated for in this cell
interacted_product_ids = user_interactions_df[user_interactions_df['userID'] == user_id]['productID'].unique()
print("Products User has Interacted With:\n", interacted_product_ids)

# Define user's category and price filters and exclude interacted products
user_category = "Men's Fashion"
price_lower = 30
price_upper = 60

# Category match is case-insensitive; the price bounds are inclusive on both ends.
filtered_products = products[
    (products['Category'].str.lower() == user_category.lower()) &
    (products['Price'] >= price_lower) &
    (products['Price'] <= price_upper) &
    (~products['Id'].isin(interacted_product_ids))
]

print("Filtered Products (excluding interacted):\n", filtered_products.head())

# Reset index for proper concatenation (pd.concat with axis=1 aligns on index)
filtered_products_reset = filtered_products.reset_index(drop=True)

# Project the filtered products with the encoder and SVD already fitted on the
# full catalogue, so their vectors share the space of product_vectors_reduced.
encoded_filtered_features = encoder.transform(filtered_products_reset[categorical_features])
reduced_filtered_features = svd.transform(encoded_filtered_features)
print("Reduced Filtered Features Shape:", reduced_filtered_features.shape)

# Combine with filtered product ID (same layout as product_vectors_reduced)
filtered_product_vectors_reduced = pd.concat(
    [filtered_products_reset[['Id']].reset_index(drop=True), pd.DataFrame(reduced_filtered_features)],
    axis=1
)
print("Filtered Product Vectors Reduced:\n", filtered_product_vectors_reduced.head())

# Function to create user preference vector
def get_preference_vector(user_id, interactions, product_vectors):
    """Build a user's preference vector as a weighted sum of product vectors.

    Args:
        user_id: Value matched against the 'userID' column of ``interactions``.
        interactions: DataFrame with 'userID', 'productID' and 'interaction'
            columns.
        product_vectors: DataFrame with an 'Id' column plus one column per
            feature dimension.

    Returns:
        1-D float ``numpy.ndarray`` with one entry per feature column of
        ``product_vectors`` (every column except 'Id'). It is all zeros when
        the user has no usable interactions.

    Interactions are skipped (instead of raising a shape-mismatch error) when
    the product ID is absent from ``product_vectors``. Interaction types that
    are not in the weight table carry weight 0 and therefore contribute
    nothing. If an 'Id' occurs more than once in ``product_vectors``, its
    first row is used.
    """
    interaction_weights = {
        'like': 2,
        'dislike': -1,
        'add to cart': 4,
        'add to collection': 3,
        'bought': 5
    }

    preference_vector = np.zeros(product_vectors.shape[1] - 1)  # Exclude 'Id'
    user_interactions = interactions[interactions['userID'] == user_id]
    if user_interactions.empty:
        return preference_vector

    # Index the vectors by product ID once, rather than scanning the whole
    # frame with a boolean mask for every interaction row.
    vectors_by_id = product_vectors.drop_duplicates(subset='Id').set_index('Id')

    for product_id, interaction_type in zip(
        user_interactions['productID'], user_interactions['interaction']
    ):
        weight = interaction_weights.get(interaction_type, 0)
        if weight == 0 or product_id not in vectors_by_id.index:
            continue
        preference_vector += weight * vectors_by_id.loc[product_id].to_numpy(dtype=float)
    return preference_vector

# Get user preference vector for the selected user.
# A user without usable interaction history produces an all-zero (or empty)
# vector; cosine similarity against such a vector carries no signal, so that
# case is represented as None and downstream code takes its
# "no preference available" path instead.
user_preference_vector = None
if not user_interactions_df.empty:
    candidate_preference_vector = get_preference_vector(user_id, user_interactions_df, product_vectors_reduced)
    if candidate_preference_vector.size == 0 or not np.any(candidate_preference_vector):
        print("User Preference Vector is empty.")
    else:
        user_preference_vector = candidate_preference_vector

# Debugging: Check the user preference vector
if user_preference_vector is not None:
    print("User Preference Vector Shape:", user_preference_vector.shape)
    print("User Preference Vector:\n", user_preference_vector)

# Function to recommend products based on cosine similarity (Content-Based)
def recommend_content_based(user_preference_vector, filtered_product_vectors, top_n):
    """Rank candidate products by cosine similarity to the user's preferences.

    Args:
        user_preference_vector: 1-D array with one entry per feature column of
            ``filtered_product_vectors``, or ``None`` when no preference is
            available.
        filtered_product_vectors: DataFrame with an 'Id' column plus one
            column per feature dimension.
        top_n: Maximum number of rows to return; values below zero are
            treated as zero.

    Returns:
        Up to ``top_n`` rows of ``filtered_product_vectors``. With a
        preference vector they are ordered by descending similarity; with
        ``None`` they are simply the first ``top_n`` rows.

    Raises:
        ValueError: If the preference vector length differs from the number
            of feature columns.
    """
    # A negative count would otherwise make head()/slicing return
    # "everything except the last n rows", which is never what is wanted here.
    top_n = max(top_n, 0)

    if user_preference_vector is None:
        # Return top_n filtered products directly if no user interactions
        return filtered_product_vectors.head(top_n)

    # Nothing to rank: cosine_similarity rejects a matrix with zero rows, so
    # return an empty frame that keeps the expected columns.
    if top_n == 0 or filtered_product_vectors.empty:
        return filtered_product_vectors.head(0)

    product_matrix = filtered_product_vectors.drop(columns=['Id']).values

    # Check dimensions before cosine similarity
    if user_preference_vector.size != product_matrix.shape[1]:
        raise ValueError("User preference vector size does not match the product matrix.")

    similarities = cosine_similarity(user_preference_vector.reshape(1, -1), product_matrix)
    similarity_scores = similarities.flatten()
    product_indices = np.argsort(similarity_scores)[::-1]  # best match first
    recommended_products = filtered_product_vectors.iloc[product_indices[:top_n]]
    return recommended_products

# Create user-item interaction matrix for collaborative filtering using numeric interactions.
# A user may have several interactions with the same product (e.g. 'like' and
# later 'bought'); DataFrame.pivot raises on such duplicate (userID, productID)
# pairs, so the weights are summed per pair instead — the same accumulation
# get_preference_vector applies. Missing pairs and unmapped (NaN) weights
# end up as 0.
user_item_matrix = (
    user_interactions_df
    .groupby(['userID', 'productID'])['interaction_numeric']
    .sum()
    .unstack(fill_value=0)
    .astype(float)
)
print("User-Item Interaction Matrix:\n", user_item_matrix)

user_item_sparse = csr_matrix(user_item_matrix.values)

# Compute user similarity matrix (row i / column j = cosine similarity between
# the i-th and j-th users of user_item_matrix.index)
user_similarity = cosine_similarity(user_item_sparse)

# Function to recommend products based on collaborative filtering
def recommend_collaborative(user_id, user_item_matrix, user_similarity, top_n):
    """Recommend products via user-based collaborative filtering.

    Each product is scored as the similarity-weighted sum of all users'
    interaction values for it.

    Args:
        user_id: Label looked up in ``user_item_matrix.index``.
        user_item_matrix: DataFrame of interaction values, users as rows and
            product IDs as columns.
        user_similarity: 2-D array whose row ``i`` holds the similarities of
            the ``i``-th user of ``user_item_matrix.index`` to every user.
        top_n: Maximum number of product IDs to return.

    Returns:
        ``pandas.Index`` of product IDs ordered by descending score; empty
        when the user is unknown. May hold fewer than ``top_n`` entries.

    The user's own products are taken from their row of ``user_item_matrix``
    (non-zero entries) rather than from a module-level interactions frame, so
    the result depends only on the arguments. Products whose score is not
    strictly positive are dropped, mirroring the popularity recommender:
    a zero or negative score means similar users gave no positive signal.
    """
    if user_id not in user_item_matrix.index:
        print(f"User {user_id} not found in collaborative filtering data.")
        return pd.Index([])  # Return empty Index if user not found

    user_index = user_item_matrix.index.get_loc(user_id)
    user_similarities = user_similarity[user_index]

    # Compute scores for all products
    scores = pd.Series(
        user_item_matrix.T.dot(user_similarities),
        index=user_item_matrix.columns,
    )

    # Exclude already interacted products
    user_row = user_item_matrix.loc[user_id]
    interacted_products = user_row.index[user_row != 0]
    scores = scores.drop(interacted_products)

    # Keep only products with a positive collaborative signal
    scores = scores[scores > 0]

    # Recommend top_n products
    return scores.nlargest(top_n).index

# Function to recommend popular products based on filtered products
def recommend_popular_filtered_products(filtered_product_ids, user_item_matrix, top_n):
    """Pick the most popular products among a set of candidate IDs.

    Popularity is the column sum of ``user_item_matrix`` for each candidate
    that appears among its columns; only strictly positive totals qualify.

    Args:
        filtered_product_ids: Iterable of candidate product IDs.
        user_item_matrix: DataFrame with product IDs as columns.
        top_n: Maximum number of product IDs to return.

    Returns:
        Index of up to ``top_n`` product IDs, highest total first. When no
        candidate is a column of the matrix, the result of
        ``recommend_popular_products(user_item_matrix, top_n)`` is returned
        instead (that helper is expected to be defined elsewhere; it is not
        part of the visible code).
    """
    known_columns = user_item_matrix.columns
    # Keep only candidates that actually have a column in the matrix.
    valid_product_ids = [pid for pid in filtered_product_ids if pid in known_columns]

    if len(valid_product_ids) == 0:
        print("No valid filtered product IDs found in the user-item matrix. Falling back to popular products globally.")
        return recommend_popular_products(user_item_matrix, top_n)

    # Total interaction weight per candidate product.
    totals = user_item_matrix[valid_product_ids].sum(axis=0)

    # Drop products whose total is zero or negative.
    liked_totals = totals.loc[totals > 0]

    # Order from most to least popular, then cut to top_n.
    ranked_totals = liked_totals.sort_values(ascending=False)
    popular_product_ids = ranked_totals.nlargest(top_n).index
    print(popular_product_ids)

    return popular_product_ids

# Function to combine content-based and collaborative filtering recommendations
def hybrid_recommendations(user_id, user_item_matrix, user_similarity, filtered_product_vectors, user_preference_vector, top_cb=7, top_cf=3):
    """Combine collaborative (or popularity) and content-based recommendations.

    Args:
        user_id: User to recommend for.
        user_item_matrix: DataFrame of interaction values, users as rows and
            product IDs as columns.
        user_similarity: User-to-user similarity matrix aligned with
            ``user_item_matrix.index``.
        filtered_product_vectors: DataFrame with an 'Id' column plus feature
            columns for the candidate products.
        user_preference_vector: Preference vector for the user, or ``None``.
        top_cb: Baseline number of content-based slots.
        top_cf: Number of collaborative-filtering slots.

    Returns:
        DataFrame of rows from the module-level ``products`` frame, at most
        ``top_cb + top_cf`` of them, with the collaborative/popularity picks
        first and the content-based picks after them.

    Recommended IDs that do not exist in ``products`` are skipped rather than
    raising ``KeyError``, and the content-based slot count never goes below
    zero.
    """
    # Get the IDs of the filtered products
    filtered_product_ids = filtered_product_vectors['Id']

    # Check if collaborative filtering can be applied
    if user_id in user_item_matrix.index:
        # Get collaborative filtering recommendations
        cf_product_ids = recommend_collaborative(user_id, user_item_matrix, user_similarity, top_cf)
    else:
        # New users get popular products from within their strict filters;
        # three extra slots are requested for them (top_cf + 3).
        cf_product_ids = recommend_popular_filtered_products(filtered_product_ids, user_item_matrix, top_cf + 3)

    # Content-based picks fill whatever the first stage left unused. The
    # popularity branch may return more than top_cf IDs, hence the clamp.
    top_cb_needed = max(top_cb + (top_cf - len(cf_product_ids)), 0)

    print("CF product ids - ", cf_product_ids)

    cb_recommendations = recommend_content_based(user_preference_vector, filtered_product_vectors, top_cb_needed)
    cb_product_ids = cb_recommendations['Id'].values

    print("CB product ids - ", pd.Index(cb_product_ids))

    # Combine and deduplicate (first occurrence wins, so order is preserved)
    combined_ids = pd.Index(cf_product_ids).append(pd.Index(cb_product_ids)).unique()

    print(combined_ids)

    # Fetch the product rows in recommendation order, ignoring IDs that are
    # not present in the products table.
    products_by_id = products.set_index('Id')
    known_ids = [pid for pid in combined_ids if pid in products_by_id.index]
    combined_recommendations = products_by_id.loc[known_ids].reset_index()

    return combined_recommendations.head(top_cb + top_cf)

# Run the hybrid recommender for the selected user with the default slot
# counts, then show the resulting product rows.
combined_recommendations = hybrid_recommendations(
    user_id=user_id,
    user_item_matrix=user_item_matrix,
    user_similarity=user_similarity,
    filtered_product_vectors=filtered_product_vectors_reduced,
    user_preference_vector=user_preference_vector,
)
print("Combined Recommended Products:\n", combined_recommendations)

Products:
    Id   Brand         Category     Type   Color Size  Price
0   1  Adidas    Men's Fashion    Dress   Black   XL     40
1   2     H&M  Women's Fashion    Shoes   Black    L     82
2   3  Adidas  Women's Fashion    Dress  Yellow   XL     44
3   4    Zara    Men's Fashion    Shoes   White    S     23
4   5  Adidas    Men's Fashion  T-shirt   Black    M     79
Product Vectors Reduced:
    Id         0         1         2         3         4         5         6  \
0   1  1.016298 -0.438497  0.699128  0.596664 -0.166325 -0.215168 -0.472892   
1   2  1.066204 -0.332802 -0.652976 -0.145217  0.185796  0.754863 -0.609598   
2   3  1.045318 -0.761346 -0.736471  0.413838 -0.186159 -0.420028 -0.433022   
3   4  1.108539 -0.293838  0.966454 -0.317575  0.434091 -0.057442 -0.800971   
4   5  1.033990 -0.227593  0.925274 -0.825005 -0.423071 -0.138410  0.137063   

          7         8  ...        13        14        15        16        17  \
0  0.134314 -0.488986  ... -0.238188 -0.004564 -