<a href="https://colab.research.google.com/github/gohilriddhi21/embeddings/blob/main/Multi_Modal_Embeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
# Import required libraries
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from sklearn.metrics.pairwise import cosine_similarity

In [24]:
# Step 1: Load Data
# Product catalogue -- expected columns: product_id, description, image_path, ratings
df = pd.read_csv('products.csv')
print("Product Data:", df.head(), sep="\n")

# Per-user interaction history -- expected columns: user_id, product_id, rating, comment, likes
user_history_df = pd.read_csv('user_history.csv')
print("\nUser History Data:", user_history_df.head(), sep="\n")

Product Data:
   product_id                 description          image_path  ratings
0           1    Comfortable leather sofa   images/sofa1.jpeg      4.5
1           2  Modern wooden dining table  images/table1.jpeg      4.0
2           3      Soft cotton bed sheets  images/sheets1.jpg      4.7

User History Data:
   user_id  product_id  rating                 comment  likes
0        1           1     4.5   Very comfortable sofa      1
1        1           2     3.0  Table is sturdy but...      0
2        2           3     5.0  Love these bed sheets!      1


In [25]:
# Step 2: Generate Text Embeddings
# Load the pre-trained 'all-MiniLM-L6-v2' sentence-transformers model.
# This single instance is used for product descriptions and, later on, for the
# user's comments, so both kinds of text are embedded by the same model.
text_model = SentenceTransformer('all-MiniLM-L6-v2')

In [26]:
# Embed every product description; row i of the result belongs to row i of df
product_descriptions = df['description'].tolist()
text_embeddings = text_model.encode(product_descriptions)
print("\nText Embeddings Shape:", text_embeddings.shape)



Text Embeddings Shape: (3, 384)


In [27]:
# Step 3: Generate Image Embeddings
# ImageNet-pretrained ResNet50 used purely as a feature extractor: the
# classification head is dropped and the final feature map is average-pooled,
# so each image yields a single flat feature vector.
image_model = ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='avg',
)


def get_image_embedding(img_path):
    """Generate an image embedding for a given image path.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, run through the ResNet50 ``preprocess_input`` function and
    finally through the module-level ``image_model``.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        numpy.ndarray: 1-D feature vector (the model output for the single
        image, flattened).
    """
    img = image.load_img(img_path, target_size=(224, 224))
    img_data = image.img_to_array(img)
    # The model expects a batch, so wrap the single image as a batch of one.
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    # verbose=0: this function is called once per product, and the default
    # verbosity prints a progress bar for every single call, flooding the
    # notebook output without conveying any useful information.
    return image_model.predict(img_data, verbose=0).flatten()

# Embed each product image in catalogue order and stack the vectors row-wise
image_embeddings = np.array(list(map(get_image_embedding, df['image_path'])))
print("Image Embeddings Shape:", image_embeddings.shape)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
Image Embeddings Shape: (3, 2048)


In [28]:

# Step 4: Combine Text and Image Embeddings
# Place each product's text vector and image vector side by side in one row.
# NOTE(review): recommend_products() receives this array as an argument but
# ranks on text_embeddings only -- confirm whether that is intended.
combined_embeddings = np.hstack([text_embeddings, image_embeddings])
print("\nCombined Embeddings Shape:", combined_embeddings.shape)



Combined Embeddings Shape: (3, 2432)


In [29]:

# Step 5: Create User Profile (Updated)
def create_user_profile(user_id, user_history_df):
    """Summarise one user's interaction history as a profile dict.

    Args:
        user_id: Value matched against ``user_history_df['user_id']``.
        user_history_df: DataFrame with ``user_id``, ``product_id``,
            ``rating``, ``comment`` and ``likes`` columns.

    Returns:
        dict with keys:
            ``user_id`` -- the id that was passed in;
            ``avg_rating`` -- mean of the user's ``rating`` values;
            ``comment_embedding`` -- ``text_model`` encoding of all the
            user's non-null comments joined by single spaces;
            ``liked_product_ids`` -- ``product_id`` of rows where
            ``likes == 1``.
    """
    # Restrict the history to the rows belonging to this user.
    is_this_user = user_history_df['user_id'] == user_id
    history = user_history_df.loc[is_this_user]

    # Missing comments are dropped before the remaining ones are merged into
    # one text, which is embedded with the module-level text model.
    comment_text = " ".join(history['comment'].dropna().tolist())

    profile = {
        'user_id': user_id,
        'avg_rating': history['rating'].mean(),
        'comment_embedding': text_model.encode(comment_text),
        'liked_product_ids': history.loc[history['likes'] == 1, 'product_id'].tolist(),
    }
    return profile

# Step 6: Recommend Products for a Specific User (Updated)
def recommend_products(user_id, user_history_df, df, combined_embeddings, text_embeddings, top_n=5):
    """Recommend products for a given user_id.

    The user's taste is modelled as a vector in the text-embedding space: the
    embedding of their combined comments, blended 50/50 with the mean text
    embedding of the products they liked. When the user has no liked product
    that exists in the catalogue, the comment embedding alone is used.
    Products are ranked by cosine similarity to that vector. Products the
    user has already interacted with are not filtered out.

    Args:
        user_id: Identifier looked up in ``user_history_df['user_id']``.
        user_history_df: Interaction history, forwarded to
            ``create_user_profile``.
        df: Product catalogue with a ``product_id`` column. Row *position* i
            must correspond to ``text_embeddings[i]``.
        combined_embeddings: Unused. Kept so existing call sites keep
            working; ranking is done on ``text_embeddings`` only.
        text_embeddings: 2-D array of product text embeddings, one row per
            catalogue row.
        top_n: Maximum number of product IDs to return (default 5).

    Returns:
        list: Up to ``top_n`` product IDs, most similar first.

    Raises:
        ValueError: If ``text_embeddings`` does not have exactly one row per
            row of ``df``.
    """
    text_embeddings = np.asarray(text_embeddings)
    if text_embeddings.shape[0] != len(df):
        raise ValueError(
            "text_embeddings must have one row per product in df "
            f"(got {text_embeddings.shape[0]} rows for {len(df)} products)"
        )

    # Create user profile
    user_profile = create_user_profile(user_id, user_history_df)

    # Map each product_id to the *position* of its first catalogue row.
    # Positions (not index labels) are what line up with text_embeddings, so
    # this stays correct even if df carries a non-default index.
    position_of = {}
    for position, product_id in enumerate(df['product_id'].tolist()):
        position_of.setdefault(product_id, position)

    # Liked products that are missing from the catalogue are skipped instead
    # of raising; repeated likes are kept so they weigh in the mean as before.
    liked_positions = [
        position_of[pid]
        for pid in user_profile['liked_product_ids']
        if pid in position_of
    ]

    # Blend the comment embedding with the liked products' mean embedding.
    # Averaging an empty selection would give an all-NaN vector, so without
    # any usable likes the comment embedding stands on its own.
    user_preference_embedding = np.asarray(user_profile['comment_embedding'])
    if liked_positions:
        liked_products_avg_text_embedding = text_embeddings[liked_positions].mean(axis=0)
        user_preference_embedding = (
            0.5 * user_preference_embedding + 0.5 * liked_products_avg_text_embedding
        )

    # Normalize the user preference embedding (skipped for a zero vector to
    # avoid dividing by zero).
    norm = np.linalg.norm(user_preference_embedding)
    if norm > 0:
        user_preference_embedding = user_preference_embedding / norm

    # Calculate cosine similarity between user preference and product text embeddings
    similarities = cosine_similarity([user_preference_embedding], text_embeddings).flatten()

    # Weight similarities by the user's average rating. A positive scalar does
    # not alter the ordering; a NaN (user without history) or non-positive
    # rating would wipe it out, so in that case the raw similarities are kept.
    avg_rating = user_profile['avg_rating']
    if np.isfinite(avg_rating) and avg_rating > 0:
        weighted_similarities = similarities * avg_rating
    else:
        weighted_similarities = similarities

    # Best-first positions, truncated to the requested number of results.
    limit = max(int(top_n), 0)
    top_indices = weighted_similarities.argsort()[::-1][:limit]
    recommended_products = df.iloc[top_indices]['product_id'].tolist()

    return recommended_products


In [30]:
# Step 7: Test the Function (Updated)
# Print the recommendations for two example users (user_id 1 and user_id 2)
for user_id in (1, 2):
    recommended_products = recommend_products(user_id, user_history_df, df, combined_embeddings, text_embeddings)
    print(f"\nRecommended Products for User {user_id}: {recommended_products}")



Recommended Products for User 1: [1, 2, 3]

Recommended Products for User 2: [3, 1, 2]
