In [None]:

import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
from sklearn.decomposition import TruncatedSVD
from tqdm import tqdm

In [None]:

# Load the sampled ratings table, then preview its first rows followed by
# its (rows, columns) shape.
ratings_df = pd.read_parquet("sample_user_ratings.parquet")
print(ratings_df.head(), ratings_df.shape, sep="\n")


   rating  product_id                       user_id
0     5.0  B09TR3DQ8R  AHHSGIZIOTG5UB5E535ZSYS3ZAZA
1     5.0  B003I4F6NG  AGBU4666V6SPXZ5CU3BO6OF3OUHA
2     5.0  B078JYNSCR  AFPKQFRRTCUUI52KWYNWZDWLG37A
3     3.0  B08GKY6N3X  AE3C6BG455J24M3ZFG3LTOJ6NMQQ
4     5.0  B07T9NS94T  AFHPAUGWK56E2AE6J5BL7IERSY5Q
(100000, 3)


In [None]:

# Distinct users / products, in order of first appearance in ratings_df.
user_ids = ratings_df['user_id'].unique()
product_ids = ratings_df['product_id'].unique()

# Positional lookups: raw id -> row (user) / column (product) of the matrix.
user_to_idx = {user: i for i, user in enumerate(user_ids)}
product_to_idx = {prod: i for i, prod in enumerate(product_ids)}

# Collapse repeated (user, product) ratings to their mean BEFORE building the
# matrix. scipy sums duplicate coordinates when a COO matrix is converted to
# CSR/CSC, so a user who rated the same product twice would otherwise end up
# with an inflated rating (e.g. 5 + 5 = 10) that distorts cosine similarity.
# NOTE(review): the mean is a neutral choice; switch to "last" if the data
# gains a timestamp and the latest rating should win.
ratings_agg = (
    ratings_df
    .groupby(['user_id', 'product_id'], sort=False, as_index=False)['rating']
    .mean()
)

# Map the user and product IDs to indices
row_indices = ratings_agg['user_id'].map(user_to_idx)
col_indices = ratings_agg['product_id'].map(product_to_idx)
data = ratings_agg['rating'].values

# Create a sparse user-item matrix in COO format (rows = users, cols = products)
R = coo_matrix((data, (row_indices, col_indices)), shape=(len(user_ids), len(product_ids)))
print("Matrix Shape: ", R.shape)

Matrix Shape:  (45927, 69877)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# CSR copy of the user-item matrix; its transpose puts products on the rows.
R_csr = R.tocsr()

# Pairwise cosine similarity between product rating vectors.
# NOTE(review): cosine_similarity returns a dense array by default, so this is
# an (n_products x n_products) float matrix -- with the ~70k products printed
# above that is tens of GB. Consider dense_output=False or a top-k
# nearest-neighbour search if memory becomes a problem.
item_similarity = cosine_similarity(R_csr.transpose())

# Label both axes with the product ids so neighbours can be looked up by id.
item_sim_df = pd.DataFrame(data=item_similarity, index=product_ids, columns=product_ids)




In [None]:
def get_similar_products(product_id, k=5):
    """Return the ids of the top-k products most similar to ``product_id``.

    Similarities are read from the ``product_id`` column of the module-level
    ``item_sim_df`` (a square frame indexed and labelled by product id).

    Args:
        product_id: Id of the query product.
        k: Maximum number of neighbours to return.

    Returns:
        A list of at most ``k`` product ids, ordered by descending similarity
        (ties keep their order in ``item_sim_df``). Empty if ``product_id`` is
        not a column of ``item_sim_df`` or ``k`` is not positive. The query
        product itself is never included.
    """
    if product_id not in item_sim_df or k <= 0:
        return []

    scores = item_sim_df[product_id]
    # Remove the query product explicitly instead of assuming it is ranked
    # first: when another product ties with it at the top score, slicing off
    # the first result would discard that neighbour and keep the product itself.
    neighbours = scores[scores.index != product_id]
    return neighbours.nlargest(k).index.tolist()

# Demo: echo one product id, then list its five nearest neighbours.
input_product = 'B003I4F6NG'
print(input_product)

similar_products = get_similar_products(product_id=input_product, k=5)
print("Similar Products:", similar_products)

B003I4F6NG
Similar Products: ['B07PXKYVSP', 'B001CDO9QA', 'B09TR3DQ8R', 'B078JYNSCR', 'B08GKY6N3X']


In [None]:
# Load the product metadata table, then preview its first rows followed by
# its (rows, columns) shape.
product_descriptions_df = pd.read_parquet("sample_item_info.parquet")
print(product_descriptions_df.head(), product_descriptions_df.shape, sep="\n")

                                               title  product_id  \
0  Heaven - 100 Stick Bulk Pack of In-Scents Incense  B001OO1JF6   
1  unison gifts YJF-550 4 INCH Shark WATERGLOBE, ...  B01LFX64NM   
2  Home Office Armless Mesh Ergonomic Executive C...  B09QKQJ1PS   
3  AUFANK Canopy Bed with Sturday Metal Bed Frame...  B07RVPZL35   
4  The Paisley Box Personalized Tumbler/Custom Tu...  B094C3B5PF   

                                         description  \
0                                                 []   
1                          [4 INCH SHARK WATERGLOBE]   
2  [What is the difference between HHS office cha...   
3  [■  The hot spots of this metal bed are its si...   
4                                                 []   

                                              images  
0  {'hi_res': [None, None], 'large': ['https://m....  
1  {'hi_res': ['https://m.media-amazon.com/images...  
2  {'hi_res': ['https://m.media-amazon.com/images...  
3  {'hi_res': ['https://m.media-am

In [None]:
def get_product_description(product_id):
    """Build a short text for a product: ``"<title>. <first description entry>"``.

    The product is looked up by ``product_id`` in the module-level
    ``product_descriptions_df``; the first matching row is used.

    Args:
        product_id: Id to match against the ``product_id`` column.

    Returns:
        The title, the separator ``". "`` and the first description entry
        concatenated, or ``None`` if no row matches. A missing or non-string
        title or description entry is treated as an empty string, so a product
        without a description yields ``"<title>. "``.
    """
    matches = product_descriptions_df[product_descriptions_df['product_id'] == product_id]
    if matches.empty:
        return None

    record = matches.iloc[0]

    title = record['title']
    if not isinstance(title, str):
        # None / NaN titles would otherwise raise on string concatenation.
        title = ""

    description = record['description']
    if isinstance(description, str):
        # A plain string is already the text; indexing it would keep one character.
        first_entry = description
    else:
        try:
            first_entry = description[0] if len(description) > 0 else ""
        except TypeError:
            # None, NaN or any other unsized value: no usable description.
            first_entry = ""
        if not isinstance(first_entry, str):
            # e.g. a list whose first element is None.
            first_entry = ""

    return title + ". " + first_entry
# Prints None when the id has no row in product_descriptions_df.
example_description = get_product_description('B003I4F6NG')
print(example_description)

None


In [None]:
import torch
from transformers import AutoModel, AutoTokenizer

# Checkpoint id of the BLAIR model; the tokenizer and the encoder must come
# from the same checkpoint, so it is named once.
BLAIR_CHECKPOINT = "hyp1231/blair-roberta-large"

tokenizer = AutoTokenizer.from_pretrained(BLAIR_CHECKPOINT)
model = AutoModel.from_pretrained(BLAIR_CHECKPOINT)

In [None]:
def get_complementary_products_blair(input_product, similar_products, k=3):
    """Re-rank candidate products by how well they match a "complements X" query.

    The input product's text (from ``get_product_description``) is prefixed
    with an instruction asking for complementary products and embedded with
    the module-level BLAIR ``tokenizer`` / ``model``. Each candidate's text is
    embedded the same way and candidates are ordered by descending cosine
    similarity to that query embedding.

    Args:
        input_product: Id of the product to find complements for.
        similar_products: Candidate product ids. ``input_product`` itself is
            ignored if it appears here.
        k: Maximum number of products to return.

    Returns:
        Up to ``k`` candidate ids. Candidates with a description come first,
        best match first; if fewer than ``k`` of them exist, candidates
        without a description are appended in their original order. If the
        input product or every candidate lacks a description, the first ``k``
        candidates are returned unranked.
    """
    # A product cannot complement itself, so never offer it back.
    candidates = [pid for pid in similar_products if pid != input_product]

    input_desc = get_product_description(input_product)

    # Keep (id, text) pairs together so that dropping candidates without a
    # description cannot misalign scores and ids. Passing a None text to the
    # tokenizer would raise.
    described = []
    for pid in candidates:
        desc = get_product_description(pid)
        if desc:
            described.append((pid, desc))

    if not input_desc or not described:
        return candidates[:k]  # Default to first k if descriptions are missing

    # Instruction that turns the input product text into a "complements" query
    context_prompt = ("I am looking for products that complement the following product, meaning they should be bought together.")

    # Create input texts for BLAIR: the query first, then one text per candidate
    texts = [context_prompt + " " + input_desc] + [desc for _, desc in described]
    inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")

    with torch.no_grad():
        # Embedding of each text = hidden state of its first token
        embeddings = model(**inputs, return_dict=True).last_hidden_state[:, 0]
        embeddings = embeddings / embeddings.norm(dim=1, keepdim=True)  # Normalize embeddings

    query_embedding = embeddings[0]
    similarity_scores = (query_embedding @ embeddings[1:].T).cpu().numpy()  # Cosine similarity

    # Rank by HIGHEST similarity to the complement query (argsort is ascending,
    # hence the reversal) and keep the best k.
    ranked_indices = np.argsort(similarity_scores)[::-1][:k]
    ranked = [described[i][0] for i in ranked_indices]

    # Fill any remaining slots with candidates that could not be scored,
    # mirroring the unranked fallback above.
    if len(ranked) < k:
        unscored = [pid for pid in candidates if pid not in ranked]
        ranked.extend(unscored[:k - len(ranked)])

    return ranked

In [None]:
# Demo candidates: the five product ids shown in the product table preview.
similar_products = [
    'B001OO1JF6',
    'B01LFX64NM',
    'B09QKQJ1PS',
    'B07RVPZL35',
    'B094C3B5PF',
]
complementary_products = get_complementary_products_blair('B09QKQJ1PS', similar_products, k=3)
print(complementary_products)

['B09QKQJ1PS', 'B07RVPZL35', 'B01LFX64NM']


In [None]:
# Show the text fed to the model for each product returned by the example above.
for ranked_product_id in ('B09QKQJ1PS', 'B07RVPZL35', 'B01LFX64NM'):
    print(get_product_description(ranked_product_id))

Home Office Armless Mesh Ergonomic Executive Chair Adjustable Mid Back Chair for Women Small Modern Swivel Rolling Desk ,Task Chair with Wheels for Home, Office, Blue. What is the difference between HHS office chair and other office chairs Our company has been manufacturing office chairs for over 10 years and we have a professional company. Compared with other this office chair is Ergonomic design/ Comfortable/ Build to last This office chair is designed with ergonomic structure that provides ease and comfort. The office chair is made of selected materials for durability and provides perfect back support. When spending a lot of time in front of the computer, this office chair can make you feel comfortable and reduce pain. Mesh office chair, comfortable and not stuffy for long hours. Thickened foam provides great support. If you're looking for a comfortable and economical chair,this office chair is the first choice. You can also use the chair to watch movies and play games for hours, al