# Data Preprocessing

In [1]:
import pandas as pd

# Load data
articles = pd.read_csv("articles.csv")
customers = pd.read_csv("customers.csv")
transactions = pd.read_csv("transactions_train.csv")

# Merge transactions with articles to get purchase details.
# Default (inner) join: transactions whose article_id is not in `articles` are dropped.
purchases = transactions.merge(articles, on="article_id")


def _most_frequent(values):
    """Return the most frequent non-null value in `values`, or NaN if there is none.

    `Series.mode()` ignores nulls by default, so it is empty when every value in
    the group is missing; indexing that result with `[0]` would raise. Ties resolve
    to the first entry of the mode result, exactly as `mode()[0]` did.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else float("nan")


# Aggregate purchase history for each customer
customer_history = purchases.groupby("customer_id").agg(
    top_colors=("colour_group_name", _most_frequent),
    top_garments=("garment_group_name", _most_frequent),
    purchase_count=("article_id", "count")
).reset_index()

# Merge with customer metadata (left join keeps customers without any purchases)
customers = customers.merge(customer_history, on="customer_id", how="left")

# Clean data (handle missing age/history)
customers["age"] = customers["age"].fillna(customers["age"].median())
customers["top_colors"] = customers["top_colors"].fillna("unknown")
customers["top_garments"] = customers["top_garments"].fillna("unknown")
# Customers with no matching history have made zero purchases, not an unknown number
customers["purchase_count"] = customers["purchase_count"].fillna(0).astype(int)

# Generate embeddings for customers and articles

In [None]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")


def _or_unknown(value):
    """Return "unknown" for a missing value so it is not rendered as the literal "nan"."""
    return "unknown" if pd.isna(value) else value


def _customer_text(row):
    """Build the sentence that describes one customer row for the encoder."""
    return (
        f"Age {int(row['age'])}, club member: {_or_unknown(row['club_member_status'])}. "
        f"Frequently purchases: {row['top_colors']} {row['top_garments']}."
    )


def _article_text(row):
    """Build the sentence that describes one article row for the encoder.

    The "Details:" clause is left out when `detail_desc` is missing instead of
    embedding the string "nan".
    """
    summary = f"{row['colour_group_name']} {row['product_type_name']}, {row['garment_group_name']}."
    if pd.isna(row["detail_desc"]):
        return summary
    return f"{summary} Details: {row['detail_desc']}"


# Customer descriptions
# (e.g., "Age 28, club member: ACTIVE. Frequently purchases: Black Jersey Fancy.")
customers["text"] = customers.apply(_customer_text, axis=1)

# Article descriptions (e.g., "Black Jeans, Casual Bottom. Details: Stretchable denim...")
articles["text"] = articles.apply(_article_text, axis=1)

# Generate embeddings; row i of each result corresponds to row i of the source frame
customer_embeddings = model.encode(customers["text"].tolist())
article_embeddings = model.encode(articles["text"].tolist())

In [3]:
# Show both embedding matrices, separated by one blank line
print(customer_embeddings, article_embeddings, sep="\n\n")

[[-0.02797653  0.07051973 -0.05869647 ... -0.05855928 -0.10431531
  -0.02881365]
 [-0.027063    0.01821919 -0.01693011 ... -0.13709706 -0.06905062
   0.02507331]
 [-0.03677861  0.02284298 -0.04318574 ... -0.11188638 -0.05914948
  -0.02920037]
 ...
 [-0.03673286  0.06444747 -0.0497928  ... -0.05970166 -0.09897787
  -0.02091572]
 [-0.03069127  0.00627806 -0.0019699  ... -0.10621992 -0.05224483
   0.00457215]
 [-0.02795942  0.00421076 -0.06229747 ... -0.10586537 -0.05249738
  -0.01472464]]

[[-0.04593923  0.11852942 -0.02479955 ... -0.01646774 -0.07933109
   0.0530654 ]
 [-0.02130283  0.12884027 -0.00710012 ...  0.01478944 -0.05145642
   0.04764028]
 [-0.02877827  0.1323706  -0.00681566 ...  0.01599214 -0.07031216
   0.04941762]
 ...
 [-0.07343706  0.11924972 -0.01072632 ... -0.01446385 -0.06523024
  -0.04872168]
 [-0.12494753  0.0708767   0.07486433 ... -0.08998467 -0.03891225
   0.0947194 ]
 [ 0.00733496  0.09287497  0.03617004 ... -0.06707434 -0.02687085
  -0.08259148]]


# Build FAISS Index for Articles

In [4]:
import faiss
import numpy as np

# FAISS works on float32 matrices, so cast the article vectors up front
article_embeddings = np.array(article_embeddings, dtype="float32")

# Exact (brute-force) L2 index sized to the embedding width, then load every article vector
dimension = article_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(article_embeddings)

# Recommendation Function

In [7]:
def recommend_clothes(customer_id, top_k=5):
    """Recommend the articles whose embeddings are nearest to a customer's embedding.

    Args:
        customer_id: Value to look up in ``customers["customer_id"]``.
        top_k: Maximum number of articles to return.

    Returns:
        A DataFrame holding the ``article_id`` and ``text`` columns of the matching
        ``articles`` rows, nearest first. If no customer has the given ID, a
        message string is returned instead.
    """
    # Locate the customer by row POSITION, not index label: `customer_embeddings`
    # was built from `customers["text"].tolist()`, so it is aligned with row order
    # regardless of what the DataFrame index happens to be.
    positions = np.flatnonzero((customers["customer_id"] == customer_id).to_numpy())
    if positions.size == 0:
        return f"No customer found with ID: {customer_id}"

    # FAISS expects a contiguous float32 matrix of shape (n_queries, dimension)
    query_embedding = np.ascontiguousarray(
        customer_embeddings[positions[0]], dtype="float32"
    ).reshape(1, -1)

    # Search FAISS
    distances, indices = index.search(query_embedding, top_k)

    # FAISS pads the result with -1 when fewer than top_k vectors are indexed;
    # drop those so `.iloc[-1]` does not silently return the last article.
    hits = indices[0]
    hits = hits[hits >= 0]

    # Get recommended articles
    recommendations = articles.iloc[hits][["article_id", "text"]]
    return recommendations

# Example usage: fetch recommendations for one customer ID, then print them
example_recommendations = recommend_clothes(
    "00000dbacae5abe5e23885899a1fa44253a17956c6d1c3d25f88aa139fdfc657"
)
print(example_recommendations)

       article_id                                               text
85655   820541001  Black Top, Jersey Fancy. Details: Fitted body ...
5621    497741001  Black T-shirt, Jersey Fancy. Details: T-shirt ...
11793   549850001  Black T-shirt, Jersey Fancy. Details: Heavily ...
6462    506166005  Black T-shirt, Jersey Fancy. Details: Short-sl...
6477    506166058  Black T-shirt, Jersey Fancy. Details: Short-sl...
