In [None]:
%%capture
!pip install gdown
!gdown "1igAuIEW_4h_51BG1o05WS0Q0-Cp17_-t&confirm=t"
!unzip data


In [None]:
%%capture
!pip install -U fashion-clip

In [None]:
import sys
#sys.path.append("fashion-clip/")
from fashion_clip.fashion_clip import FashionCLIP
import pandas as pd
import numpy as np
from collections import Counter
from PIL import Image
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.linear_model import LogisticRegression

In [None]:
%%capture
# Instantiate FashionCLIP under the model name 'fashion-clip' and bind it to
# `fclip`, which the embedding cells below call. `%%capture` hides whatever
# this cell prints.
fclip = FashionCLIP('fashion-clip')

In [None]:
articles = pd.read_csv("data_for_fashion_clip/articles.csv")

# Keep one row per distinct description.
subset = articles.drop_duplicates("detail_desc").copy()

# Discard rows whose product group is "Unknown".
has_known_group = ~subset["product_group_name"].isin(["Unknown"])
subset = subset[has_known_group]

# FashionCLIP has a limit of 77 tokens. As a conservative proxy, keep only
# descriptions with more than 4 and fewer than 40 whitespace-separated words.
word_counts = subset["detail_desc"].apply(lambda desc: len(str(desc).split()))
subset = subset[word_counts.between(5, 39)]

# Keep only product types that occur more than 10 times in the remaining rows.
type_counts = Counter(subset["product_type_name"].tolist())
most_frequent_product_types = [name for name, count in type_counts.items() if count > 10]
subset = subset[subset["product_type_name"].isin(most_frequent_product_types)]

# The table has many columns; later cells use the description and a few others.
subset.head(10)

Vectorization of Clothing Inputs

In [None]:
from PIL import Image
import torch

# Load FashionCLIP
# NOTE(review): this repeats the import and the `fclip` construction from the
# earlier cells; running it rebinds `fclip` to a new FashionCLIP('fashion-clip').
from fashion_clip.fashion_clip import FashionCLIP
fclip = FashionCLIP('fashion-clip')


Connect to ChromaDB

In [None]:
pip install chromadb

import chromadb

client = chromadb.Client()
collection = client.create_collection(name="clothing_items")

for idx, row in articles.iterrows():
    try:
        # Load and preprocess image
        image_path = f"images/{row['article_id']}.jpg"  # adjust path based on your folder structure
        image = Image.open(image_path).convert("RGB")

        # Generate image embedding
        embedding = fclip.encode_images([image], batch_size = 1)[0].tolist()  # Convert to list for ChromaDB

        # Construct metadata
        metadata = {
            "category": row["product_type_name"],
            "season": str(row.get("season", "unknown")),  # If season exists
            "gender": row.get("index_name", "unknown")
        }

        # Add to ChromaDB
        collection.add(
            embeddings=[embedding],
            documents=[row["detail_desc"]],
            metadatas=[metadata],
            ids=[f"item_{row['article_id']}"]
        )

    except Exception as e:
        print(f"Skipping item {row['article_id']} due to error: {e}")










Querying the db

In [None]:
# Search with an uploaded image
# The context manager closes the uploaded file once it has been converted.
with Image.open("user_upload.jpg") as uploaded_file:
    query_image = uploaded_file.convert("RGB")
query_embedding = fclip.encode_images([query_image], batch_size = 1)[0].tolist()

# Chroma accepts exactly one operator at the top level of `where`, so a filter
# on two metadata fields has to be combined explicitly with "$and".
# "season" only matches items whose stored metadata carries that value; the
# indexing cell stores "unknown" when the article table has no season column.
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=5,
    where={"$and": [{"season": "summer"}, {"gender": "Ladieswear"}]}  # Optional metadata filtering
)

**Note:** If we don't have image data available, we can also embed the text descriptions with `encode_text`.

In [None]:
from fashion_clip.fashion_clip import FashionCLIP
import chromadb
import pandas as pd

# Load FashionCLIP
fclip = FashionCLIP('fashion-clip')

# Rebuild the filtered article table here so the cell also works on its own:
# one row per description, no "Unknown" product group, descriptions with more
# than 4 and fewer than 40 words, product types seen more than 10 times.
df = pd.read_csv("data_for_fashion_clip/articles.csv")
df = df.drop_duplicates("detail_desc")
df = df[~df["product_group_name"].isin(["Unknown"])]
description_lengths = df["detail_desc"].apply(lambda desc: len(str(desc).split()))
df = df[description_lengths.between(5, 39)]
type_counts = df["product_type_name"].value_counts()
most_frequent = type_counts[type_counts > 10].index
df = df[df["product_type_name"].isin(most_frequent)].copy()

# Initialize ChromaDB
client = chromadb.Client()
collection = client.get_or_create_collection(name="clothing_text_embeddings")

# Encode each description and insert it into ChromaDB, one row per add() call.
# A failing row is reported and skipped.
for _, row in df.iterrows():
    try:
        description = row["detail_desc"]
        vector = fclip.encode_text([description], batch_size=1)[0].tolist()

        collection.add(
            ids=[f"text_{row['article_id']}"],
            embeddings=[vector],
            documents=[description],
            metadatas=[
                {
                    "category": row["product_type_name"],
                    "group": row["product_group_name"],
                    "gender": row["index_name"],
                    "id": str(row["article_id"]),
                }
            ],
        )

    except Exception as e:
        print(f"Error embedding item {row['article_id']}: {e}")

In [None]:
#

import gradio as gr

def search_clothes(user_query):
    """Outline of the search pipeline: parse filters, embed the query, search, format.

    NOTE(review): `get_embedding`, `search_db` and `format_results` are not
    defined in the cells visible here, and `parse_query` is only defined in a
    later cell. Confirm they exist before serving requests from this cell.
    """
    query_filters = parse_query(user_query)  # GPT or rule-based
    query_vector = get_embedding(user_query)
    hits = search_db(query_vector, query_filters)
    return format_results(hits)

gr.Interface(fn=search_clothes, inputs="text", outputs="html").launch()


AI Search Interface

In [None]:
import gradio as gr
from fashion_clip.fashion_clip import FashionCLIP
import chromadb

# Model and vector collection used by the search app below
fclip = FashionCLIP("fashion-clip")
client = chromadb.Client()
collection = client.get_or_create_collection(name="clothing_text_embeddings")

In [None]:
# Placeholder keyword parser (could be replaced by a GPT-based one later)
def parse_query(text):
    """Derive metadata filters from keywords found in a free-text query.

    Matching is a case-insensitive substring test. Rules are applied in order,
    so when both "summer" and "winter" occur the season ends up as "winter".

    Args:
        text: The user's query string.

    Returns:
        A dict with an optional "season" key ("summer" or "winter") and an
        optional "category" key ("Dress"); empty when no keyword matches.
    """
    lowered = text.lower()
    keyword_rules = (
        ("summer", "season", "summer"),
        ("winter", "season", "winter"),
        ("dress", "category", "Dress"),
    )
    filters = {}
    for keyword, field, value in keyword_rules:
        if keyword in lowered:
            filters[field] = value
    return filters

# Search function
def search_clothes(user_query):
    """Run a text query against the ChromaDB collection and render the hits as HTML.

    The query is embedded with `fclip.encode_text`, keyword filters come from
    `parse_query`, and the five nearest stored items are requested from
    `collection`.

    NOTE(review): the text collection built in this notebook stores only
    category/group/gender/id metadata, so a "season" filter produced by
    `parse_query` would match nothing there. Confirm against the indexed data.

    Args:
        user_query: Free-text search string typed by the user.

    Returns:
        An HTML string with one entry per hit, "No results found." when the
        query returns nothing, or "Error: <message>" if any step raises.
    """
    import html  # local import keeps this cell self-contained

    try:
        # Encode the query text into an embedding vector
        query_vector = fclip.encode_text([user_query], batch_size=1)[0].tolist()
        # Look for known keywords to turn into metadata filters
        filters = parse_query(user_query)

        # Chroma accepts exactly one operator at the top level of `where`:
        # a single field can be passed as-is, several must be joined by "$and".
        if not filters:
            where = None
        elif len(filters) == 1:
            where = filters
        else:
            where = {"$and": [{field: value} for field, value in filters.items()]}

        # Vector search: fetch the stored vectors closest to the query vector
        results = collection.query(
            query_embeddings=[query_vector],
            n_results=5,
            where=where
        )

        # Format results. Stored text is escaped so that characters such as
        # "<" or "&" in a description cannot break or inject markup.
        items = results["documents"][0]
        metadatas = results["metadatas"][0]
        display = "".join(
            f"<b>{html.escape(str(meta.get('category', 'Item')))}</b>: "
            f"{html.escape(str(item))}<br><i>{html.escape(str(meta))}</i><br><br>"
            for item, meta in zip(items, metadatas)
        )
        return display or "No results found."

    except Exception as e:
        # Best-effort UI: surface the failure in the output panel instead of raising.
        return f"Error: {html.escape(str(e))}"

# Launch app: expose `search_clothes` through a Gradio interface that takes a
# text input and renders the returned string as HTML.
gr.Interface(fn=search_clothes, inputs="text", outputs="html", title="AI Fashion Search").launch()
