In [1]:
import pandas as pd
import ollama
from sklearn.metrics.pairwise import cosine_similarity
import timeit

# Reaching this line means every import above resolved without raising.
print("✅ Libraries imported successfully.")

✅ Libraries imported successfully.


In [2]:
# --- 1. Data Preparation ---

# Every product is written as a (name, description) pair so that the two
# fields of one item sit next to each other rather than in parallel lists.
_catalog_rows = [
    ("Boho Maxi Dress", "Flowy, with earthy tones and floral patterns for a free-spirited look."),
    ("Streetwear Hoodie", "Oversized fit, bold graphic print, and heavyweight cotton for city streets."),
    ("Minimalist Blazer", "A clean, tailored cut in a neutral tone for a sharp, professional look."),
    ("Vintage Leather Jacket", "Distressed authentic leather with a timeless, iconic biker silhouette."),
    ("Cozy Knit Sweater", "A soft, warm cable-knit pullover for relaxing evenings by the fire."),
    ("High-Top Sneakers", "Classic design with vibrant color accents. The perfect urban footwear."),
]

# Column-oriented view of the same rows, in the same order.
data = {
    "name": [row_name for row_name, _ in _catalog_rows],
    "desc": [row_desc for _, row_desc in _catalog_rows],
}
products_df = pd.DataFrame(data)

print("--- Product Catalog ---")
products_df

--- Product Catalog ---


Unnamed: 0,name,desc
0,Boho Maxi Dress,"Flowy, with earthy tones and floral patterns f..."
1,Streetwear Hoodie,"Oversized fit, bold graphic print, and heavywe..."
2,Minimalist Blazer,"A clean, tailored cut in a neutral tone for a ..."
3,Vintage Leather Jacket,"Distressed authentic leather with a timeless, ..."
4,Cozy Knit Sweater,"A soft, warm cable-knit pullover for relaxing ..."
5,High-Top Sneakers,Classic design with vibrant color accents. The...


In [3]:
# --- 2. Embeddings Generation ---
print("Generating embeddings for all products... (this may take a moment)")


def _embed_description(text):
    """Return the 'embedding' field of Ollama's response for one description."""
    return ollama.embeddings(model='nomic-embed-text', prompt=text)['embedding']


# One Ollama call per row of the 'desc' column, collected in row order.
product_embeddings = list(map(_embed_description, products_df['desc']))

# Store the vectors next to the products they describe.
products_df['embeddings'] = product_embeddings

print("✅ Embeddings generated and added to DataFrame.")
products_df.head() # Preview the first rows, now including the 'embeddings' column

Generating embeddings for all products... (this may take a moment)
✅ Embeddings generated and added to DataFrame.


Unnamed: 0,name,desc,embeddings
0,Boho Maxi Dress,"Flowy, with earthy tones and floral patterns f...","[-0.04384007304906845, 1.2307884693145752, -4...."
1,Streetwear Hoodie,"Oversized fit, bold graphic print, and heavywe...","[0.12464896589517593, 0.9056154489517212, -4.0..."
2,Minimalist Blazer,"A clean, tailored cut in a neutral tone for a ...","[0.7826835513114929, 0.12513747811317444, -3.7..."
3,Vintage Leather Jacket,"Distressed authentic leather with a timeless, ...","[0.8005531430244446, 0.21221484243869781, -3.9..."
4,Cozy Knit Sweater,"A soft, warm cable-knit pullover for relaxing ...","[-0.9872151017189026, 0.5584101676940918, -4.2..."


In [4]:
# --- 3. Vector Search Simulation Logic ---

def find_vibe_matches(query, df, top_n=3, score_threshold=0.6, model='nomic-embed-text'):
    """Find the top N products whose embeddings best match the query vibe.

    The query is embedded with Ollama, compared against every vector in
    ``df['embeddings']`` using cosine similarity, and the rows whose score
    reaches ``score_threshold`` are returned best-first.

    Args:
        query: Free-text description of the vibe to search for.
        df: Product catalog. Must contain an 'embeddings' column holding one
            vector per row. It is not modified.
        top_n: Maximum number of rows to return.
        score_threshold: Minimum cosine similarity a row needs to be kept.
        model: Name of the Ollama embedding model used for the query. It
            should be the same model that produced ``df['embeddings']``;
            vectors from different models are not comparable.

    Returns:
        A new DataFrame with the columns of ``df`` plus a 'score' column,
        sorted by descending score and limited to ``top_n`` rows. When there
        is nothing to return (empty catalog, blank query, or no row reaching
        the threshold) the frame is empty but still carries those columns,
        so callers can rely on one schema in every case.
    """
    # Nothing to rank or nothing to search for: skip the embedding call (and
    # the error cosine_similarity raises on an empty matrix) and hand back an
    # empty result with the usual columns.
    if df.empty or query is None or not str(query).strip():
        return df.iloc[0:0].assign(score=pd.Series(dtype='float64'))

    # Embed the user's query
    query_embedding = ollama.embeddings(model=model, prompt=query)['embedding']

    # One similarity per catalog row, in row order.
    similarities = cosine_similarity([query_embedding], list(df['embeddings']))[0]

    # assign() builds a new frame, leaving the caller's catalog untouched.
    scored = df.assign(score=similarities)
    strong_matches = scored[scored['score'] >= score_threshold]

    # Sorting and truncating an empty selection keeps its columns, so the
    # no-match case needs no special branch.
    return strong_matches.sort_values(by='score', ascending=False).head(top_n)

def display_results(query, matches_df):
    """Print a search report for one query.

    The report is framed by two 50-character '=' rules. Between them it shows
    the query and then either an apology (when ``matches_df`` is empty) or,
    for each row, the 'name', the 'score' rounded to two decimals, and the
    'desc'. Nothing is returned.
    """
    rule = "=" * 50
    print(rule)
    print(f"QUERY: '{query}'")
    if not matches_df.empty:
        print("\nHere are your top matches!")
        # Walk the rows by position; each one is read as a Series keyed by column.
        for position in range(len(matches_df)):
            row = matches_df.iloc[position]
            print(f"\n  - Name: {row['name']} (Score: {row['score']:.2f})")
            print(f"  - Description: {row['desc']}")
    else:
        print("\nSorry, couldn't find a strong match for that vibe.")
    # The closing rule carries its own trailing blank line.
    print(rule + "\n")

# Printed once both `def` statements above have executed in this cell.
print("✅ Core functions `find_vibe_matches` and `display_results` are now defined.")

✅ Core functions `find_vibe_matches` and `display_results` are now defined.


In [5]:
# --- 4. Test & Evaluation ---

print("--- Running Test Queries ---\n")

# The three probe vibes, declared together so the scenarios can be compared at a glance:
#   query1 - good, specific match
#   query2 - different vibe, should match the sweater
#   query3 - edge case, should find no strong matches
query1, query2, query3 = (
    "energetic urban chic",
    "cozy and relaxed for a quiet night in",
    "tropical beach vacation",
)

# Search and report one scenario at a time, so each report is printed
# immediately after its own search has run.
matches1 = find_vibe_matches(query1, products_df)
display_results(query1, matches1)

matches2 = find_vibe_matches(query2, products_df)
display_results(query2, matches2)

matches3 = find_vibe_matches(query3, products_df)
display_results(query3, matches3)

--- Running Test Queries ---

QUERY: 'energetic urban chic'

Here are your top matches!

  - Name: High-Top Sneakers (Score: 0.66)
  - Description: Classic design with vibrant color accents. The perfect urban footwear.

QUERY: 'cozy and relaxed for a quiet night in'

Here are your top matches!

  - Name: Cozy Knit Sweater (Score: 0.62)
  - Description: A soft, warm cable-knit pullover for relaxing evenings by the fire.

QUERY: 'tropical beach vacation'

Sorry, couldn't find a strong match for that vibe.



In [6]:
# --- Latency Test ---

# Number of timed searches. Defined once so the timeit call and the averaging
# below can never disagree about how many runs were made.
LATENCY_RUNS = 10

# timeit returns the total seconds for all runs. Each run goes through the
# whole of find_vibe_matches, including its call to Ollama for the query
# embedding, so the figure is end-to-end rather than similarity-only.
search_time = timeit.timeit(lambda: find_vibe_matches("a test query", products_df), number=LATENCY_RUNS)
avg_latency_ms = (search_time / LATENCY_RUNS) * 1000

print(f"✅ Average search time is {avg_latency_ms:.2f} ms per query.")

✅ Average search time is 37.03 ms per query.


Project Reflection: Vibe Matcher

1. Improvement: Transitioning from Linear Scan to Real-Time Vector Search. Our current prototype uses a "linear scan" (computing the similarity between the query and every item in the catalog), which is perfectly adequate for a small catalog but does not scale: the cost of each query grows linearly with the number of products. The immediate next step is integrating a dedicated vector database such as Pinecone, Weaviate, or Milvus. This would enable low-latency, high-throughput Approximate Nearest Neighbor (ANN) searches, allowing the system to handle millions of products and thousands of concurrent users, and transforming it from a prototype into a production-ready service.

2. Improvement: Enhancing Semantic Nuance with Fine-Tuning. The pre-trained embedding model understands general language well, but it doesn't know the specific jargon of fashion. To elevate our matching quality, we would fine-tune an embedding model on a curated dataset of fashion descriptions, product tags, and user reviews. This process would teach the model the subtle differences between "chic," "boho," and "gorpcore," leading to far more accurate and intuitive "vibe" matches that feel tailor-made for the fashion domain.

3. Improvement: Evolving to Multi-Modal Search for an Intuitive UX. The future of product discovery isn't just text. The next evolution is a multi-modal system where a user can upload a photo from Instagram or a screenshot of an outfit. By using a model like CLIP, we can generate embeddings for both images and text in the same vector space. This would allow a user to search with an image, text, or a combination of both (e.g., "a dress like this, but in blue"), creating a truly seamless and powerful user experience.

4. Edge Cases: Handling Ambiguity and Intent. Our system handles the "no strong match" case by applying a similarity score threshold and returning no results when nothing clears it. A harder challenge is query ambiguity. A user searching for "cool jacket" could mean a leather jacket, a streetwear hoodie, or a blazer. The current system cannot tell these intents apart: it returns the same generic ranking to every user. A production system would address this with a personalization layer, using the user's past behavior and preferences to disambiguate the query and re-rank results to match their specific definition of "cool."