#### 1. Load enriched data

In [1]:
import pandas as pd
from pathlib import Path

DATA_DIR = Path("../data")

# Pinned snapshot of the enriched data set.
ENRICHED_FILE = DATA_DIR / "interior_materials_enriched_20260219_1714.parquet"

# Columns that the recommender and the result tables in this notebook index by name.
REQUIRED_COLUMNS = ["material_name", "category", "price_etb", "unit_norm", "embedding"]

df = pd.read_parquet(ENRICHED_FILE)

# Fail fast with a readable message instead of a KeyError deep inside a later cell.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
assert not missing_columns, f"{ENRICHED_FILE.name} is missing columns: {missing_columns}"

print(f"Loaded {len(df)} rows with embeddings")
print("Columns:", df.columns.tolist())
display(df.head(3))

Loaded 299 rows with embeddings
Columns: ['material_name', 'price_etb', 'price_text', 'unit', 'last_checked', 'detail_url', 'category', 'source', 'scraped_at', 'unit_norm', 'keywords', 'text_for_embedding', 'embedding']


Unnamed: 0,material_name,price_etb,price_text,unit,last_checked,detail_url,category,source,scraped_at,unit_norm,keywords,text_for_embedding,embedding
0,Clear Glass - 3mm thick,1202.0,"1,202.00 Brper m2",m2,"May 29, 2024",,Finishing,2merkato.com,2026-02-19 16:44:00,square meter,"[clear, color:clear, size/thickness]",Clear Glass - 3mm thick Finishing square meter...,"[0.079825744, 0.03837128, -0.014964104, -0.011..."
1,Clear Glass - 4mm thick,1537.0,"1,537.00 Brper m2",m2,"May 29, 2024",,Finishing,2merkato.com,2026-02-19 16:44:00,square meter,"[clear, color:clear, size/thickness]",Clear Glass - 4mm thick Finishing square meter...,"[0.09515599, 0.03919774, -0.01531778, -0.00241..."
2,Clear Glass - 5mm thick,1767.0,"1,767.00 Brper m2",m2,"May 29, 2024",,Finishing,2merkato.com,2026-02-19 16:44:00,square meter,"[clear, color:clear, size/thickness]",Clear Glass - 5mm thick Finishing square meter...,"[0.08462944, 0.03924823, -0.015149601, 0.00537..."


#### 2. Load model & define recommender function

In [2]:
import torch  
import torch._prims 

from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Sentence-embedding model used to encode free-text queries.
# NOTE(review): presumably this must be the same model that produced df["embedding"];
# otherwise query and catalogue vectors are not comparable. Confirm against the enrichment step.
MODEL_NAME = "all-MiniLM-L6-v2"

model = SentenceTransformer(MODEL_NAME)
print("Model loaded successfully.")

def recommend_materials(
    query_text: str,
    df: pd.DataFrame,
    top_k: int = 5,
    min_similarity: float = 0.4
):
    """Return the catalogue rows most similar to a free-text query.

    The query is encoded with the module-level ``model`` and compared against
    the vectors stored in ``df["embedding"]`` using cosine similarity.

    Parameters
    ----------
    query_text : str
        Free-text search string. ``None``, non-string or blank input yields an
        empty result instead of raising.
    df : pd.DataFrame
        Catalogue with an ``embedding`` column (one equal-length vector per
        row) plus ``material_name``, ``category``, ``price_etb`` and
        ``unit_norm``. ``detail_url`` is included in the result when present.
    top_k : int, default 5
        Maximum number of rows to return. Values <= 0 yield an empty result.
    min_similarity : float, default 0.4
        Rows whose similarity (rounded to 3 decimals) is below this value are
        dropped.

    Returns
    -------
    pd.DataFrame
        At most ``top_k`` rows sorted by descending ``similarity``, with the
        columns ``material_name, category, price_etb, unit_norm, similarity``
        (and ``detail_url`` if ``df`` has it). The same columns are present
        when the result is empty, so callers can rely on a stable schema.
    """
    cols = ["material_name", "category", "price_etb", "unit_norm", "similarity"]
    if "detail_url" in df.columns:
        cols.append("detail_url")

    # Nothing to search for / nothing to search in: return an empty frame with
    # the normal schema before touching the model or np.stack (which raises on
    # an empty sequence).
    if (
        not isinstance(query_text, str)
        or not query_text.strip()
        or df.empty
        or top_k <= 0
    ):
        return pd.DataFrame(columns=cols)

    query_emb = model.encode([query_text], normalize_embeddings=True)[0]
    embeddings_array = np.stack(df["embedding"].to_numpy())
    similarities = cosine_similarity([query_emb], embeddings_array)[0]

    # Promote to float64 before rounding. With float32 inputs the scores stay
    # float32, so 0.458 prints as 0.4580000042915344 and a value displayed as
    # 0.45 can compare as < 0.45 against a Python float threshold.
    scores = np.asarray(similarities, dtype=np.float64).round(3)

    # Only carry the output columns forward; avoids copying every embedding
    # vector on each query.
    source_cols = [col for col in cols if col != "similarity"]
    result_df = df[source_cols].assign(similarity=scores)

    result_df = result_df[result_df["similarity"] >= min_similarity]
    result_df = result_df.sort_values("similarity", ascending=False).head(top_k)

    return result_df[cols]



Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Model loaded successfully.


#### 3. Test the Recommender

In [3]:
test_queries = [
    "modern blue tiles under 2000 ETB",
    "white marble for kitchen counter",
    "frosted glass partition",
    "red paint for bedroom wall",
    "cheap sanitary ware for bathroom",
    "galvanized metal sheet for ceiling",
    "clear glass 5mm thick",
    "electrical switch for home"
]

# Search settings live in one place so the call and the printed messages
# cannot drift apart when the threshold is tuned.
TOP_K = 8
MIN_SIMILARITY = 0.4

print(f"Testing recommender with lowered threshold ({MIN_SIMILARITY})\n")

for q in test_queries:
    print(f"\n{'='*60}")
    print(f"Query: {q}")
    print(f"{'-'*60}")

    results = recommend_materials(
        query_text=q,
        df=df,
        top_k=TOP_K,
        min_similarity=MIN_SIMILARITY
    )

    if results.empty:
        print(f"  No matches found above {MIN_SIMILARITY} similarity.")
        print(
            "  → Suggestions: Try a more general query or lower min_similarity "
            f"to {MIN_SIMILARITY - 0.05:.2f}"
        )
        continue

    # Format prices for reading; similarity is already rounded by the recommender.
    display_df = results.assign(
        price_etb=results["price_etb"].map(
            lambda x: f"{x:,.0f} ETB" if pd.notna(x) else "N/A"
        )
    )

    # Show exactly the columns the recommender returned: detail_url is optional
    # there, so selecting it by name here could raise KeyError.
    display(display_df)

    top_match = display_df.iloc[0]
    # Fixed 3-decimal formatting avoids float32 repr noise such as 0.4580000042915344.
    print(f"  Top match: {top_match['material_name']} (score: {top_match['similarity']:.3f})")
    print(f"  Total matches: {len(display_df)}")

Testing recommender with lowered threshold (0.4)


Query: modern blue tiles under 2000 ETB
------------------------------------------------------------


Unnamed: 0,material_name,category,price_etb,unit_norm,similarity,detail_url
26,Classic 2000+ Tint colors,Sanitary,642 ETB,unknown,0.458,
157,PVC Y: φ50 double ring,Tiles & Ceramics,408 ETB,piece,0.454,
158,PVC reducer: 110-50 double ring,Tiles & Ceramics,408 ETB,piece,0.45,
15,Reflective Glass - 5mm thick ocean blue ref.,Finishing,"1,967 ETB",square meter,0.436,
187,Teflon,Tiles & Ceramics,90 ETB,piece,0.435,
159,PVC T: φ110 double ring,Tiles & Ceramics,942 ETB,piece,0.434,
138,Water Closet: high level flashing (Turkish type),Tiles & Ceramics,"4,500 ETB",piece,0.433,
149,Water tanker: 2000lt fiber glass,Tiles & Ceramics,"12,200 ETB",piece,0.432,


  Top match: Classic 2000+ Tint colors (score: 0.4580000042915344)
  Total matches: 8

Query: white marble for kitchen counter
------------------------------------------------------------


Unnamed: 0,material_name,category,price_etb,unit_norm,similarity,detail_url
36,Texture Paint (Quartz) Super White,Sanitary,109 ETB,unknown,0.519,
27,Ecosilk Artique Special Pearl paint,Sanitary,882 ETB,kilogram,0.47,
22,Quartz Paint 200 (16lit or 25kg),Sanitary,"1,550 ETB",unknown,0.451,
144,"Kitchen sink: double bowel, 50 x 120 (Milano)",Tiles & Ceramics,"5,500 ETB",piece,0.406,
136,Water Closet: low flash (Tabor Ceramic),Tiles & Ceramics,"26,185 ETB",piece,0.404,
143,"Kitchen sink: double bowel, 50 x 120 (Aqua)",Tiles & Ceramics,"3,700 ETB",piece,0.4,


  Top match: Texture Paint (Quartz) Super White (score: 0.5189999938011169)
  Total matches: 6

Query: frosted glass partition
------------------------------------------------------------


Unnamed: 0,material_name,category,price_etb,unit_norm,similarity,detail_url
11,Frosted Glass - 6mm thick,Finishing,"2,254 ETB",square meter,0.562,
8,Frosted Glass - 3mm thick,Finishing,"1,967 ETB",square meter,0.557,
9,Frosted Glass - 4mm thick,Finishing,"1,120 ETB",square meter,0.557,
10,Frosted Glass - 5mm thick,Finishing,"2,063 ETB",square meter,0.539,


  Top match: Frosted Glass - 6mm thick (score: 0.5619999766349792)
  Total matches: 4

Query: red paint for bedroom wall
------------------------------------------------------------


Unnamed: 0,material_name,category,price_etb,unit_norm,similarity,detail_url
60,Epoxy Wall Paint (3KG),Sanitary,800 ETB,kilogram,0.474,
61,Epoxy Wall Paint Hardener,Sanitary,850 ETB,kilogram,0.41,


  Top match: Epoxy Wall Paint (3KG) (score: 0.4740000069141388)
  Total matches: 2

Query: cheap sanitary ware for bathroom
------------------------------------------------------------


Unnamed: 0,material_name,category,price_etb,unit_norm,similarity,detail_url
144,"Kitchen sink: double bowel, 50 x 120 (Milano)",Tiles & Ceramics,"5,500 ETB",piece,0.486,
142,"Kitchen sink: single bowel, 50 x 120",Tiles & Ceramics,"6,000 ETB",piece,0.483,
143,"Kitchen sink: double bowel, 50 x 120 (Aqua)",Tiles & Ceramics,"3,700 ETB",piece,0.482,
38,Budget Synthetic Enamel,Sanitary,295 ETB,kilogram,0.476,
24,Budget Emulsion paint,Sanitary,93 ETB,unknown,0.471,
31,Premium Acyrylic Wall Putty,Sanitary,97 ETB,unknown,0.463,
26,Classic 2000+ Tint colors,Sanitary,642 ETB,unknown,0.458,
21,Plastic Paint,Sanitary,300 ETB,unknown,0.449,


  Top match: Kitchen sink: double bowel, 50 x 120 (Milano) (score: 0.4860000014305115)
  Total matches: 8

Query: galvanized metal sheet for ceiling
------------------------------------------------------------


Unnamed: 0,material_name,category,price_etb,unit_norm,similarity,detail_url
239,G32 Corrugated Galvanized Iron Sheet - KOSPI,Roofing & Ceiling,"1,100 ETB",piece,0.685,
238,G32 Corrugated Galvanized Iron Sheet - Akaki,Roofing & Ceiling,"1,185 ETB",piece,0.682,
236,G28 Corrugated Galvanized Iron Sheet - KOSPI,Roofing & Ceiling,"1,250 ETB",piece,0.673,
237,G30 Corrugated Galvanized Iron Sheet - Akaki,Roofing & Ceiling,"1,275 ETB",piece,0.666,
235,G28 Corrugated Galvanized Iron Sheet - Akaki,Roofing & Ceiling,"1,335 ETB",piece,0.658,
294,Metal Flashing-Galvanized Sheet G-28 for Gutte...,Roofing & Ceiling,996 ETB,piece,0.654,
241,G35 Corrugated Galvanized Iron Sheet - KOSPI,Roofing & Ceiling,900 ETB,piece,0.653,
240,G35 Corrugated Galvanized Iron Sheet - Akaki,Roofing & Ceiling,"1,000 ETB",piece,0.643,


  Top match: G32 Corrugated Galvanized Iron Sheet - KOSPI (score: 0.6850000023841858)
  Total matches: 8

Query: clear glass 5mm thick
------------------------------------------------------------


Unnamed: 0,material_name,category,price_etb,unit_norm,similarity,detail_url
2,Clear Glass - 5mm thick,Finishing,"1,767 ETB",square meter,0.739,
1,Clear Glass - 4mm thick,Finishing,"1,537 ETB",square meter,0.719,
3,Clear Glass - 6mm thick,Finishing,"2,495 ETB",square meter,0.714,
0,Clear Glass - 3mm thick,Finishing,"1,202 ETB",square meter,0.704,
18,Tinted Glass - 5mm thick,Finishing,"1,680 ETB",square meter,0.7,
19,Tinted Glass - 6mm thick,Finishing,"2,349 ETB",square meter,0.683,
17,Tinted Glass - 4mm thick,Finishing,"1,490 ETB",square meter,0.666,
6,Clear and Colored Glass: 6mm,Finishing,"1,350 ETB",square meter,0.665,


  Top match: Clear Glass - 5mm thick (score: 0.7390000224113464)
  Total matches: 8

Query: electrical switch for home
------------------------------------------------------------


Unnamed: 0,material_name,category,price_etb,unit_norm,similarity,detail_url
81,Single switch,Electrical,540 ETB,piece,0.52,
83,Single switch with bell,Electrical,650 ETB,piece,0.49,
82,Double switch,Electrical,400 ETB,piece,0.473,
84,Double switch with bell,Electrical,425 ETB,piece,0.431,


  Top match: Single switch (score: 0.5199999809265137)
  Total matches: 4
