In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# 1. Load the mock dataset
# Path is relative to the notebook's working directory.
dataset_path = "../data/mock_artwork_metadata.json"
df = pd.read_json(dataset_path)
print("Loaded dataset")
# NOTE: a bare expression only renders when it is the last statement of a
# cell, so this preview does not produce any visible output here.
df.head()

# 2. Initialize the text encoder.
# The checkpoint is referenced by its HuggingFace identifier; the captured
# notebook output shows it being downloaded over HTTP on first use.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
encoder = SentenceTransformer(model_name_or_path=model_name)
print(f"Loaded encoder model: {model_name}")

# 3. Create text representations (embeddings) for each artwork
# Each artwork is described by "title - artist - description - style".
# Missing values are replaced with an empty string and every field is cast to
# str first: plain `+` concatenation would turn the whole row into NaN as soon
# as one field is missing (and fail outright on non-string columns), which the
# encoder cannot tokenize.
text_columns = ['title', 'artist', 'description', 'style']
text_fields = df[text_columns].fillna("").astype(str)
texts_to_encode = [
    " - ".join(fields)
    for fields in text_fields.itertuples(index=False, name=None)
]

# One embedding row per artwork, returned as a NumPy array.
embeddings = encoder.encode(texts_to_encode, convert_to_numpy=True, show_progress_bar=True)
print(f"Encoded {len(embeddings)} artworks")

# 4. Build a FAISS index for fast retrieval
# IndexFlatL2 needs the vector width up front; it is read from the second
# axis of the embedding matrix (one row per artwork).
dimension = np.shape(embeddings)[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
print(f"FAISS index built with {index.ntotal} items")

# 5. Define a search function
def search_artworks(query, top_k=3):
    """Return the artworks whose embeddings are closest to ``query``.

    The query is embedded with the module-level ``encoder`` and looked up in
    the module-level FAISS ``index``; the matching rows of ``df`` are returned
    in the order FAISS reports them (nearest first).

    Args:
        query: Free-text search string.
        top_k: Maximum number of artworks to return. Must be at least 1.

    Returns:
        A DataFrame slice of ``df`` with at most ``top_k`` rows. Fewer rows are
        returned when the index holds fewer than ``top_k`` items.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")

    # Never ask FAISS for more neighbours than the index contains: missing
    # neighbours are reported with label -1, and df.iloc[-1] would silently
    # return the *last* artwork as a bogus hit.
    k = min(int(top_k), index.ntotal)
    if k == 0:
        return df.iloc[0:0]

    query_embedding = encoder.encode([query], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding, k)

    # Only one query was submitted, so only the first result row is relevant.
    hits = indices[0]
    # Defensive: drop any remaining -1 padding labels.
    hits = hits[hits >= 0]
    return df.iloc[hits]

# 6. Test the search
# Run one sample query and print a short, human-readable card per hit.
user_query = "paintings about emotional night skies"
results = search_artworks(query=user_query, top_k=3)

print(f"\n🔎 Results for query: '{user_query}'\n")
for row_label, row in results.iterrows():
    # One print call per artwork; the trailing empty field yields the blank
    # separator line between cards.
    print(
        f"{row['title']} by {row['artist']} ({row['year']})",
        f"   Style: {row['style']}",
        f"   Description: {row['description']}",
        "",
        sep="\n",
    )

  from .autonotebook import tqdm as notebook_tqdm


✅ Loaded dataset


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


✅ Loaded encoder model: sentence-transformers/all-MiniLM-L6-v2


Batches: 100%|████████████████████████████████████| 1/1 [00:02<00:00,  2.46s/it]


✅ Encoded 5 artworks
✅ FAISS index built with 5 items

🔎 Results for query: 'paintings about emotional night skies'

🎨 The Starry Night by Vincent van Gogh (1889)
   Style: Post-Impressionism
   Description: A swirling night sky over a quiet village, representing emotional turbulence and wonder.

🎨 Girl with a Pearl Earring by Johannes Vermeer (1665)
   Style: Baroque
   Description: A portrait of a girl turning towards the viewer, characterized by soft light and mysterious gaze.

🎨 Composition VIII by Wassily Kandinsky (1923)
   Style: Abstract Art
   Description: A complex composition of geometric shapes and lively colors, representing a symphony of abstract forms.



In [None]:
# Step  Action
# 1     Loads the mock artwork dataset from the JSON file.
# 2     Loads a lightweight sentence-transformer encoder (all-MiniLM-L6-v2).
# 3     Encodes each artwork's title, artist, description, and style into a vector embedding.
# 4     Builds a FAISS index (exact L2 search) to retrieve artworks efficiently.
# 5     Defines a simple, reusable search function: search_artworks(query, top_k).
# 6     Runs a sample search query and prints the retrieved artworks.