# Query Wikiart

In [1]:
import pandas as pd
from datasets import load_dataset
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama
from typing import List

In [2]:
# Step 1: Load and prepare the dataset
def load_art_dataset(dataset_name: str, subset_size: int = None):
    """
    Load a Hugging Face dataset and build the text used for embeddings.

    Args:
        dataset_name: Identifier passed to ``load_dataset``.
        subset_size: If truthy, only the first ``subset_size`` rows of the
            ``train`` split are kept (a negative value drops rows from the
            end, mirroring ``DataFrame.head``). ``None`` or ``0`` keeps the
            whole split.

    Returns:
        A ``(dataset, df)`` tuple: the untouched object returned by
        ``load_dataset`` and a DataFrame of the (possibly trimmed) ``train``
        split with an added ``combined_text`` column.
    """
    dataset = load_dataset(dataset_name)
    train_split = dataset['train']

    if subset_size:
        # Trim BEFORE converting to pandas: converting the full split (image
        # column included) only to keep the first few rows wastes time and
        # memory. Slicing a range clamps to the split length and treats
        # negative sizes the same way `DataFrame.head` does.
        kept_rows = range(len(train_split))[:subset_size]
        train_split = train_split.select(kept_rows)

    df = train_split.to_pandas()

    # Create combined text field for embeddings.
    # Sanitize the `style` field, e.g. art-nouveau-modern > art nouveau modern.
    # Iterating the two columns directly avoids building a Series per row.
    df['combined_text'] = [
        f"Description: {text}\nStyle: {style.replace('-', ' ')}"
        for text, style in zip(df['text'], df['style'])
    ]

    return dataset, df

# Step 2: Set up the LLM and embeddings
def setup_llm_and_embeddings():
    """
    Build the Ollama embedding model and chat model used by the pipeline.

    Both are created for the "llama3.2" model; the chat model is given a
    temperature of 0.8.

    Returns:
        A ``(llm, embeddings)`` tuple.
    """
    model_name = "llama3.2"

    embedder = OllamaEmbeddings(model=model_name)
    chat_model = ChatOllama(model=model_name, temperature=0.8)

    return chat_model, embedder

# Step 3: Create the vector store
def create_vectorstore(df: pd.DataFrame, embeddings, persist_directory: str = "./chroma"):
    """
    Create and populate the Chroma vector store.

    Args:
        df: DataFrame with a ``combined_text`` column (the text to embed)
            and a ``style`` column (stored as metadata).
        embeddings: Embedding model handed to ``Chroma.from_texts``.
        persist_directory: Directory where the collection is persisted.

    Returns:
        The populated ``Chroma`` vector store.
    """
    texts = df['combined_text'].tolist()

    # The row position is the document id. It is stored in the metadata too,
    # so a retrieved document can be mapped back to its row in the dataset.
    ids = [str(position) for position in range(len(texts))]
    metadatas = [
        {'id': doc_id, 'style': style}
        for doc_id, style in zip(ids, df['style'])
    ]

    # Pass explicit ids: without them Chroma generates random ones, so
    # re-running against the same persist_directory would append a second
    # copy of every document instead of overwriting the existing entries.
    vectorstore = Chroma.from_texts(
        texts=texts,
        embedding=embeddings,
        metadatas=metadatas,
        ids=ids,
        persist_directory=persist_directory,
    )

    return vectorstore

# Step 4: Create the QA chain
def setup_qa_chain(llm, vectorstore):
    """
    Build the conversational question-answering chain.

    The retriever is created from ``vectorstore`` with ``k=5`` and the chain
    is asked to return its source documents alongside the answer.
    """
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
    )

# Step 5: Query function
def query_artwork(query: str, qa_chain, dataset, chat_history: List = None):
    """
    Query the artwork database and return results with images.

    Args:
        query: Natural-language request from the user.
        qa_chain: Chain exposing ``invoke``; its response must contain
            ``"answer"`` and ``"source_documents"``.
        dataset: Mapping whose ``'train'`` entry is indexable by row number
            and yields rows with ``'image'`` and ``'style'`` fields.
        chat_history: Prior conversation turns; defaults to an empty list.

    Returns:
        A dict with the chain's ``"answer"``, the ``"images"`` found for the
        retrieved documents as ``(image, style)`` tuples, and the raw
        ``"source_documents"``.
    """
    if chat_history is None:
        chat_history = []

    enhanced_query = f"""
    Find artworks matching this query: {query}
    Focus on the style and description of the artworks.
    """

    response = qa_chain.invoke({
        "question": enhanced_query,
        "chat_history": chat_history
    })

    # Get images for the retrieved documents. Image lookup is best-effort:
    # one bad document must not discard the answer or the other images.
    retrieved_images = []
    for doc in response["source_documents"]:
        raw_id = doc.metadata.get('id')
        try:
            image_id = int(raw_id)
        except (TypeError, ValueError):
            # Missing or non-numeric id: nothing to look up for this document.
            print(f"Skipping document with invalid image id: {raw_id!r}")
            continue

        try:
            # Fetch the row once and read both fields from it.
            row = dataset['train'][image_id]
            retrieved_images.append((row['image'], row['style']))
        except Exception as e:
            print(f"Error loading image {image_id}: {e}")

    return {
        "answer": response["answer"],
        "images": retrieved_images,
        "source_documents": response["source_documents"]
    }

# Step 6: Display function
def display_results(result):
    """
    Print the answer, then show each retrieved image followed by its style.

    ``result`` is expected to carry an ``"answer"`` entry and an ``"images"``
    entry holding ``(image, style)`` pairs; each image must offer ``show()``.
    """
    answer_text = result["answer"]
    print("Answer:", answer_text)
    print("\nRetrieved Artworks:")

    for entry in result["images"]:
        picture, art_style = entry
        picture.show()
        print(f"Style: {art_style}\n")


In [3]:
# 1. First, load the dataset.
# `dataset` is the object returned by `load_dataset` (used later to look up
# images by row number); `df` holds `train` rows plus a `combined_text` column.
wikiart_dataset = "jlbaker361/wikiart-subjects"
dataset, df = load_art_dataset(wikiart_dataset, subset_size=1000)  # Use small subset for testing

In [4]:
# 2. Set up the language model and embeddings (both use the "llama3.2" model)
llm, embeddings = setup_llm_and_embeddings()

In [5]:
# 3. Create the vector store from `df['combined_text']`; the collection is
# persisted under ./chroma
vectorstore = create_vectorstore(df, embeddings)

In [6]:
# 4. Set up the QA chain (its retriever is configured with k=5)
qa_chain = setup_qa_chain(llm, vectorstore)

## Queries

### 1) Featuring women

In [13]:
# No chat_history is passed, so the query runs with an empty history.
query = "What is some art that features women?"
result = query_artwork(query, qa_chain, dataset)
display_results(result)

Answer: Based on the provided context, I can suggest some artworks that feature women. 

The following artworks match the query:

1. A painting described as "portrait of a woman" with Style: Expressionism.

There are also two paintings of women wearing white shirts, both described as "a painting of a woman with a white shirt" and both with Style: Expressionism.

It appears that these artworks feature women prominently, but without more information, I don't have enough context to provide further details about the specific artworks.

Retrieved Artworks:
Style: expressionism

Style: baroque

Style: expressionism

Style: expressionism

Style: expressionism



### 2) Religious art

In [14]:
# Subject-based query; `result` from the previous cell is overwritten.
query = "I'd like to see some religious art"
result = query_artwork(query, qa_chain, dataset)
display_results(result)

Answer: I can help you with that! Based on your query, here are a few artworks that match your request:

1. "The Adoration of the Cross" (multiple versions) - These paintings are known for their dramatic lighting and intense emotions, characteristic of the Baroque style. 
2. "The Virgin and the Infant" - This painting is another example of the Baroque style, featuring intricate details and dynamic composition.
3. "A Woman in a Wooded Landscape" - Although not exclusively a religious work, this painting does feature a devotional theme, with the subject likely being a representation of Saint Catherine, a Christian martyr.

These artworks showcase the dramatic lighting, intense emotions, and intricate details that are typical of Baroque art.

Retrieved Artworks:
Style: baroque

Style: baroque

Style: baroque

Style: baroque

Style: baroque



### 3) Renaissance art

In [15]:
# Period-based query, run with an empty chat history like the others.
query = "Paintings from the renaissance"
result = query_artwork(query, qa_chain, dataset)
display_results(result)

Answer: Based on the provided context, here are some paintings that match your query:

1. "The Virgin and the Infant" by Person (Style: Baroque)
   This painting is a work from the Baroque period, which is often associated with the Renaissance.

2. "A Painting of Two People in a Wooded Area" (Style: Mannerism Late Renaissance)
   While not exclusively focused on the Renaissance or Baroque periods, this style description fits within those time frames.

These are the only specific artworks that match your query based on the provided context.

Retrieved Artworks:
Style: baroque

Style: mannerism-late-renaissance

Style: baroque

Style: baroque

Style: baroque



### 4) Expressionist art featuring nature

In [16]:
# Combined style + subject query.
# NOTE(review): "featrues" below is a typo for "features"; the string is sent
# to the chain verbatim, so fix it and re-run the cell to refresh the output.
query = "I'd like to see some expressionist art that featrues nature, like animals or landscapes."
result = query_artwork(query, qa_chain, dataset)
display_results(result)

Answer: I don't know specific examples of expressionist art that feature nature. The descriptions provided only mention a man in different colored shirts, which seems unrelated to your query. Can you provide more context or details about the type of expressionist artwork you're interested in?

Retrieved Artworks:
Style: expressionism

Style: expressionism

Style: expressionism

Style: expressionism

Style: expressionism

