# Query Wikiart

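A retrieval-augmented generation (RAG) pipeline over the `jlbaker361/wikiart-subjects` dataset, using AWS Bedrock models (Claude 3.5 Sonnet for chat, Titan for embeddings) and a Chroma vector store.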

In [1]:
import boto3
import os
import pandas as pd
from datasets import load_dataset
from dotenv import load_dotenv
from langchain_aws.chat_models.bedrock import ChatBedrock
from langchain_aws.embeddings.bedrock import BedrockEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import Chroma
from typing import List

load_dotenv()

True

In [10]:
def load_art_dataset(dataset_name: str, subset_size: int = None):
    """
    Load the dataset and prepare it for processing.

    Args:
        dataset_name: Identifier passed straight to ``load_dataset``.
        subset_size: If truthy, keep only the first ``subset_size`` rows of
            the ``train`` split; otherwise keep every row.

    Returns:
        A ``(dataset, df)`` tuple: the object returned by ``load_dataset``
        and a DataFrame of the ``train`` split with an added
        ``combined_text`` column.
    """
    dataset = load_dataset(dataset_name)
    df = dataset['train'].to_pandas()

    if subset_size:
        # Copy so the column assignment below writes to an independent frame
        # rather than a slice of the full one (avoids SettingWithCopyWarning).
        df = df.head(subset_size).copy()

    # Sanitize the `style` field, e.g. art-nouveau-modern > art nouveau modern
    readable_style = df['style'].str.replace('-', ' ', regex=False)

    # Create combined text field for embeddings. Built column-wise so pandas
    # does not have to materialise every row (all columns) as it would with
    # a row-wise apply.
    df['combined_text'] = (
        "Description: " + df['text'].astype(str) + "\nStyle: " + readable_style
    )

    return dataset, df


def setup_aws_client(aws_access_key_id: str, aws_secret_access_key: str, region_name: str = 'us-east-1'):
    """
    Build a Bedrock runtime client from explicit AWS credentials.

    Args:
        aws_access_key_id: Access key id handed to ``boto3.Session``.
        aws_secret_access_key: Secret access key handed to ``boto3.Session``.
        region_name: AWS region for the session (defaults to ``us-east-1``).

    Returns:
        The ``bedrock-runtime`` client created from that session.
    """
    session_settings = {
        'aws_access_key_id': aws_access_key_id,
        'aws_secret_access_key': aws_secret_access_key,
        'region_name': region_name,
    }
    return boto3.Session(**session_settings).client('bedrock-runtime')


def setup_llm_and_embeddings(client):
    """
    Create the chat model and the embedding model on AWS Bedrock.

    Args:
        client: Bedrock runtime client shared by both models.

    Returns:
        A ``(llm, embeddings)`` tuple.
    """
    # Chat model: the model id below is Claude 3.5 Sonnet.
    # NOTE: an earlier comment here mentioned llama (used for local testing
    # via Ollama); that does not match the model id actually configured.
    chat_settings = {
        "client": client,
        "model_id": "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "model_kwargs": {"temperature": 0.8},
    }
    chat_model = ChatBedrock(**chat_settings)

    # Embedding model: Amazon Titan text embeddings.
    embedding_model = BedrockEmbeddings(
        client=client,
        model_id="amazon.titan-embed-text-v1",
    )

    return chat_model, embedding_model


def create_vectorstore(df: pd.DataFrame, embeddings, persist_directory: str = "./chroma"):
    """
    Create and populate the vector store.

    Args:
        df: DataFrame with a ``combined_text`` column (the text to embed)
            and a ``style`` column (stored as metadata).
        embeddings: Embedding model handed to ``Chroma.from_texts``.
        persist_directory: Directory Chroma persists to (defaults to
            ``./chroma``).

    Returns:
        The populated Chroma vector store.
    """
    texts = df['combined_text'].tolist()
    styles = df['style'].tolist()

    # Positional row number, as a string. It is stored in the metadata so a
    # retrieved document can be mapped back to its row, and reused as the
    # Chroma document id.
    doc_ids = [str(position) for position in range(len(texts))]
    metadatas = [
        {'id': doc_id, 'style': style}
        for doc_id, style in zip(doc_ids, styles)
    ]

    # Explicit, deterministic ids: without them every call generates fresh
    # ids, so re-running against the same persisted directory keeps adding
    # duplicate copies of the same documents. Entries already stored under
    # other ids are not removed by this call.
    vectorstore = Chroma.from_texts(
        texts=texts,
        embedding=embeddings,
        metadatas=metadatas,
        ids=doc_ids,
        persist_directory=persist_directory,
    )

    return vectorstore


def setup_qa_chain(llm, vectorstore):
    """
    Build the conversational retrieval (question-answering) chain.

    Args:
        llm: Chat model that answers the question.
        vectorstore: Vector store to retrieve context documents from.

    Returns:
        A ``ConversationalRetrievalChain`` that also returns its source
        documents.
    """
    # Retrieve the five nearest documents per question.
    top_five_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=top_five_retriever,
        return_source_documents=True,
    )


def query_artwork(query: str, qa_chain, dataset, chat_history: List = None):
    """
    Query the artwork database and return results with images.

    Args:
        query: Free-text question from the user.
        qa_chain: Object exposing ``invoke``; called with the ``question``
            and ``chat_history`` keys.
        dataset: Mapping whose ``'train'`` entry can be indexed by integer
            row id to get a record with ``'image'`` and ``'style'``.
        chat_history: Prior conversation turns; defaults to an empty list.

    Returns:
        A dict with ``answer`` (the chain's answer), ``images`` (a list of
        ``(image, style)`` tuples for every source document whose row could
        be loaded) and ``source_documents`` (as returned by the chain).
    """
    if chat_history is None:
        chat_history = []

    enhanced_query = f"""
    Find artworks matching this query: {query}
    Focus on the style and description of the artworks.
    """

    response = qa_chain.invoke({
        "question": enhanced_query,
        "chat_history": chat_history
    })
    source_documents = response["source_documents"]

    # Get images for the retrieved documents
    retrieved_images = []
    for doc in source_documents:
        raw_id = doc.metadata.get('id')
        try:
            # The id conversion lives inside the try so a document with a
            # missing or malformed id is reported and skipped rather than
            # aborting the whole query. The row is fetched once and reused
            # for both fields.
            row = dataset['train'][int(raw_id)]
            retrieved_images.append((row['image'], row['style']))
        except Exception as e:
            # Deliberately broad: this is a best-effort, per-document lookup
            # and one bad row must not discard the answer.
            print(f"Error loading image {raw_id}: {e}")

    return {
        "answer": response["answer"],
        "images": retrieved_images,
        "source_documents": source_documents
    }


def display_results(result):
    """
    Print the answer, then open each retrieved image.

    Args:
        result: Dict with an ``answer`` entry and an ``images`` entry holding
            ``(image, style)`` pairs; each image must provide ``show()``.
    """
    answer_text = result["answer"]
    print("Answer:", answer_text)

    # Only the image half of each pair is used; the style is ignored here.
    for picture, _ in result["images"]:
        picture.show()


In [11]:
# 1. Load the dataset (capped at 1000 rows: a small subset for testing)
wikiart_dataset = "jlbaker361/wikiart-subjects"
dataset, df = load_art_dataset(dataset_name=wikiart_dataset, subset_size=1000)

In [12]:
# 2. Set up AWS credentials and client
# Credentials are read from the environment; either value is None when unset.
aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY")

client = setup_aws_client(
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

In [13]:
# 3. Set up the language model and embeddings (both use the same client)
llm, embeddings = setup_llm_and_embeddings(client=client)

In [14]:
# 4. Create the vector store from the prepared DataFrame
vectorstore = create_vectorstore(df=df, embeddings=embeddings)

In [15]:
# 5. Set up the QA chain over the vector store
qa_chain = setup_qa_chain(llm=llm, vectorstore=vectorstore)

## Queries

### 1) Featuring women

In [16]:
# Ask the chain, then print the answer and open the matching images.
query = "What is some art that features women?"
result = query_artwork(query=query, qa_chain=qa_chain, dataset=dataset)
display_results(result=result)

Answer: Based on the provided context, I can identify several artworks that feature women:

1. Two baroque paintings are described as featuring "a woman and a man." While these include both genders, they do prominently feature women.

2. There are two art nouveau modern paintings described as showing "a woman laying on the ground." These artworks specifically focus on a female figure.

3. A rococo painting is described as depicting "a woman in a landscape." This artwork clearly features a woman as the main subject.

These artworks span different artistic styles - baroque, art nouveau modern, and rococo - while all featuring women in their descriptions. The baroque pieces include both a man and a woman, while the art nouveau modern and rococo paintings seem to focus solely on female figures in different poses and settings.

Retrieved Artworks:


### 2) Religious art

In [17]:
# Ask the chain, then print the answer and open the matching images.
query = "I'd like to see some religious art"
result = query_artwork(query=query, qa_chain=qa_chain, dataset=dataset)
display_results(result=result)

Answer: Based on the context provided, I can suggest a few artworks that match your query for religious art:

1. A drawing of a church with a statue
   Style: Art Nouveau Modern
   This artwork features a religious building (church) and likely includes a statue, which could be of a religious figure.

2. A painting of Jesus holding a scroll
   Style: Art Nouveau Modern
   This is a direct representation of a religious figure, Jesus, depicted holding a scroll, which is a common motif in religious art.

3. A drawing of an angel holding a bucket
   Style: Baroque
   This artwork features an angel, which is a religious figure often depicted in Christian art. The Baroque style is also frequently associated with religious artworks from the 17th and 18th centuries.

These artworks encompass different styles (Art Nouveau Modern and Baroque) and depict various religious subjects, including architecture (church), central religious figures (Jesus), and spiritual beings (angel). They all fall under

### 3) Renaissance art

In [18]:
# Ask the chain, then print the answer and open the matching images.
query = "Paintings from the renaissance"
result = query_artwork(query=query, qa_chain=qa_chain, dataset=dataset)
display_results(result=result)

Answer: Based on the context provided, I can identify several paintings from the Renaissance period that match your query. Here are the relevant artworks:

1. High Renaissance paintings:
   - Two paintings described as "the painting of person"
   
2. Early Renaissance paintings:
   - Two paintings described as "the painting of person"
   - One painting described as "a painting of a man in a robe and hat"

All of these artworks are from the Renaissance period, either from the Early Renaissance or High Renaissance styles. The descriptions are quite general, mostly referring to paintings of people or individuals. One painting provides a bit more detail, showing a man wearing a robe and hat, which was common attire in Renaissance portraiture.

It's worth noting that without more specific information about the subjects, artists, or titles of these paintings, I can't provide more detailed descriptions. However, these examples demonstrate that there are multiple Renaissance paintings in the d

### 4) Expressionist art featuring nature

In [19]:
# Ask the chain, then print the answer and open the matching images.
# The query text is embedded for retrieval and sent to the LLM, so the
# "featrues" typo is corrected to "features".
query = "I'd like to see some expressionist art that features nature, like animals or landscapes."
result = query_artwork(query, qa_chain, dataset)
display_results(result)

Answer: Based on the context provided, I can suggest a few artworks that match your query for expressionist art featuring nature:

1. There are two paintings described as "a painting of people in a forest" with the style listed as "expressionism". These would fit your request for expressionist art featuring nature, specifically a forest landscape.

2. There's also a painting described as "a painting of a man and woman in a forest" with the style listed as "expressionism". This again features a forest setting in an expressionist style.

These artworks align with your interest in expressionist art and nature themes, as they depict forest scenes. While they don't specifically mention animals, the forest setting implies a natural environment.

It's worth noting that the context doesn't provide any expressionist artworks explicitly featuring animals or other types of landscapes beyond forests. If you're looking for more variety in the nature scenes or specific depictions of animals in an ex