In [1]:
from langchain_ollama import OllamaLLM
from langchain_ollama import ChatOllama
local_llm = OllamaLLM(model="llama3")
llm = ChatOllama(model="llama3", temperature=0)
llm_json_mode = ChatOllama(model="llama3", temperature=0, format="json")

# Splitting


In [2]:
import json
from langchain_text_splitters import RecursiveJsonSplitter

with open("data/extracted_data.json") as f:
    json_data = json.load(f)

# Fix top-level list if needed
if isinstance(json_data, list):
    json_data = {str(i): item for i, item in enumerate(json_data)}

splitter = RecursiveJsonSplitter(max_chunk_size=200)

texts = splitter.split_text(json_data=json_data, convert_lists=True)


In [3]:
from langchain_core.documents import Document

# Convert each string chunk into a Document object
docs = [Document(page_content=text) for text in texts]

In [11]:
docs

[Document(metadata={}, page_content='{"0": {"Title": "Untitled III", "Identifier": "2024.25", "Subject": "Graphic Arts-Prints"}}'),
 Document(metadata={}, page_content='{"0": {"Description": "Print by the title \\"Untitled III\\" done by etching and screenprint process on paper in 1978 by American artist Adja Yunkers (1900-1983) as indicated by his signature in pencil. Marked in pencil with the edition number \\"6\\" of an edition of 40 produced.Raised black abstract image on black background."}}'),
 Document(metadata={}, page_content='{"0": {"Creator": "Adja Yunkers (American, b. Latvia, 1900-1983)", "Format": "PrintImage Size: 23 3/4 inches x 16 1/2 inches", "Date": "1978", "Medium": "Etching and screen print process on paper"}}'),
 Document(metadata={}, page_content='{"0": {"Donor": "Gift of Jeffrey L. Horrell \'75 and Rodney F. Rose"}}'),
 Document(metadata={}, page_content='{"0": {"Citation": "Adja Yunkers (American, b. Latvia, 1900-1983), \\u201cUntitled III,\\u201dRichard and Ca

# VectorStore

In [1]:
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_nomic.embeddings import NomicEmbeddings

# Create the vector store with proper document format
vectorstore = SKLearnVectorStore.from_documents(
    documents=docs,
    embedding=NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local"),
)

# Create retriever
retriever = vectorstore.as_retriever(k=3)


NameError: name 'docs' is not defined

In [5]:
retriever.invoke("Untitled")

Embedding texts: 100%|██████████| 1/1 [00:00<00:00, 18.83inputs/s]


[Document(metadata={'id': '3bcb1f51-296d-4116-a1e5-6d616f00eae6'}, page_content='{"11": {"Title": "Untitled", "Identifier": "2024.14", "Subject": "Graphic Arts-Prints"}}'),
 Document(metadata={'id': '333da6e6-01e9-4bef-a26e-f558fb2408d9'}, page_content='{"0": {"Description": "Print by the title \\"Untitled III\\" done by etching and screenprint process on paper in 1978 by American artist Adja Yunkers (1900-1983) as indicated by his signature in pencil. Marked in pencil with the edition number \\"6\\" of an edition of 40 produced.Raised black abstract image on black background."}}'),
 Document(metadata={'id': '3754382e-2c44-400e-98f6-c23e16652c9c'}, page_content='{"17": {"Title": "Untitled", "Identifier": "2024.8", "Subject": "Modern & Contemporary-Paintings-20th C. & Later"}}'),
 Document(metadata={'id': '8a9b271f-a04b-44cb-98e4-1ab578dfd9f8'}, page_content='{"0": {"Title": "Untitled III", "Identifier": "2024.25", "Subject": "Graphic Arts-Prints"}}')]

In [7]:
### Router
import json
from langchain_core.messages import HumanMessage, SystemMessage

# Prompt
router_instructions = """You are an expert at routing a user question to a vectorstore or web search.

The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.

Use the vectorstore for questions on these topics. For all else, and especially for current events, use web-search.

Return JSON with single key, datasource, that is 'websearch' or 'vectorstore' depending on the question."""

# Test router
test_web_search = llm_json_mode.invoke(
    [SystemMessage(content=router_instructions)]
    + [
        HumanMessage(
            content="Who is favored to win the NFC Championship game in the 2024 season?"
        )
    ]
)
test_web_search_2 = llm_json_mode.invoke(
    [SystemMessage(content=router_instructions)]
    + [HumanMessage(content="What are the models released today for llama3.2?")]
)
test_vector_store = llm_json_mode.invoke(
    [SystemMessage(content=router_instructions)]
    + [HumanMessage(content="What made the artwork 'Gathering of the Clans'?")]
)
print(
    json.loads(test_web_search.content),
    json.loads(test_web_search_2.content),
    json.loads(test_vector_store.content),
)

{'datasource': 'websearch'} {'datasource': 'websearch'} {'datasource': 'websearch'}


In [9]:
query = "Adja Yunkers?"
docs = retriever.get_relevant_documents(query)

for i, doc in enumerate(docs):
    print(f"\n--- Document {i+1} ---\n{doc.page_content}")


Embedding texts: 100%|██████████| 1/1 [00:00<00:00, 23.31inputs/s]


--- Document 1 ---
{"1": {"Citation": "Adja Yunkers (American, b. Latvia, 1900-1983), \u201cGathering of the Clans,\u201dRichard and Carole Cocks Art Museum at Miami University, accessed April 28, 2025,https://miamiuniversityartmuseum.omeka.net/items/show/20712."}}

--- Document 2 ---
{"0": {"Citation": "Adja Yunkers (American, b. Latvia, 1900-1983), \u201cUntitled III,\u201dRichard and Carole Cocks Art Museum at Miami University, accessed April 28, 2025,https://miamiuniversityartmuseum.omeka.net/items/show/20713."}}

--- Document 3 ---
{"1": {"Creator": "Adja Yunkers (American, b. Latvia, 1900-1983)", "Format": "PrintImage Size: 19 inches x 13 1/2 inches", "Date": "1952", "Medium": "Color woodcut process on paper", "Donor": "Gift of Jeffrey L. Horrell '75 and Rodney F. Rose"}}

--- Document 4 ---
{"0": {"Creator": "Adja Yunkers (American, b. Latvia, 1900-1983)", "Format": "PrintImage Size: 23 3/4 inches x 16 1/2 inches", "Date": "1978", "Medium": "Etching and screen print process on 




# Components