# LLM

In [None]:
from langchain_ollama import OllamaLLM
from langchain_ollama.embeddings import OllamaEmbeddings
from pprint import pprint
import json

MODEL = OllamaLLM(model="llama3")


## Loading

In [4]:
json_data_path = "data/extracted_data.json"

In [5]:
with open(json_data_path, "r", encoding="utf-8") as file:
	json_data = file.read()

json_object = json.loads(json_data)

In [6]:
from langchain_community.document_loaders import JSONLoader

json_data_path = "data/extracted_data.json"

loader = JSONLoader(
    file_path=json_data_path,
    jq_schema=".[0:100]",
    text_content=False
)

docs = loader.load()

In [7]:
docs = loader.load()

In [8]:
docs

[Document(metadata={'source': '/Users/ryan/PycharmProjects/pythonProject/College/projects/rag_omeka/data/extracted_data.json', 'seq_num': 1}, page_content='[{"Title": "Untitled III", "Identifier": "2024.25", "Subject": "Graphic Arts-Prints", "Description": "Print by the title \\"Untitled III\\" done by etching and screenprint process on paper in 1978 by American artist Adja Yunkers (1900-1983) as indicated by his signature in pencil. Marked in pencil with the edition number \\"6\\" of an edition of 40 produced.Raised black abstract image on black background.", "Creator": "Adja Yunkers (American, b. Latvia, 1900-1983)", "Format": "PrintImage Size: 23 3/4 inches x 16 1/2 inches", "Date": "1978", "Medium": "Etching and screen print process on paper", "Donor": "Gift of Jeffrey L. Horrell \'75 and Rodney F. Rose", "Citation": "Adja Yunkers (American, b. Latvia, 1900-1983), \\u201cUntitled III,\\u201dRichard and Carole Cocks Art Museum at Miami University, accessed April 16, 2025,https://mia

# Splitting

### JSON Splitter

In [9]:
from langchain_text_splitters import RecursiveJsonSplitter

In [11]:
splitter = RecursiveJsonSplitter(max_chunk_size=300)

# Step 3: Split the JSON docs — each doc has a dict in `doc.page_content`
all_chunks = []
print(f"Number of documents loaded: {len(docs)}")  # Debugging statement
for doc in docs:
    json_data = doc.page_content
    print(f"Processing document: {type(json_data)}")  # Debugging statement
    if isinstance(json_data, dict):
        chunks = splitter.split_json(json_data=json_data)
        all_chunks.extend(chunks)

# Optional: print preview
for i, chunk in enumerate(all_chunks[:3]):
    print(f"Chunk {i+1}:\n", chunk, "\n")

Number of documents loaded: 1
Processing document: <class 'str'>


In [13]:
for doc in docs[:3]:
    print(type(doc.page_content))
    print(doc.page_content)

<class 'str'>
[{"Title": "Untitled III", "Identifier": "2024.25", "Subject": "Graphic Arts-Prints", "Description": "Print by the title \"Untitled III\" done by etching and screenprint process on paper in 1978 by American artist Adja Yunkers (1900-1983) as indicated by his signature in pencil. Marked in pencil with the edition number \"6\" of an edition of 40 produced.Raised black abstract image on black background.", "Creator": "Adja Yunkers (American, b. Latvia, 1900-1983)", "Format": "PrintImage Size: 23 3/4 inches x 16 1/2 inches", "Date": "1978", "Medium": "Etching and screen print process on paper", "Donor": "Gift of Jeffrey L. Horrell '75 and Rodney F. Rose", "Citation": "Adja Yunkers (American, b. Latvia, 1900-1983), \u201cUntitled III,\u201dRichard and Carole Cocks Art Museum at Miami University, accessed April 16, 2025,https://miamiuniversityartmuseum.omeka.net/items/show/20713.", "Tags": ["20th Century", "Abstract", "Adja Yunkers", "African Oceanic and New World Cultures", "A

In [14]:
json_data_path = "data/extracted_data.json"

In [15]:
with open(json_data_path, "r", encoding="utf-8") as file:
	json_data = file.read()

json_object = json.loads(json_data)

In [16]:
from langchain_community.document_loaders import JSONLoader

json_data_path = "data/extracted_data.json"

loader = JSONLoader(
    file_path=json_data_path,
    jq_schema=".[0:100]",
    text_content=False
)

docs = loader.load()

In [17]:
docs = loader.load()

In [18]:
docs

[Document(metadata={'source': '/Users/ryan/PycharmProjects/pythonProject/College/projects/rag_omeka/data/extracted_data.json', 'seq_num': 1}, page_content='[{"Title": "Untitled III", "Identifier": "2024.25", "Subject": "Graphic Arts-Prints", "Description": "Print by the title \\"Untitled III\\" done by etching and screenprint process on paper in 1978 by American artist Adja Yunkers (1900-1983) as indicated by his signature in pencil. Marked in pencil with the edition number \\"6\\" of an edition of 40 produced.Raised black abstract image on black background.", "Creator": "Adja Yunkers (American, b. Latvia, 1900-1983)", "Format": "PrintImage Size: 23 3/4 inches x 16 1/2 inches", "Date": "1978", "Medium": "Etching and screen print process on paper", "Donor": "Gift of Jeffrey L. Horrell \'75 and Rodney F. Rose", "Citation": "Adja Yunkers (American, b. Latvia, 1900-1983), \\u201cUntitled III,\\u201dRichard and Carole Cocks Art Museum at Miami University, accessed April 16, 2025,https://mia

### Text Splitter

In [26]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")

Split blog post into 390 sub-documents.


## Embedding

In [27]:
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS

# Initialize the vector store with embeddings
embeddings = OllamaEmbeddings(model="llama3")
vector_store = FAISS.from_documents(all_splits, embeddings)

In [21]:
retriever = vector_store.as_retriever()

## Generation

In [22]:
from langchain.chains import RetrievalQA

qa = RetrievalQA.from_chain_type(
    llm=MODEL,
    chain_type="stuff",        
    retriever=vector_store.as_retriever(),
)
answer = qa.run("list all the artworks here?")
print(answer)

  answer = qa.run("list all the artworks here?")


Based on the provided context, the following artworks are listed:

1. "Melrose, Market" by Edward Ruscha (2001) - a soft ground etching on paper
2. "Desolate" by Claes Oldenburg (1972) - an offset lithograph process on paper with mechanical tints
3. "Palo Alto" by Robert Motherwell (1978) - a lithographic process on Arches 88 paper
4. Untitled by Nancy Mitchnick (circa 1970) - an oil painting on canvas

Note that these are the artworks mentioned in the provided context, and it's possible that there may be other artworks not listed here.


In [24]:
answer = qa.run("What is the oldest painting here")
print(answer)

I don't know. The provided context only includes information about prints, not paintings. There are descriptions of paintings in the context, but they do not provide the date of creation for those pieces.


In [None]:
with open(json_data_path, "r", encoding="utf-8") as data_file:
    data = json.load(data_file)

{'Citation': 'Adja Yunkers (American, b. Latvia, 1900-1983), “Untitled '
             'III,”Richard and Carole Cocks Art Museum at Miami University, '
             'accessed April 16, '
             '2025,https://miamiuniversityartmuseum.omeka.net/items/show/20713.',
 'Collection Link': '/collections/show/25',
 'Creator': 'Adja Yunkers (American, b. Latvia, 1900-1983)',
 'Date': '1978',
 'Description': 'Print by the title "Untitled III" done by etching and '
                'screenprint process on paper in 1978 by American artist Adja '
                'Yunkers (1900-1983) as indicated by his signature in pencil. '
                'Marked in pencil with the edition number "6" of an edition of '
                '40 produced.Raised black abstract image on black background.',
 'Donor': "Gift of Jeffrey L. Horrell '75 and Rodney F. Rose",
 'Format': 'PrintImage Size: 23 3/4 inches x 16 1/2 inches',
 'Identifier': '2024.25',
 'Medium': 'Etching and screen print process on paper',
 'Subject'

In [40]:
pprint(data[0]["Title"])

'Untitled III'


In [39]:
for i in range(len(data)):
    title = data[i]["Title"]
    print(qa.run(f"tell me about {title}"))
    print("________________________________")

I don't know the answer. There is no mention of an untitled print or artwork called "Untitled III" in the provided context. The only prints mentioned are "Palo Alto" and "Desolate".
________________________________
I don't know. The provided context does not mention "Gathering of the Clans". It appears to be a collection of information about various artworks, including prints by Robert Motherwell, Adja Yunkers, and Claes Oldenburg.
________________________________
Based on the provided context, I found an item with the title "Pink Cone" and identifier "2024.23". Here's what it says:

* Title: Pink Cone
* Identifier: 2024.23
* Subject: Graphic Arts-Prints
* Description: Print by the title "Pink Cone" done by process of screenprint in colors on paper in 1975 by American artist Ed Ruscha (1937-) as indicated by his signature and date in pencil. The print was published by Pace Editions of New York and printed by Atelier Crommelynck in Paris.
* Creator: Edward Ruscha (American, b. 1937)
* F

In [44]:
print(qa.run("Who were the donors of hte artworks?"))   

According to the provided context, the donors of the artworks are:

* Jeffrey L. Horrell '75 and Rodney F. Rose (gifted both artworks)
