In [1]:
import matplotlib.pyplot as plt 
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [2]:
# Browse page of the museum's Omeka site; it is downloaded below and scanned for item links.
browse = "https://miamiuniversityartmuseum.omeka.net/items/browse"

In [3]:
# Download the browse page. The timeout keeps the cell from hanging forever
# on a stalled connection.
response = requests.get(browse, timeout=30)
if response.status_code != 200:
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down and throws away all state, while an exception only stops this cell.
    raise RuntimeError(
        f"Failed to retrieve data from the URL. Status code: {response.status_code}"
    )

# Parse the returned HTML so later cells can query it.
soup = BeautifulSoup(response.text, 'html.parser')

In [4]:
# Gather the href of every anchor on the browse page, then keep only the
# relative links that point at an individual item page.
anchor_hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
link_on_pages = [
    target
    for target in anchor_hrefs
    if target and target.startswith('/items/show/')
]

print(link_on_pages)

['/items/show/20713', '/items/show/20712', '/items/show/20711', '/items/show/20710', '/items/show/20709', '/items/show/20708', '/items/show/20707', '/items/show/20706', '/items/show/20705', '/items/show/20704']


In [5]:
# Download and parse every item page found on the browse page.
soups = []

for link in link_on_pages:
    full_link = "https://miamiuniversityartmuseum.omeka.net" + link
    print(full_link)
    # Use item-specific names so the browse-page `response` / `soup` objects
    # are not overwritten; otherwise re-running the link-collection cell after
    # this one would scan the last item page instead of the browse page.
    item_response = requests.get(full_link, timeout=30)
    if item_response.status_code != 200:
        # Raise instead of exit(): exit() would shut the notebook kernel down.
        raise RuntimeError(
            f"Failed to retrieve data from the URL. Status code: {item_response.status_code}"
        )

    soups.append(BeautifulSoup(item_response.text, 'html.parser'))

https://miamiuniversityartmuseum.omeka.net/items/show/20713
https://miamiuniversityartmuseum.omeka.net/items/show/20712
https://miamiuniversityartmuseum.omeka.net/items/show/20711
https://miamiuniversityartmuseum.omeka.net/items/show/20710
https://miamiuniversityartmuseum.omeka.net/items/show/20709
https://miamiuniversityartmuseum.omeka.net/items/show/20708
https://miamiuniversityartmuseum.omeka.net/items/show/20707
https://miamiuniversityartmuseum.omeka.net/items/show/20706
https://miamiuniversityartmuseum.omeka.net/items/show/20705
https://miamiuniversityartmuseum.omeka.net/items/show/20704


In [6]:
def extract_data_from_soup(soup):
    """Collect item metadata from a parsed item page.

    Parameters
    ----------
    soup
        Parsed page exposing ``find(name, attrs)`` and ``find_all(name, attrs)``
        (e.g. a ``BeautifulSoup`` document).

    Returns
    -------
    dict
        One entry per metadata field that was found on the page (keys such as
        ``'Title'``, ``'Creator'``, ``'Date'``), a ``'Tags'`` list that is
        always present (possibly empty), and ``'Collection Link'`` when the
        collection block contains a link with an ``href``.
    """
    # Container div id on the page -> key used in the returned dict.
    fields = {
        'dublin-core-title': 'Title',
        'dublin-core-identifier': 'Identifier',
        'dublin-core-subject': 'Subject',
        'dublin-core-description': 'Description',
        'dublin-core-creator': 'Creator',
        'dublin-core-format': 'Format',
        'dublin-core-date': 'Date',
        'dublin-core-medium': 'Medium',
        'physical-object-item-type-metadata-donor': 'Donor',
        'item-citation': 'Citation'
    }

    data = {}
    for field_id, field_name in fields.items():
        element = soup.find('div', {'id': field_id})
        if element is None:
            continue
        text_element = element.find('div', {'class': 'element-text'})
        # A field container without an element-text child is skipped instead
        # of failing with AttributeError on None.
        if text_element is not None:
            data[field_name] = text_element.get_text(strip=True)

    # Tags are the anchors marked rel="tag".
    data['Tags'] = [tag.get_text(strip=True) for tag in soup.find_all('a', {'rel': 'tag'})]

    # NOTE: the image URL (first link inside div#item-images) is not extracted.

    # Collection link: only recorded when the block holds an anchor with an href.
    collection_element = soup.find('div', {'id': 'collection'})
    if collection_element is not None:
        collection_anchor = collection_element.find('a')
        if collection_anchor is not None and collection_anchor.get('href'):
            data['Collection Link'] = collection_anchor['href']

    return data


In [7]:
# Extract one metadata record per item page and keep all of them; previously
# each record was overwritten by the next and the loop variable clobbered the
# browse-page `soup`.
items_data = [extract_data_from_soup(item_soup) for item_soup in soups]

# `data` is left bound to the last record after the loop, as before.
for data in items_data:
    print(data)

{'Title': 'Untitled III', 'Identifier': '2024.25', 'Subject': 'Graphic Arts-Prints', 'Description': 'Print by the title "Untitled III" done by etching and screenprint process on paper in 1978 by American artist Adja Yunkers (1900-1983) as indicated by his signature in pencil. Marked in pencil with the edition number "6" of an edition of 40 produced.Raised black abstract image on black background.', 'Creator': 'Adja Yunkers (American, b. Latvia, 1900-1983)', 'Format': 'PrintImage Size: 23 3/4 inches x 16 1/2 inches', 'Date': '1978', 'Medium': 'Etching and screen print process on paper', 'Donor': "Gift of Jeffrey L. Horrell '75 and Rodney F. Rose", 'Citation': 'Adja Yunkers (American, b. Latvia, 1900-1983), “Untitled III,”Richard and Carole Cocks Art Museum at Miami University, accessed April 15, 2025,https://miamiuniversityartmuseum.omeka.net/items/show/20713.', 'Tags': ['20th Century', 'Abstract', 'Adja Yunkers', 'African Oceanic and New World Cultures', 'African Oceanic and New World 

In [None]:
# `data` is whatever record the extraction loop above processed last.
data['Title']

'Palo Alto'

# LLM

In [46]:
from langchain_ollama import OllamaLLM
from langchain_ollama.embeddings import OllamaEmbeddings

# The same Ollama model name backs both text generation and embeddings.
OLLAMA_MODEL_NAME = "llama3"

model = OllamaLLM(model=OLLAMA_MODEL_NAME)
embeddings = OllamaEmbeddings(model=OLLAMA_MODEL_NAME)


In [29]:
# Smoke test: send a plain question string to the model and print its reply.
# Stored under its own name so the HTTP `response` from the scraping cells is
# not replaced by a value of a different kind.
llm_answer = model.invoke("what is the capital of france")
print(llm_answer)

The capital of France is Paris.


In [31]:
from langchain_community.document_loaders import PyMuPDFLoader

# Load the saved item-page PDF and split it into documents.
pdf_path = "data/omeka_pdf.pdf"
loader = PyMuPDFLoader(pdf_path)
pages = loader.load_and_split()
pages

[Document(metadata={'producer': 'Skia/PDF m135', 'creator': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36', 'creationdate': '2025-04-10T04:19:39+00:00', 'source': 'data/omeka_pdf.pdf', 'file_path': 'data/omeka_pdf.pdf', 'total_pages': 3, 'format': 'PDF 1.4', 'title': 'Pink Cone · Richard and Carole Cocks Art Museum at Miami University', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-04-10T04:19:39+00:00', 'trapped': '', 'modDate': "D:20250410041939+00'00'", 'creationDate': "D:20250410041939+00'00'", 'page': 0}, page_content='Skip to main content\nRichard and Carole Cocks Art Museum at Miami University\nMenu\nBrowse Exhibits\nBrowse Collections\nBrowse Items\nArt Museum Website\n \nSearch using this query type:\nKeyword\nBoolean\nExact match\nSearch only these record types:\n Item\n File\n Collection\n Simple Page\n Exhibit\n Exhibit Page\nAdvanced Search (Items only)\nPink Cone\nTitle\nPink Cone\nId

In [None]:
from langchain.prompts import PromptTemplate
# Prompt skeleton with two placeholders: {context} and {question}.
template = """
You are a helpful assistant. You will be provided with a question and some context.
Please answer the question based on the context.
Context: {context}
Question: {question}

"""
prompt = PromptTemplate.from_template(template)

# Render the template once with sample values to check the placeholders.
sample_inputs = {
    "context": "This is some context ",
    "question": "What is the capital of France?",
}
print(prompt.format(**sample_inputs))



You are a helpful assistant. You will be provided with a question and some context.
Please answer the question based on the context.
Context: This is some contexty
Question: What is the capital of France?




In [None]:
# Pipe the prompt template into the model; the chain is invoked below with a
# dict holding "context" and "question".
chain = prompt | model

In [42]:
# Check that the chain answers from the supplied context.
name_query = {
    "context": "The name I was given was Ryan SIngh",
    "question": "What is my name?",
}
chain.invoke(name_query)


{'context': 'The name I was given was Ryan SIngh',
 'question': 'What is my name?',
 'text': 'Based on the context, your name is Ryan Singh!'}

In [47]:
from langchain_community.vectorstores import DocArrayInMemorySearch
# Build an in-memory vector index over the PDF documents using the embeddings.
vectorstore = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)

In [52]:
# Expose the vector store as a retriever and try it with a one-word query.
retriever = vectorstore.as_retriever()
probe_query = "Art"
retriever.invoke(probe_query)

[Document(metadata={'producer': 'Skia/PDF m135', 'creator': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36', 'creationdate': '2025-04-10T04:19:39+00:00', 'source': 'data/omeka_pdf.pdf', 'file_path': 'data/omeka_pdf.pdf', 'total_pages': 3, 'format': 'PDF 1.4', 'title': 'Pink Cone · Richard and Carole Cocks Art Museum at Miami University', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-04-10T04:19:39+00:00', 'trapped': '', 'modDate': "D:20250410041939+00'00'", 'creationDate': "D:20250410041939+00'00'", 'page': 2}, page_content="Collection\nGifts of Jeffrey L. Horrell '75 and Rodney F. Rose\nTags\n20th Century, African Oceanic and New World Cultures, African Oceanic and New World Cultures-North\nAmerica, American, Cone, etching, Gampi paper chine colle, Graphic Arts, Graphic Arts-Prints, Hard\nground etching, Ice Cream, Line, Line etching, Lines, North American, Paper, Pink, Print, United States,\nWayne

In [None]:
from operator import itemgetter
# Retrieval-augmented chain: callers pass a dict with a single "question" key.
chain = (
    {
        # Retrieve context using the question text. Reading "context" from the
        # input here raised KeyError, because the input carries only "question".
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    | prompt
    | model
)


In [63]:
# Ask the retrieval chain about the artist and print the model's answer.
artist_query = {"question": "Tell me more aboiut the artist"}
print(chain.invoke(artist_query))

Based on the provided context, I can tell you that the artist of the print "Pink Cone" is Wayne Thiebaud (American, 1920-2021).
