In [1]:
# import
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from IPython.display import Markdown, display
import chromadb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
import os 
load_dotenv('myenv/.env')

True

In [4]:
# set up OpenAI
import os
import openai

OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
openai.api_key = OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [5]:
import requests


def get_wikipedia_images(title):
    response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "imageinfo",
            "iiprop": "url|dimensions|mime",
            "generator": "images",
            "gimlimit": "50",
        },
    ).json()
    image_urls = []
    for page in response["query"]["pages"].values():
        if page["imageinfo"][0]["url"].endswith(".jpg") or page["imageinfo"][
            0
        ]["url"].endswith(".png"):
            image_urls.append(page["imageinfo"][0]["url"])
    return image_urls

In [6]:
from pathlib import Path
import urllib.request

image_uuid = 0
MAX_IMAGES_PER_WIKI = 20

wiki_titles = {
    "Tesla Model X",
    "Pablo Picasso",
    "Rivian",
    "The Lord of the Rings",
    "The Matrix",
    "The Simpsons",
}

data_path = Path("mixed_wiki")
if not data_path.exists():
    Path.mkdir(data_path)

for title in wiki_titles:
    response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
        },
    ).json()
    page = next(iter(response["query"]["pages"].values()))
    wiki_text = page["extract"]

    with open(data_path / f"{title}.txt", "w") as fp:
        fp.write(wiki_text)

    images_per_wiki = 0
    try:
        # page_py = wikipedia.page(title)
        list_img_urls = get_wikipedia_images(title)
        # print(list_img_urls)

        for url in list_img_urls:
            if url.endswith(".jpg") or url.endswith(".png"):
                image_uuid += 1
                # image_file_name = title + "_" + url.split("/")[-1]

                urllib.request.urlretrieve(
                    url, data_path / f"{image_uuid}.jpg"
                )
                images_per_wiki += 1
                # Limit the number of images downloaded per wiki page to 15
                if images_per_wiki > MAX_IMAGES_PER_WIKI:
                    break
    except:
        print(str(Exception("No images found for Wikipedia page: ")) + title)
        continue

In [27]:
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction

# set defalut text and image embedding functions
embedding_function = OpenCLIPEmbeddingFunction()

  checkpoint = torch.load(checkpoint_path, map_location=map_location)


In [28]:
from llama_index.core.indices import MultiModalVectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import SimpleDirectoryReader, StorageContext
from chromadb.utils.data_loaders import ImageLoader

image_loader = ImageLoader()

# create client and a new collection
chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection(
    "multimodal_collection",
    embedding_function=embedding_function,
    data_loader=image_loader,
)


# load documents
documents = SimpleDirectoryReader("./JP Morgan Financial Reports/").load_data()

# set up ChromaVectorStore and load in data
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)


UniqueConstraintError: Collection multimodal_collection already exists

In [31]:
retriever = index.as_retriever(similarity_top_k=5)
retrieval_results = retriever.retrieve("show graphs from 2015 report")

In [32]:
# print(retrieval_results)
from llama_index.core.schema import ImageNode
from llama_index.core.response.notebook_utils import (
    display_source_node,
    display_image_uris,
)


image_results = []
MAX_RES = 5
cnt = 0
for r in retrieval_results:
    if isinstance(r.node, ImageNode):
        image_results.append(r.node.metadata["file_path"])
    else:
        if cnt < MAX_RES:
            display_source_node(r)
        cnt += 1

display_image_uris(image_results, [3, 3], top_k=2)

**Node ID:** fa0eef2c-5057-41f1-b60c-07b08308f17d<br>**Similarity:** 0.691380375426886<br>**Text:** ANNUAL REPORT 2015<br>

**Node ID:** 71792270-562d-4283-957b-bf7a76c5e89a<br>**Similarity:** 0.6684604414769999<br>**Text:** ANNUAL REPORT 2016<br>

**Node ID:** 614bf055-5fc4-4700-89a7-2982264005e3<br>**Similarity:** 0.6654055022186349<br>**Text:** consistently demonstrated for decades. Our performance results are shown in 
the charts on pages ...<br>

**Node ID:** eceaf6a6-2d67-4a12-ac7f-61104d72975c<br>**Similarity:** 0.6636157600181546<br>**Text:** 316 JPMorgan Chase & Co./2015 Annual Report<br>

**Node ID:** 4b045e6b-08e1-4da5-b3ab-116858c230b8<br>**Similarity:** 0.6617793538020532<br>**Text:** ANNUAL REPORT 
2017<br>

<Figure size 1600x900 with 0 Axes>

In [23]:
retrieval_results[0].node

TextNode(id_='0636969e-9956-4ccd-9719-e1f9cf9ba065', embedding=None, metadata={'page_label': '57', 'file_name': '2015-annualreport.pdf', 'file_path': '/Users/arjiv_admin/Desktop/Multimodel/JP Morgan Financial Reports/2015-annualreport.pdf', 'file_type': 'application/pdf', 'file_size': 7019509, 'creation_date': '2024-07-25', 'last_modified_date': '2024-07-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='9d78fac9-2c48-4ec6-b1aa-6321adf17ae2', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '57', 'file_name': '2015-annualreport.pdf', 'file_path': '/Users/arjiv_admin/Desktop/Multimodel/JP Morgan Financial Reports/2015-annualreport.pdf', 'file_type': 'application/pdf', 'file_size': 7019509, 'c

In [20]:
retrieval_results[0].score

0.7089283493190089

In [33]:
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
from llama_index.core import SimpleDirectoryReader

# put your local directore here

openai_mm_llm = OpenAIMultiModal(
    model="gpt-4o", api_key=OPENAI_API_KEY, max_new_tokens=1500
)

response_3 = openai_mm_llm.complete(
    prompt="what are other similar cars?",
    image_documents=image_documents,
)