In [1]:
import os
from dotenv import load_dotenv
import openai

# Load the environment variables from the specified .env file
dotenv_path = '/home/zihan/Desktop/Manufacturing_QA/Experimental_Code/GraphRag/.env'
load_dotenv(dotenv_path)

# Load the OpenAI API key from the environment variable
api_key = os.getenv('GRAPHRAG_API_KEY')
assert api_key is not None, "OpenAI API key not found in environment variables."

openai.api_key = api_key

In [3]:
import requests


def get_wikipedia_images(title):
    response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "imageinfo",
            "iiprop": "url|dimensions|mime",
            "generator": "images",
            "gimlimit": "50",
        },
    ).json()
    image_urls = []
    for page in response["query"]["pages"].values():
        if page["imageinfo"][0]["url"].endswith(".jpg") or page["imageinfo"][
            0
        ]["url"].endswith(".png"):
            image_urls.append(page["imageinfo"][0]["url"])
    return image_urls

In [5]:
from pathlib import Path
import requests
import urllib.request

image_uuid = 0
# image_metadata_dict stores images metadata including image uuid, filename and path
image_metadata_dict = {}
MAX_IMAGES_PER_WIKI = 20

wiki_titles = {
    "Tesla Model S",
    "Porsche Taycan",
    "Polestar",
}


data_path = Path("mixed_wiki")
if not data_path.exists():
    Path.mkdir(data_path)

for title in wiki_titles:
    response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
        },
    ).json()
    page = next(iter(response["query"]["pages"].values()))
    wiki_text = page["extract"]

    with open(data_path / f"{title}.txt", "w") as fp:
        fp.write(wiki_text)

    images_per_wiki = 0
    try:
        # page_py = wikipedia.page(title)
        list_img_urls = get_wikipedia_images(title)
        # print(list_img_urls)

        for url in list_img_urls:
            if (
                url.endswith(".jpg")
                or url.endswith(".png")
                or url.endswith(".svg")
            ):
                image_uuid += 1
                # image_file_name = title + "_" + url.split("/")[-1]

                urllib.request.urlretrieve(
                    url, data_path / f"{image_uuid}.jpg"
                )
                images_per_wiki += 1
                # Limit the number of images downloaded per wiki page to 15
                if images_per_wiki > MAX_IMAGES_PER_WIKI:
                    break
    except:
        print(str(Exception("No images found for Wikipedia page: ")) + title)
        continue

In [2]:
from llama_index.core.indices import MultiModalVectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import SimpleDirectoryReader, StorageContext

import qdrant_client
from llama_index.core import SimpleDirectoryReader


# Create a local Qdrant vector store
client = qdrant_client.QdrantClient(path="qdrant_mm_db")

text_store = QdrantVectorStore(
    client=client, collection_name="text_collection"
)
image_store = QdrantVectorStore(
    client=client, collection_name="image_collection"
)

# storage_context = StorageContext.from_defaults(
#     vector_store=text_store, image_store=image_store
# )

# # Create the MultiModal index
# documents = SimpleDirectoryReader("./mixed_wiki/").load_data()
# index = MultiModalVectorStoreIndex.from_documents(
#     documents,
#     storage_context=storage_context,
# )

# # Save it
# index.storage_context.persist(persist_dir="storage")

# # Load it
from llama_index.core import load_index_from_storage

storage_context = StorageContext.from_defaults(
    vector_store=text_store, persist_dir="storage"
)
index = load_index_from_storage(storage_context, image_store=image_store)

In [3]:
from llama_index.core import PromptTemplate
from llama_index.core.query_engine import SimpleMultiModalQueryEngine
from llama_index.multi_modal_llms.openai import OpenAIMultiModal


qa_tmpl_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query.\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_tmpl = PromptTemplate(qa_tmpl_str)


openai_mm_llm = OpenAIMultiModal(
    model="gpt-4o", api_key=api_key, max_new_tokens=1500
)


query_engine = index.as_query_engine(
    llm=openai_mm_llm, text_qa_template=qa_tmpl
)

query_str = "Tell me more about the Porsche"
response = query_engine.query(query_str)


print(str(response))

The Porsche Taycan is a battery electric luxury sports sedan and shooting brake car produced by the German automobile manufacturer Porsche. It was first introduced as a concept car named the Porsche Mission E at the 2015 Frankfurt Motor Show, and the production version was revealed four years later at the 2019 Frankfurt Motor Show. The Taycan is Porsche's first series production electric car and is built on the J1 electric car platform, which it shares with the Audi e-tron GT.

The name "Taycan" is derived from the Turkish words "tay," meaning colt or young horse, and "can," meaning lively, referencing the steed on the coat of arms of the city of Stuttgart, which is also found on the Porsche crest. Despite the use of the "Turbo" name in higher trims, these vehicles do not have turbochargers as they are electrically powered; the name is used purely for branding purposes.

The Taycan features a design influenced by the Mission E concept car, with elements such as a retractable rear spoil

In [12]:
import matplotlib.pyplot as plt
from PIL import Image
import os

def plot_images(image_paths):
    images_shown = 0
    plt.figure(figsize=(16, 9))
    for img_path in image_paths:
        if os.path.isfile(img_path):
            image = Image.open(img_path)

            plt.subplot(2, 3, images_shown + 1)
            plt.imshow(image)
            plt.xticks([])
            plt.yticks([])

            images_shown += 1
            if images_shown >= 9:
                break

# show sources
from llama_index.core.response.notebook_utils import display_source_node

for text_node in response.metadata["text_nodes"]:
    display_source_node(text_node, source_length=200)

plot_images(
    [n.metadata["file_path"] for n in response.metadata["image_nodes"]]
)

**Node ID:** 0010020c-bbff-4cd4-a5ca-1d09c5fa7a6a<br>**Similarity:** 0.8289366566526364<br>**Text:** == Specifications ==


=== Chassis ===
The Taycan's body is mainly steel and aluminium joined by different bonding techniques. The body's B pillars, side roof frame and seat cross member are made f...<br>

**Node ID:** 2d83f430-66e5-4b6f-b3a0-669290bd53b1<br>**Similarity:** 0.8287980006096716<br>**Text:** The Porsche Taycan is a battery electric luxury sports sedan and shooting brake car produced by German automobile manufacturer Porsche. The concept version of the Taycan named the Porsche Mission E...<br>

<Figure size 1600x900 with 0 Axes>

In [10]:
plot_images(
    [n.metadata["file_path"] for n in response.metadata["image_nodes"]]
)

<Figure size 1600x900 with 0 Axes>