In [1]:
import os
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage
import logging
import sys
from dotenv import load_dotenv
# Load credentials from the .env file in the current user's home directory.
# expanduser resolves "~" per user, so the notebook is not tied to one
# machine-specific absolute path.
load_dotenv(os.path.expanduser('~/.env'))

# Send WARNING-and-above records to stdout so they show up in the cell output
# (use logging.DEBUG for more verbose output).
# basicConfig already attaches a stdout StreamHandler to the root logger, so no
# second handler is added here: an extra one would print every record twice and
# would accumulate again each time this cell is re-run.
logging.basicConfig(stream=sys.stdout, level=logging.WARNING)

In [2]:
# Connection settings read from the environment and shared by both Azure
# OpenAI clients below.
api_key = os.environ['OPENAI_API_KEY']
azure_endpoint = os.environ['OPENAI_DEPLOYMENT_ENDPOINT']
api_version = os.environ['OPENAI_DEPLOYMENT_VERSION']
azure_connection = dict(
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

# Chat-completion model.
llm = AzureOpenAI(
    model=os.environ['OPENAI_MODEL_NAME'],
    deployment_name=os.environ['OPENAI_DEPLOYMENT_NAME'],
    **azure_connection,
)

# Embedding model. It needs its own deployment, separate from the
# chat-completion deployment used above.
embed_model = AzureOpenAIEmbedding(
    model=os.environ['OPENAI_EMBEDDING_MODEL_NAME'],
    deployment_name=os.environ['OPENAI_EMBEDDING_DEPLOYMENT_NAME'],
    **azure_connection,
)

In [3]:
from llama_index import set_global_service_context

# Bundle the Azure LLM and embedding model into one ServiceContext, then
# register it as the global default service context.
service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)
set_global_service_context(service_context)

In [4]:
# One storage directory per embedding model name.
persist_dir = "./storage/"+os.environ['OPENAI_EMBEDDING_MODEL_NAME']

if os.path.exists(persist_dir):
    # A stored index is already on disk: load it instead of re-embedding.
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context)
else:
    # Nothing stored yet: read the essay, build the vector index, and
    # persist it so later runs take the branch above.
    essay_reader = SimpleDirectoryReader(
        input_files=[r"/Users/jeana/Retrieval-Augmented-Generation/LlamaIndex/paul_graham_essay.txt"]
    )
    documents = essay_reader.load_data()
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    index.storage_context.persist(persist_dir)

In [6]:
# Build a fresh in-memory index over both the essay and the survey PDF.
# This rebinds `documents` and `index`; the new index is not persisted.
source_files = [
    r"/Users/jeana/Retrieval-Augmented-Generation/LlamaIndex/paul_graham_essay.txt",
    r"/Users/jeana/Retrieval-Augmented-Generation/2311.12399.pdf",
]
documents = SimpleDirectoryReader(input_files=source_files).load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

In [7]:
# Ask the index a question through its default query engine.
query = "How does LLM help graph-related tasks?"
query_engine = index.as_query_engine()
answer = query_engine.query(query)

# Show the formatted sources first, then the question and the answer,
# one item per line.
report_lines = [
    answer.get_formatted_sources(),
    "query was: " + query,
    "answer was: " + str(answer),
]
print(*report_lines, sep="\n")

> Source (Doc id: 05682d95-1ee9-43b2-ac3d-fa1493f2f5a3): A Survey of Graph Meets Large Language Model: Progress and Future Directions
Yuhan Li1∗,Zhixun Li...

> Source (Doc id: 7336d56a-9e08-43d4-a405-fd666a131a71): ,
2023d]. While their primary focus has been on text sequences,
there is a growing interest in en...
query was: How does LLM help graph-related tasks?
answer was: LLMs help graph-related tasks by enhancing the way we interact with graphs, particularly those containing nodes associated with text attributes. The integration of LLMs with graphs has demonstrated success in various downstream tasks across different graph domains. By combining LLMs with traditional GNNs, graph learning can be enhanced. LLMs provide stronger node features that capture both structural and contextual aspects, while GNNs excel at capturing structural information. This combination allows for a more comprehensive understanding of graph data, leveraging the robust textual understanding of LLMs and the s