In [1]:
import logging
import os
import sys
import textwrap
from collections.abc import Iterator

import openai
from sqlalchemy import make_url, create_engine, MetaData
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index import ServiceContext, SimpleDirectoryReader, StorageContext
from llama_index import download_loader
from llama_index import set_global_service_context
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.llms import AzureOpenAI
from llama_index.vector_stores import PGVectorStore

# Route log records to stdout so they show up in the notebook cell output.
# `force=True` replaces whatever handlers are already attached to the root
# logger, leaving exactly one stdout handler: each record is printed once, and
# re-running this cell does not stack additional handlers.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,  # use logging.DEBUG for more verbose output
    force=True,
)

In [2]:
# Fallback model and deployment names, used when the corresponding
# environment variables are not set.
DEFAULT_LLM_MODEL = "gpt-4"
DEFAULT_LLM_DEPLOYMENT_NAME = "ailab-llm"
DEFAULT_EMBED_MODEL = "text-embedding-ada-002"
DEFAULT_EMBED_DEPLOYMENT_NAME = "ada"


In [4]:
# Connection settings shared by both Azure OpenAI clients, read from the
# environment (each is None when its variable is unset).
api_key = os.getenv("API_KEY")
azure_endpoint = os.getenv("AZURE_ENDPOINT")
api_version = os.getenv("API_VERSION")

# Chat-completion client. Model and deployment names come from the
# environment, falling back to the DEFAULT_LLM_* constants.
llm = AzureOpenAI(
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
    model=os.getenv("LLM_MODEL", DEFAULT_LLM_MODEL),
    deployment_name=os.getenv("LLM_DEPLOYMENT_NAME", DEFAULT_LLM_DEPLOYMENT_NAME),
)

# Embedding client. Note that the embedding model needs its own deployment,
# separate from the chat-completion deployment used above.
embed_model = AzureOpenAIEmbedding(
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
    model=os.getenv("EMBED_MODEL", DEFAULT_EMBED_MODEL),
    deployment_name=os.getenv("EMBED_DEPLOYMENT_NAME", DEFAULT_EMBED_DEPLOYMENT_NAME),
)

In [5]:
# Bundle the Azure LLM and embedding model into a ServiceContext and register
# it as the global default for llama_index.
# (`set_global_service_context` is imported in the notebook's top import cell.)
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)
set_global_service_context(service_context)

In [None]:
# Optionally load documents from a local directory. This notebook uses only the PostgreSQL database as its document source, so the line below is commented out.
# SimpleDirectoryReader can ingest .csv, .pdf, .docx and more: https://docs.llamaindex.ai/en/stable/examples/data_connectors/simple_directory_reader.html

# documents = SimpleDirectoryReader("./data/paul_graham").load_data()

In [None]:
# Fetch the DatabaseReader loader and configure it for the PostgreSQL database
# described by the DB_* environment variables (scheme, host, port, user,
# password and database name).
DatabaseReader = download_loader("DatabaseReader")
reader = DatabaseReader(
    scheme=os.getenv("DB_SCHEME"),
    host=os.getenv("DB_HOST"),
    port=os.getenv("DB_PORT"),
    user=os.getenv("DB_USER"),
    password=os.getenv("DB_PASSWORD"),
    dbname=os.getenv("DB_NAME"),
)

# Pull every non-null text chunk from the database; the reader's result is
# what gets indexed later.
query = """
SELECT
    text_content
FROM 
    louis_v005.chunk
WHERE
    text_content IS NOT NULL;
"""
documents = reader.load_data(query=query)

In [None]:
# Build a vector index over the loaded documents.
# chunk_size=512 controls the size of the text chunks the documents are split
# into before embedding (it is not a batch size). The Azure LLM and embedding
# model are passed explicitly so this context does not silently depend on the
# global service context having been registered by an earlier cell.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=512,
)
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context, show_progress=True
)

In [None]:
# Ask the index a natural-language question, then show the retrieved sources
# followed by the question and the generated answer.
query = "Hello, how do I import my cat to Canada from France?"
query_engine = index.as_query_engine()
answer = query_engine.query(query)

print(answer.get_formatted_sources())
print(f"query was: {query}")
print(f"answer was: {answer}")