In [2]:
import torch
import transformers

# import
import openai

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.embeddings import OpenAIEmbedding

from langchain.llms import Ollama
import chromadb

from dotenv import dotenv_values

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the OpenAI credential from the local .env file and hand it to the openai client.
# .get() plus an explicit check fails right here with a readable message when the entry
# is absent, has no value, or is empty, instead of surfacing later as an authentication
# error on the first embedding request. KeyError is kept so the failure type for a
# missing key is unchanged.
api_key = dotenv_values('.env').get("OPENAI_API_KEY")
if not api_key:
    raise KeyError("OPENAI_API_KEY is missing or empty in .env")
openai.api_key = api_key

In [4]:
# Set the generation LLM and the embedding model used by the rest of the notebook.

# Generation model: "llama2" accessed through LangChain's Ollama wrapper.
llm = Ollama(model="llama2")
# Alternative embedding model kept for reference; OllamaEmbeddings would have to be imported before enabling it.
#embed_model = OllamaEmbeddings(base_url="http://localhost:11434", model="llama2") #Local Llama 2 embedding model
embed_model = OpenAIEmbedding() # OpenAI embeddings with library defaults (presumably text-embedding-ada-002 — confirm against the installed llama_index version)

In [5]:
# Chroma persistence settings shared by the indexing and reloading cells:
# PATH is the on-disk database directory, COLLECTION the collection name inside it.
PATH = "./chroma"
COLLECTION = "aiprof"

In [6]:
# Open a persistent Chroma client rooted at PATH, then fetch the collection named
# COLLECTION (it is created if it does not exist yet).
db = chromadb.PersistentClient(
    path=PATH,
)
chroma_collection = db.get_or_create_collection(name=COLLECTION)

In [7]:
# Wrap the Chroma collection so LlamaIndex can use it as its vector store.
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection,
)
# Storage context that routes index storage through that vector store.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)
# Service context bundling the generation LLM with the embedding model.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/kiranhk/Library/Caches/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
# Read the source files from the local "data" directory into LlamaIndex documents.
documents = SimpleDirectoryReader(input_dir="data").load_data()

In [None]:
# Build the vector index from the loaded documents, using the Chroma-backed storage
# context for storage and the service context for the LLM / embedding configuration.
index = VectorStoreIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
    service_context=service_context,
)

In [None]:
# Reload the index from what is persisted on disk rather than from the in-memory
# `index` object. Note that `chroma_collection` and `vector_store` are rebound here.
db2 = chromadb.PersistentClient(
    path=PATH,
)
chroma_collection = db2.get_or_create_collection(name=COLLECTION)
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection,
)

# Build the index directly on top of the existing vector store.
index2 = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    service_context=service_context,
)

In [None]:
# Create a query engine (library default settings) over the index reloaded from the vector store.
query_engine = index2.as_query_engine()

In [None]:
# Ask a single question against the index and print only the answer text
# of the returned response object.
resp = query_engine.query(
    "What is The rational agent approach?"
)
print(resp.response)

In [None]:
# Ask the model to generate questions from the indexed material.
# This rebinds `resp`, replacing the previous query's response.
resp = query_engine.query(
    "Generate 3 concise questions from the PDFs"
)


In [None]:
# Show the generated answer as a list with one entry per non-blank line.
# splitlines() handles any newline convention, blank lines are dropped so the list
# holds only real content, and a missing (None) response text yields an empty list
# instead of raising AttributeError.
[line for line in (resp.response or "").splitlines() if line.strip()]