## Create a vector store

In [None]:
from llama_index.readers.file import UnstructuredReader
import nltk
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import HTMLTagReader
import os
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.llms.openai import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from llama_index.core import SimpleDirectoryReader, Settings
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core import download_loader, ServiceContext, VectorStoreIndex, StorageContext
from pinecone import Pinecone
from dotenv import load_dotenv

# Load credentials (e.g. PINECONE_API_KEY, read below) from a local .env file.
load_dotenv()

# Download the required NLTK resource
nltk.download('averaged_perceptron_tagger')

# Read the files under ../data/; .html files are parsed with UnstructuredReader.
dir_reader = SimpleDirectoryReader(
    input_dir="../data/",
    file_extractor={".html": UnstructuredReader()},
)
documents = dir_reader.load_data()
print(f"Number of documents: {len(documents)}")


# Chunking applied at ingestion. It is handed to the index build below;
# previously this parser was created but never used, so the 500/20 settings
# had no effect.
node_parser = SimpleNodeParser.from_defaults(
    chunk_size=500,
    chunk_overlap=20
)

llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
# The embedding model must be the same one the query cell uses
# ("text-embedding-3-small"); otherwise stored vectors and query vectors live
# in different embedding spaces and retrieval quality collapses.
# `embed_batch_size` is a LlamaIndex OpenAIEmbedding option, not a LangChain
# OpenAIEmbeddings one, so it is not passed here.
embed_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

# Register the configured models globally instead of fresh default instances,
# so the choices above are what the index build actually uses.
Settings.llm = llm
Settings.embed_model = embed_model

index_name = "langchain-doc-query-app"
pc = Pinecone(
    api_key=os.environ["PINECONE_API_KEY"],
)
pinecone_index = pc.Index(name=index_name)
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# NOTE(review): if this Pinecone index already holds vectors from an earlier
# run with a different embedding model, clear it before re-ingesting.
index = VectorStoreIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
    transformations=[node_parser],
    show_progress=True,
)
print("finished ingesting...")

## Query the created vector store

In [11]:
import os
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone
from llama_index.core.callbacks import LlamaDebugHandler, CallbackManager
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI
from langchain_openai import OpenAIEmbeddings

# Populate os.environ from .env before any API key is read.
load_dotenv()

# Debug handler that prints a timing trace when each traced operation ends.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager(handlers=[llama_debug])

# Global LlamaIndex configuration: answer model, embedding model, tracing.
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
Settings.embed_model = OpenAIEmbeddings(model="text-embedding-3-small")
Settings.callback_manager = callback_manager

# Attach to the existing Pinecone index and wrap it as a LlamaIndex index;
# nothing is re-ingested here.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
pinecone_index = pc.Index(name="langchain-doc-query-app")
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, show_progress=True)

# Ask a question through a default query engine and print the response.
query = "What is agents?"
query_engine = index.as_query_engine()
res = query_engine.query(query)
print(res)

**********
Trace: index_construction
**********
**********
Trace: query
    |_query -> 3.500983 seconds
      |_retrieve -> 1.801156 seconds
        |_embedding -> 0.368373 seconds
      |_synthesize -> 1.699074 seconds
        |_templating -> 2.1e-05 seconds
        |_llm -> 1.69062 seconds
**********
Agents are different models or systems that can be used for conversational purposes, such as ChatOpenAI, ChatAnthropic, ChatVertexAI, ChatCohere, ChatFireworks, ChatGroq, ChatMistralAI, and ChatOpenAI from TogetherAI.


In [12]:
# Second question against the same `index`, using a freshly built default query engine.
query = "How to build a chat bot"
query_engine = index.as_query_engine()
res = query_engine.query(query)
print(res)

**********
Trace: query
    |_query -> 3.743066 seconds
      |_retrieve -> 1.022354 seconds
        |_embedding -> 0.306898 seconds
      |_synthesize -> 2.720364 seconds
        |_templating -> 1.1e-05 seconds
        |_llm -> 2.714759 seconds
**********
To build a chatbot, you can start by creating a persistence layer around the model to store and retrieve conversation data. You can then enhance the chatbot's functionality by incorporating prompt templates. Prompt templates help structure user inputs for the chatbot to process effectively. Additionally, you can introduce system messages and customize responses based on user interactions. It's also beneficial to implement a message history feature to track and manage conversations with users effectively. By following these steps and gradually adding complexity to the chatbot's capabilities, you can create a more interactive and personalized conversational experience for users.
