# Building a Knowledge Base 
This notebook builds a retrieval-augmented generation (RAG) pipeline with LlamaIndex over e-commerce customer-support conversations and evaluates it on question/answer pairs.

In [None]:
! pip install llama-index

In [3]:
# The notebook kernel already runs an event loop. Applying nest_asyncio lets
# additional async functions run inside that existing loop without conflicts.
import nest_asyncio

nest_asyncio.apply()
from llama_index.core.evaluation import generate_question_context_pairs
from llama_index.core import VectorStoreIndex, ServiceContext
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.evaluation import RetrieverEvaluator
from llama_index.llms.openai import OpenAI
from llama_index.readers.json import JSONReader

import os
import pandas as pd
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where OPENAI_API_KEY comes from — confirm.
load_dotenv()

True

In [4]:
# Read the conversation export, which is stored in JSON Lines format.
documents = JSONReader(is_jsonl=True).load_data(
    input_file="converted_conversations.jsonl"
)

# LLM handle: OpenAI's gpt-3.5-turbo.
llm = OpenAI(model="gpt-3.5-turbo")

# Split the documents into nodes and build a vector index over them.
# TODO: chunk_size is 512 for now; revisit whether a different size works better.
node_parser = SimpleNodeParser.from_defaults(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents=documents)
vector_index = VectorStoreIndex(nodes=nodes)

In [5]:
# Pass the notebook's `llm` explicitly. Without it the engine falls back to the
# library-wide default model, and the `llm` object created earlier in the
# notebook is never actually used to generate answers.
query_engine = vector_index.as_query_engine(llm=llm)

In [6]:
# Typical "where is my order?" support question; the last expression displays
# the full Response object, including the retrieved source nodes.
response_vector = query_engine.query(
    "I have been waiting for my order for a long time. When will it get here?"
)
response_vector

Response(response='Your order is currently in transit and should arrive within the next two business days. If you do not receive it by then, please let us know, and we will investigate further.', source_nodes=[NodeWithScore(node=TextNode(id_='1ae6b375-575f-4423-908a-57aa79925682', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='3650e9ea-2dea-4993-ad60-a40dc9006496', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='6f220b567b742bd968381d3ea7f9c6786bfc92b7f4bd4da87973b9a4af773d17'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='bb8c6e9e-ab3a-4084-a4e6-1bb804db98cb', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='6ac01b28e1596a6c89bf24c116ce73bbda3efd25539567de219849fea394fe4d'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='7e013ae9-b4b9-442a-acb8-ad9488333af2', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='5926ece82fb4ef95e09e94e

### Example Chat Completion:
User: I have been waiting for my order for a long time. When will it get here?

Generated Response: Your order is currently in transit and should arrive within the next two business days. If you do not receive it by then, please let us know, and we will investigate further.

In [7]:
# Refund request for a faulty product. The query text is embedded for retrieval
# and sent to the LLM, so the typo "noses" is corrected to "noises".
response_vector = query_engine.query("I have a problem with my toaster. It keeps making weird noises. I want a refund")
response_vector

Response(response="I can guide you through some basic troubleshooting steps that might help fix the issue with your toaster. If those steps don't resolve the problem, we can proceed with the return process for a refund. Would you like to try troubleshooting first?", source_nodes=[NodeWithScore(node=TextNode(id_='bb36f9dd-bbc3-4f82-a1b8-061f9b027e9c', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='e8ce9d57-d4ff-43f7-87a0-b4b35eebfd7a', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='4bbdba1b1fe75ff893e28d28338e420bb28a2366f332f540912ebf820c4e8d84'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='0046c21b-33ef-41a3-9d77-b7c69829ce69', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='fc8f41eea2364820d568942167652f1d11f935877d4ec4febf45c646b063a9ab'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='5d9edefd-300c-4ef0-883c-482b68f2fea3', node_

In [8]:
# Edge case: a bare greeting that contains no support question.
response_vector = query_engine.query(
    "Hello!"
)
response_vector

Response(response='Hello! How can I assist you today?', source_nodes=[NodeWithScore(node=TextNode(id_='a460a7b0-c45f-4d24-9551-1675cab8bd76', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='b1a45f3c-9112-44f7-9381-9463e7568533', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='f71160ab841ee51bfbb4ed7b4983765eb0e040189609bc005c6fd57cb3840fde'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='95788bf8-ea27-4c68-930c-a57fb2a122cb', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='954dcdc5ea5c958b1e31566164dc9ee9faf8d9ed92f9be58ee5f9d1949757f67'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='0c5016d7-59af-4bf1-82bc-245a9d75cf20', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='957951677e61f5fec37d585d3434ecf6665c4b2ea21655ae8978aa8a119e656f')}, text='Please bear with me for a moment."\n"role": "assistant",\n"content": "Thank you for wait