# Building a Knowledge Base 
Using a RAG pipeline and evaluating with LlamaIndex for question/answer pairs in e-commerce customer support

In [None]:
! pip install llama-index

In [3]:
# By applying nest_asyncio, we can run additional async functions within this existing loop without conflicts.
import nest_asyncio

nest_asyncio.apply()
from llama_index.core.evaluation import generate_question_context_pairs
from llama_index.core import VectorStoreIndex, ServiceContext
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.evaluation import RetrieverEvaluator
from llama_index.llms.openai import OpenAI
from llama_index.readers.json import JSONReader

import os
import pandas as pd
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
documents = JSONReader(is_jsonl = True).load_data("converted_conversations.jsonl")

# define an LLM (3.5 turbo)
llm = OpenAI(model="gpt-3.5-turbo")

# build index with a chunk_size of 512 -- do we need to update chunk size?
node_parser = SimpleNodeParser.from_defaults(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)
vector_index = VectorStoreIndex(nodes)

In [5]:
query_engine = vector_index.as_query_engine()
query_engine

<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x1378fb260>

In [9]:
response_vector = query_engine.query("I have been waiting for my order for a long time. When will it get here?")
response_vector.response


'Your order is currently in transit and should arrive within the next two business days. If you do not receive it by then, please reach out to us so we can investigate further.'

### Example Chat Completion:
User: I have been waiting for my order for a long time. When will it get here?

Generated Response: Your order is currently in transit and should arrive within the next two business days. If you do not receive it by then, it is recommended to reach out to investigate further.

In [7]:
response_vector = query_engine.query("I have a problem with my toaster. It keeps making weird noses. I want a refund")
response_vector

Response(response="I can guide you through some basic troubleshooting steps that might help fix the issue with your toaster. If those steps don't resolve the problem, we can proceed with the return process. Would you be willing to try some troubleshooting first?", source_nodes=[NodeWithScore(node=TextNode(id_='48282860-1807-4157-b5a2-871e612dad40', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='c3a20af8-b33b-45fe-bbdb-e741ec7a5375', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='4bbdba1b1fe75ff893e28d28338e420bb28a2366f332f540912ebf820c4e8d84'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='b6839217-c8e3-41be-b434-37a54445f34c', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='fc8f41eea2364820d568942167652f1d11f935877d4ec4febf45c646b063a9ab'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='1d2d1cb8-337b-4836-a9fb-efe6df559c22', node_ty

In [8]:
response_vector = query_engine.query("Hello!")
response_vector

Response(response='Good day!', source_nodes=[NodeWithScore(node=TextNode(id_='66ed6277-bb77-4dfd-88f2-7eb74c5630bd', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='82f6804a-4715-40d4-8126-95aa83d57d0c', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='f71160ab841ee51bfbb4ed7b4983765eb0e040189609bc005c6fd57cb3840fde'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='44408591-825e-4219-a902-18bb648a1b33', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='954dcdc5ea5c958b1e31566164dc9ee9faf8d9ed92f9be58ee5f9d1949757f67'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='8530b2db-4477-4156-b3dc-bdcecb4d0b59', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='957951677e61f5fec37d585d3434ecf6665c4b2ea21655ae8978aa8a119e656f')}, text='Please bear with me for a moment."\n"role": "assistant",\n"content": "Thank you for waiting. I have checked with 