# Building a Knowledge Base 
Build a retrieval-augmented generation (RAG) pipeline with LlamaIndex and evaluate it on question/answer pairs from e-commerce customer support.

In [None]:
! pip install llama-index

In [22]:
# nest_asyncio allows asynchronous functions to be nested inside an event loop that is already running.
# Jupyter notebooks run inside such a loop, so applying the patch lets later cells run
# additional async functions within that loop without conflicts.
import nest_asyncio

# NOTE(review): this call sits ahead of the llama_index imports in the original cell;
# keep that order unless it is confirmed that applying the patch later is equivalent.
nest_asyncio.apply()
from llama_index.core.evaluation import generate_question_context_pairs
# NOTE(review): ServiceContext is deprecated in recent llama-index releases in favour of
# Settings — confirm the installed version still exports it.
from llama_index.core import VectorStoreIndex, ServiceContext
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.evaluation import RetrieverEvaluator
from llama_index.llms.openai import OpenAI
from llama_index.readers.json import JSONReader

import os
import pandas as pd
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

In [42]:
# Read the conversations file; is_jsonl=True is passed because the input is a .jsonl file.
jsonl_reader = JSONReader(is_jsonl=True)
documents = jsonl_reader.load_data("converted_conversations.jsonl")

# LLM handle for the notebook (OpenAI, model "gpt-3.5-turbo").
llm = OpenAI(model="gpt-3.5-turbo")

# Parse the documents into nodes with the chosen chunk size, then build the
# vector index over those nodes.
CHUNK_SIZE = 512
node_parser = SimpleNodeParser.from_defaults(chunk_size=CHUNK_SIZE)
nodes = node_parser.get_nodes_from_documents(documents)
vector_index = VectorStoreIndex(nodes)

In [43]:
# Hand the notebook's `llm` to the query engine explicitly. Without the `llm`
# argument the engine uses the library-wide default LLM, so the model configured
# in the previous cell would never be used for answering queries.
query_engine = vector_index.as_query_engine(llm=llm)

In [44]:
# Sample customer-support question used to try out the query engine.
sample_question = "I have been waiting for my order for a long time. When will it get here?"
response_vector = query_engine.query(sample_question)

# Keep the response as the cell's final expression so the notebook renders it.
response_vector

Response(response='Your order is currently in transit and should arrive within the next two business days. If you do not receive it by then, it is recommended to reach out to investigate further.', source_nodes=[NodeWithScore(node=TextNode(id_='2cb24443-50bb-4df8-8c33-3033bc3252bc', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='40fe1259-6e76-4358-9b3f-0610069b7e02', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='6f220b567b742bd968381d3ea7f9c6786bfc92b7f4bd4da87973b9a4af773d17'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='c968a300-344a-459b-94ec-d7cc5644da85', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='6ac01b28e1596a6c89bf24c116ce73bbda3efd25539567de219849fea394fe4d'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='32a7f41d-8be1-42b5-a126-fe605cc22d4a', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='5926ece82fb4ef95e09e9