In [1]:
from dotenv import load_dotenv
import os
import json
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer


  from tqdm.autonotebook import tqdm


In [6]:
with open('rag_dataset.json', 'r', encoding='utf-8') as file:
    rag_dataset = json.load(file)
rag_dataset[0]

{'source_id': 1,
 'topic': 'Setup your Amazon business',
 'url': 'https://sell.amazon.com/sell#ready-to-sell',
 'title': 'How to sell on Amazon: a guide for beginners',
 'sub_title': 'Get an overview of how to create a selling account, list products, fulfill customer orders, and more. Learn how to sell—whether you’re new to online retail or just new to selling in the Amazon store.',
 'content': '# Sign up\n\nStart here\n\nHow to sell on Amazon: a guide for beginners\n\nGet an overview of how to create a selling account, list products, fulfill customer orders, and more. Learn how to sell—whether you’re new to online retail or just new to selling in the Amazon store.\n\nWelcome to Amazon selling—where customer obsession is key\n\nAmazon is one of the most trusted brands in the US. We work hard to provide every customer with what they need when they need it—and we’re excited every time a new seller joins our community and starts providing Amazon customers with an even wider selection of p

In [7]:
pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))

In [21]:
index_name = "rag-index-384"

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws', 
            region='us-east-1'
        ) 
    )

In [22]:
# Connect to the index
index = pc.Index(index_name)

In [23]:
# Initialize the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

In [24]:
# Index the documents
for doc in rag_dataset:
    text = f"{doc['title']} {doc['sub_title']} {doc['content']}"
    vector = model.encode(text).tolist()
    index.upsert([(str(doc['source_id']), vector)])

print("Indexing complete")

Indexing complete


In [32]:
def fetch_relevant_documents(query, index, model, top_k=5):
    query_vector = model.encode(query).tolist()
    results = index.query(vector=query_vector, top_k=top_k)
    return [hit['id'] for hit in results['matches']]

In [40]:
import openai

openai.api_key = os.getenv('OPENAI_API_KEY')

def generate_response(query, relevant_doc_ids, documents):
    context = ""
    for doc_id in relevant_doc_ids:
        doc = next(doc for doc in documents if doc['source_id'] == int(doc_id))
        context += f"Title: {doc['title']}\nSub Title: {doc['sub_title']}\nContent: {doc['content']}\n\n"

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Answer the following question based on the context provided:\n\nContext:\n{context}\n\nQuestion: {query}"}
        ],
        max_tokens=150
    )
    return response.choices[0].message.content.strip()


In [41]:
def chatbot():
    print("Welcome to the RAG-powered chatbot! Ask your questions about Amazon Seller Central.")
    while True:
        query = input("You: ")
        if query.lower() in ['exit', 'quit']:
            print("Goodbye!")
            break

        relevant_doc_ids = fetch_relevant_documents(query, index, model)
        answer = generate_response(query, relevant_doc_ids, rag_dataset)
        print(f"Chatbot: {answer}")


In [42]:
chatbot()

Welcome to the RAG-powered chatbot! Ask your questions about Amazon Seller Central.


APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
