In [1]:
# !pip install openai langchain pinecone-client python-dotenv

## Initialize OpenAI and Pinecone

In [None]:
import os
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
import pinecone
# import warnings 


# Load API keys from the .env file
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pinecone_env = os.getenv("PINECONE_ENV")


In [17]:
pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENVIRONMENT")
pinecone.delete_index("rag-chatbott")

AttributeError: init is no longer a top-level attribute of the pinecone package.

Please create an instance of the Pinecone class instead.

Example:

    import os
    from pinecone import Pinecone, ServerlessSpec

    pc = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )

    # Now do stuff
    if 'my_index' not in pc.list_indexes().names():
        pc.create_index(
            name='my_index', 
            dimension=1536, 
            metric='euclidean',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-west-2'
            )
        )



In [16]:
# Initialize Pinecone
pinecone.init(api_key="f8e9567f-3bbf-44ec-b527-261c9a461e88", environment="us-east-1")
index_name = "rag-chatbott"

# Initialize the embeddings model
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

AttributeError: init is no longer a top-level attribute of the pinecone package.

Please create an instance of the Pinecone class instead.

Example:

    import os
    from pinecone import Pinecone, ServerlessSpec

    pc = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )

    # Now do stuff
    if 'my_index' not in pc.list_indexes().names():
        pc.create_index(
            name='my_index', 
            dimension=1536, 
            metric='euclidean',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-west-2'
            )
        )



In [14]:
print(len(embeddings.embed_query("test input")))

1536


## Upload Data to Pinecone

In [13]:
from langchain.docstore.document import Document

# Load data from a text file
with open("dataset/transcription.txt", "r") as f:
    text = f.read()

# Convert text to documents
documents = [Document(page_content=text)]

# Upload documents to Pinecone
vectorstore = Pinecone.from_documents(documents, embeddings, index_name=index_name)

API call failed after 3 attempts: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 25 Nov 2024 23:03:26 GMT', 'Content-Type': 'application/json', 'Content-Length': '104', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1337', 'x-pinecone-request-id': '4089083719579180519', 'x-envoy-upstream-service-time': '3', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 3072","details":[]}



PineconeApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 25 Nov 2024 23:03:26 GMT', 'Content-Type': 'application/json', 'Content-Length': '104', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1337', 'x-pinecone-request-id': '4089083719579180519', 'x-envoy-upstream-service-time': '3', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 3072","details":[]}


## Create the Chatbot with LangChain

In [None]:
# Initialize the OpenAI model
llm = ChatOpenAI(temperature=0, openai_api_key=openai_api_key)

# Create a retriever from Pinecone
retriever = vectorstore.as_retriever()

# Build the RAG chatbot
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

## Interact with the Chat

In [None]:
def ask_question(question):
    response = qa_chain({"query": question})
    answer = response["result"]
    sources = response["source_documents"]

    print("Answer:")
    print(answer)

    print("\nData Sources:")
    for source in sources:
        print(source.page_content[:200])  # Display only the first 200 characters

# Example question
ask_question("What is a RAG Chatbot?")