In [37]:
# Import necessary libraries
import os
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
from langchain.vectorstores import Pinecone as LangchainPinecone
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

In [26]:
# Load environment variables from the .env file.
# The path is relative, so it is resolved against the kernel's current working directory.
# The call is the last expression in the cell, so its return value is shown as the cell output.
load_dotenv(dotenv_path='../.env')

True

In [27]:
# Pinecone connection settings, read from the process environment.
# A variable that is not set comes back as None rather than raising.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_ENV = os.environ.get("PINECONE_ENV")

# Name of the Pinecone index used by the rest of the notebook; replace with your own.
INDEX_NAME = "semantic-search-fast"

In [28]:
# Initialize the Pinecone client.
# The client itself only needs the API key. A ServerlessSpec (cloud + region) describes
# where an index is hosted and is an argument of pc.create_index(spec=...); it is not a
# client option, so passing it to the constructor had no effect on the connection.
pc = Pinecone(api_key=PINECONE_API_KEY)

In [5]:
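# # Create the index if it doesn't exist (kept disabled; the index is assumed to exist already)
# # NOTE(review): list_indexes() returns index descriptions, so compare against .names().
# # NOTE(review): dimension must equal the embedding size; all-MiniLM-L6-v2 is believed
# #               to produce 384-dimensional vectors, not 768 — confirm before enabling.
# if INDEX_NAME not in pc.list_indexes().names():
#     pc.create_index(
#         name=INDEX_NAME,
#         dimension=384,  # Match the dimensionality of your embeddings
#         metric="cosine",
#         spec=ServerlessSpec(cloud="aws", region=PINECONE_ENV),
#     )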

In [29]:
# Connect to the Pinecone index.
# NOTE(review): nothing above this cell creates the index, so INDEX_NAME is presumably
# expected to exist in the Pinecone project already — confirm.
index = pc.Index(INDEX_NAME)

In [30]:
# Load the embedding model.
# The same `embeddings` object is used to embed the example documents for indexing and
# is later handed to the vector store.
# NOTE(review): the Pinecone index dimension must match this model's output size
# (believed to be 384 for all-MiniLM-L6-v2) — confirm against the index configuration.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

In [31]:
# Toy corpus to index: one {"text": ...} record per sentence.
documents = [
    {"text": sentence}
    for sentence in (
        "LangChain is a framework for developing applications powered by LLMs.",
        "LLaMA is a family of large language models developed by Meta.",
        "Pinecone is a vector database for machine learning applications.",
    )
]

In [32]:
# Add documents to the Pinecone index.
texts = [doc["text"] for doc in documents]
ids = [str(i) for i in range(len(documents))]  # ids are the positions "0", "1", ...
vectors = embeddings.embed_documents(texts)

# Each record carries its source text as metadata under the "text" key. The vector
# store built later in this notebook is configured with text_key="text" and reads the
# document text back from that metadata; vectors stored without it cannot be turned
# back into documents at query time.
# All records are sent in one upsert call instead of one request per vector.
index.upsert(
    vectors=[
        (id_, vector, {"text": text})
        for id_, vector, text in zip(ids, vectors, texts)
    ]
)

In [33]:
# Load the text-generation model and wrap it for LangChain.
# NOTE: despite the `llama_pipeline` name, the checkpoint loaded here is GPT-Neo 125M,
# not a LLaMA model; the name is kept so later references keep working.
model_name = "EleutherAI/gpt-neo-125M"  # Replace with your model's Hugging Face path
llama_pipeline = pipeline(
    "text-generation",
    model=model_name,
    device=-1,  # -1 selects the CPU
    # Limit generation by *new* tokens. Without an explicit limit the model's default
    # total length applies, and the RetrievalQA prompt (instructions + retrieved
    # context + question) can exceed it on its own, leaving no room for an answer.
    max_new_tokens=128,
)
llm = HuggingFacePipeline(pipeline=llama_pipeline)


Device set to use cpu


In [38]:
# Build the LangChain vector store on top of the Pinecone index.
# This has to be the LangChain wrapper (imported as LangchainPinecone): the bare
# `Pinecone` name is the Pinecone client class, which is not a vector store and does
# not accept index/embedding/text_key.
vectorstore = LangchainPinecone(
    index=index,           # Pinecone index handle
    embedding=embeddings,  # embeds the query text at search time
    text_key="text",       # metadata key that holds each document's text
)

# A vector store is not itself a retriever; as_retriever() exposes it as one.
retriever = vectorstore.as_retriever()

# Build the RetrievalQA chain. The "stuff" chain type places all retrieved documents
# into a single prompt for the LLM.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
)


In [41]:
# # Test the QA system using the updated invoke method
# query = "What is LangChain?"
# response = qa_chain.invoke({"query": query})

# # invoke() returns a dict; the generated answer is stored under the "result" key
# print(f"Question: {query}")
# print(f"Answer: {response['result']}")


In [42]:
#### Cleanup reminder: shut down the Pinecone serverless index when you are finished (e.g. `pc.delete_index(INDEX_NAME)`)