In [1]:
# Jupyter Notebook Setup for RAG and LangChain Chatbot

# ## 1. Setup Environment
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
INDEX_NAME = os.getenv("INDEX_NAME")

# Fail fast: os.getenv returns None for an unset variable, and the cells below
# hand these values straight to the Pinecone and OpenAI clients. Stopping here
# names the missing setting instead of failing later inside a client library.
_missing = [
    name
    for name, value in (
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("INDEX_NAME", INDEX_NAME),
    )
    if not value
]
if _missing:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing)
    )

print("Environment variables loaded successfully.")

# ## 2. Install Required Libraries
# This notebook installs nothing itself: Pinecone, LangChain and the other
# dependencies must already be installed (here, via pip in a terminal).

# ## 3. Data Ingestion Script


Environment variables loaded successfully.


In [2]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone as LangchainPinecone
from pinecone import Pinecone

import hashlib

# Initialize the Pinecone client
pc = Pinecone(api_key=PINECONE_API_KEY)

# Connect to the index. The LangChain wrapper further down is given the index
# name rather than this handle, so `index` is not used again in this cell.
index = pc.Index(INDEX_NAME)

# Define the PDF file path
pdf_file_path = "data/Richtlinien Praktikum IWI-AS.pdf"  # Update this with the correct path to your PDF file

# Load PDF content
loader = PyPDFLoader(pdf_file_path)
documents = loader.load()

# Split the document into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(documents)
print(f"Created {len(texts)} chunks")

# With no chunks there is nothing to embed, and every later question would be
# answered against an index that contains none of this document.
if not texts:
    raise ValueError(f"No text chunks could be extracted from {pdf_file_path!r}")

# Derive a stable ID for each chunk from the source path, its position and its
# content. Without explicit ids the wrapper generates fresh ones on every call,
# so re-running this cell would store the same chunks again; with stable ids a
# re-run overwrites the existing vectors instead.
chunk_ids = [
    hashlib.sha256(
        f"{pdf_file_path}:{position}:{chunk.page_content}".encode("utf-8")
    ).hexdigest()
    for position, chunk in enumerate(texts)
]

# Create embeddings using OpenAI
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Store the embeddings in Pinecone using Langchain's Pinecone wrapper
vectorstore = LangchainPinecone.from_documents(
    texts,
    embeddings,
    index_name=INDEX_NAME,
    ids=chunk_ids,
)

print("Data ingestion completed and embeddings stored in Pinecone.")


  from tqdm.autonotebook import tqdm


Created 2 chunks


  warn_deprecated(


Data ingestion completed and embeddings stored in Pinecone.


In [3]:

# ## 4. Build a Stateless Chatbot

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Chat model: gpt-3.5-turbo at temperature 0, with verbose mode switched on
chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, verbose=True)

# Retrieval QA chain of type "stuff", reading from the vector store's retriever
qa = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# Each question is sent to the chain on its own; no earlier question or answer
# is passed along with the follow-up.
for label, prompt in (
    ("Response 1:", "What are the applications of generative AI according to the paper? Please number each application."),
    ("Response 2:", "Can you please elaborate more on application number 2?"),
):
    response = qa.invoke(prompt)
    print(label, response)


  warn_deprecated(


Response 1: {'query': 'What are the applications of generative AI according to the paper? Please number each application.', 'result': "I don't have information on generative AI applications from the provided text."}
Response 2: {'query': 'Can you please elaborate more on application number 2?', 'result': "I'm sorry, but there is no specific application number mentioned in the provided text. If you can provide more context or details, I would be happy to help further."}


In [4]:
# ## 5. Build a Stateful Chatbot

from langchain.chains import ConversationalRetrievalChain

# Initialize chat history: a list of (question, answer) tuples that is passed
# back to the chain with every new question
chat_history = []

# Create a ConversationalRetrievalChain
qa = ConversationalRetrievalChain.from_llm(
    llm=chat,
    retriever=vectorstore.as_retriever()
)

# Ask a question and store the chat history.
# The chain is run through .invoke(), as in the stateless example, rather than
# by calling the chain object directly, which is deprecated.
res = qa.invoke({"question": "What are the applications of generative AI according to the paper? Please number each application.", "chat_history": chat_history})
print("Response 1:", res["answer"])

# Store the question-answer pair in chat history
history = (res["question"], res["answer"])
chat_history.append(history)

# Follow-up question: chat_history now holds the first exchange
res = qa.invoke({"question": "Can you please elaborate more on application number 2?", "chat_history": chat_history})
print("Response 2:", res["answer"])

# Store the follow-up question and answer in chat history
history = (res["question"], res["answer"])
chat_history.append(history)


  warn_deprecated(


Response 1: I don't have information on the applications of generative AI from the provided text.
Response 2: I don't have that information.


In [5]:
# Define an interactive function to ask questions
def ask_question(qa_chain, chat_history):
    """Run an interactive question/answer loop on standard input.

    Each non-empty line typed by the user is sent to ``qa_chain`` together
    with the running ``chat_history``; the answer is printed and the
    (question, answer) pair is appended to ``chat_history``.

    The loop ends when the user types ``exit`` or ``quit`` (case-insensitive,
    surrounding whitespace ignored), or when input ends or is interrupted.

    Args:
        qa_chain: Object that answers a ``{"question", "chat_history"}``
            mapping with a mapping containing an ``"answer"`` key. Its
            ``invoke`` method is used when present; otherwise the object is
            called directly.
        chat_history: List of (question, answer) tuples. It is mutated in
            place, so the caller sees every exchange after the loop ends.

    Returns:
        None.
    """
    # Prefer ``invoke`` over calling the chain object directly; fall back to
    # the plain call so any callable that worked before still works.
    run_chain = getattr(qa_chain, "invoke", qa_chain)

    while True:
        try:
            # Strip so that inputs such as "exit " are still recognised
            question = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or an interrupt at the prompt ends the session
            # cleanly instead of surfacing a traceback.
            print("\nEnding conversation.")
            break

        if question.lower() in ('exit', 'quit'):
            print("Ending conversation.")
            break

        # Ignore blank lines rather than sending an empty question to the chain
        if not question:
            continue

        # Pass the question to the ConversationalRetrievalChain
        response = run_chain({"question": question, "chat_history": chat_history})

        # Print the chatbot's response
        print("Chatbot:", response["answer"])

        # Store the question-answer pair in chat history
        chat_history.append((question, response["answer"]))

In [6]:
# Begin with an empty history for this session
chat_history = []

# Conversational retrieval chain: the chat model defined earlier plus the
# vector store's retriever
qa_chain = ConversationalRetrievalChain.from_llm(retriever=vectorstore.as_retriever(), llm=chat)

# Hand control to the interactive loop; it runs until the user ends the
# conversation (for example by typing 'exit' or 'quit')
ask_question(qa_chain, chat_history)