## This basic notebook contains ChatBot code, a simple UI, and a log file for each dialogue session. It also includes a basic Retrieval-Augmented Generation (RAG) part that uses a file you upload.


In [1]:
# adding the required libraries, including tokenisation, facebook ai, pdf analysis
%%capture
!pip install langchain-openai openai faiss-cpu langchain-community tiktoken pdfplumber

In [2]:
# Import necessary libraries
import os
import pdfplumber #for data extraction from the PDF
import tiktoken #OpenAI's library for tokenising text
import openai #Official OpenAI python library
import time
import textwrap #for interface
import ipywidgets as widgets #for interface
import IPython                                                          #Interactive Python Shell
from IPython.display import display, Markdown

In [3]:
# LangChain imports
from langchain.embeddings import OpenAIEmbeddings                       #text to numerical vectors - embeddings
from langchain.vectorstores import FAISS                                #similarity search for vectors
from langchain.text_splitter import RecursiveCharacterTextSplitter      #splits large text chunks into smaller
from langchain_openai import ChatOpenAI                                 #imports OpenAI chat models
from langchain.chains import RetrievalQA                                #pre-built chain for document retrieval and question answering
from langchain.prompts import PromptTemplate                            #PromptTemplates for LangChain - Persona, Task, Communication
from langchain.memory import ConversationBufferMemory                   #Memory Handling for LangChain
from langchain.schema.runnable import RunnableMap, RunnableSequence     #Schema mapping and sequence for LangChain

In [4]:
#OpenAI Imports
from openai.types import Completion, CompletionChoice, CompletionUsage

In [5]:
# Set up OpenAI API key
# The key is never written into the notebook: an existing OPENAI_API_KEY
# environment variable is reused, otherwise the user is prompted for it
# (getpass does not echo the input, so it does not end up in the cell output).
import os
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

In [6]:
import warnings
# Install a blanket "ignore" filter: no category, message or module is given,
# so every warning raised after this cell is suppressed.
# NOTE(review): this also hides deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

In [7]:
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of all pages of the PDF at ``pdf_path``.

    Every page that yields text contributes that text followed by a newline.
    Pages for which ``extract_text()`` returns ``None`` or an empty string
    are skipped, so a PDF without extractable text gives an empty string.
    """
    with pdfplumber.open(pdf_path) as document:
        # Pull the text page by page while the file is still open.
        page_texts = [page.extract_text() for page in document.pages]

    # Falsy results (None / "") are dropped before joining.
    return "".join(content + "\n" for content in page_texts if content)

In [None]:
# Upload PDF file
# NOTE: `google.colab` is imported here, so this cell is tied to the Google Colab runtime.
from google.colab import files
print("Please upload your PDF document:")
# `uploaded` is consumed by the next cell, which takes its first key as the PDF file name.
uploaded = files.upload()

In [None]:
# Extract text from the first uploaded PDF
# Fail with a clear message instead of an IndexError when the upload was cancelled.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run the upload cell and select a PDF document.")

# Only the first uploaded file is used; any further uploads are ignored.
pdf_filename = next(iter(uploaded))
# Resolve the file against the current working directory rather than a
# hard-coded "/content" (the two are the same in a default Colab session).
# NOTE(review): assumes files.upload() saved the file into the working directory - confirm.
pdf_path = os.path.join(os.getcwd(), pdf_filename)
pdf_text = extract_text_from_pdf(pdf_path)

# A scanned / image-only PDF yields no text; stop here rather than building an empty index.
if not pdf_text.strip():
    raise ValueError(f"No extractable text was found in '{pdf_filename}'.")

In [10]:
# Split the document into chunks for embedding
CHUNK_SIZE = 500     # maximum chunk length handed to the splitter
CHUNK_OVERLAP = 50   # overlap between consecutive chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
documents = text_splitter.create_documents([pdf_text])

# Fail early with a clear message: building an index from zero chunks would
# otherwise fail inside the vector store with a much less helpful error.
if not documents:
    raise ValueError(
        "No text chunks were produced from the PDF (it may be empty or contain only scanned images)."
    )

# Create vector embeddings and store in FAISS
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)
retriever = vectorstore.as_retriever()

# Initialize the chat model
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)

In [11]:
# Create the RAG (Retrieval Augmented Generation) chain
# RetrievalQA is built from the chat model (`llm`) and the FAISS-backed `retriever` created above.
# return_source_documents=True requests the retrieved chunks in the chain output as well.
# NOTE(review): process_user_input() reads only the "result" key of that output, so the
# source documents are not used anywhere in the visible code.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

In [12]:
# Define the prompt template for the TopologicPy coding-assistant ChatBot.
# The template has three placeholders that process_user_input() fills in:
#   {context}    - text obtained from the RAG chain for the current question
#   {history}    - the earlier turns, formatted as "User: ... / ChatBot: ..." lines
#   {user_input} - the current user message
prompt_template = PromptTemplate.from_template("""
<Persona>
You are a very technical python coder with expertise in geometry and topology, and in particular topologicPy and Industrial Foundation Classes strategies.
</Persona>

<Task>
The conversation is about helping junior python coders to develop code using Topologicpy API
Please use few-shot strategy to benchmark your responses when the questions are difficult.
Use the retrieved context when it's relevant to answer the user's question.
Communicate sources for your answers when needed.
</Task>

<Communication>
Respond in detailed python codes and explanations.
Keep the dialogue on track.
Never reveal you are an AI or LLM.
If questioned further provide an explanation of at least one paragraph with ten sentences as an explanation of your thinking.
Ensure your answers are data-driven when possible, drawing from the context provided.
</Communication>

<Context>
{context}
</Context>

Conversation history:
{history}

User: {user_input}
ChatBot:
""")

In [None]:
# Initialize memory for conversation history
# NOTE(review): `memory` is not referenced by any other code visible in this notebook;
# the chat loop keeps its own `history` list instead. Confirm whether this object is still needed.
# NOTE(review): confirm that ConversationBufferMemory actually honours `max_token_limit`.
memory = ConversationBufferMemory(return_messages=True, max_token_limit=500)

# Create a function to process user input using both RAG and the conversational prompt
def process_user_input(user_input, history):
    """Answer ``user_input`` using the RAG chain plus the persona prompt.

    Args:
        user_input: The current user message.
        history: Iterable of earlier turns; each turn is a mapping with the
            keys ``"user_input"`` and ``"assistant"``.

    Returns:
        The ``content`` attribute of the chat model's reply.

    Note:
        The text placed in the prompt's ``{context}`` slot is the ``"result"``
        entry of the RAG chain output (an empty string if that key is absent).
    """
    # Query the RAG chain through the same `.invoke` interface that is used
    # for the chat model below, instead of calling the chain object directly.
    rag_response = rag_chain.invoke({"query": user_input})
    relevant_context = rag_response.get("result", "")

    # Render earlier turns as alternating "User:" / "ChatBot:" lines.
    formatted_history = "\n".join(
        f"User: {turn['user_input']}\nChatBot: {turn['assistant']}" for turn in history
    )

    # Fill the persona prompt and send it to the chat model.
    prompt = prompt_template.format(
        context=relevant_context,
        history=formatted_history,
        user_input=user_input,
    )
    response = llm.invoke(prompt)

    return response.content

# UI Elements
# Output area that receives the printed chat transcript.
chat_output = widgets.Output()
# Multi-line text box for the user's message.
user_input_box = widgets.Textarea(
    placeholder="Enter your message here...",
    description="User:",
    style={'description_width': 'initial'},
    layout=widgets.Layout(width="80%", height="50px")
)
# Button that is wired to stop_chat() further down in this cell.
end_chat_button = widgets.Button(description="End Chat Session", button_style="danger")

# Display UI Elements
display(chat_output, user_input_box, end_chat_button)

# Initialize conversation history
# handle_input() appends one dict per turn with the keys "user_input" and "assistant".
history = []

# Open log file in append mode
# The handle stays open for the session; handle_input() writes to it and stop_chat() closes it.
# NOTE(review): re-running this cell opens a new handle without closing the previous one.
log_filename = "rag_chat_history.txt"
log_file = open(log_filename, "a", encoding="utf-8")

# Show initial message
# The greeting matches the persona defined in the prompt template
# (a TopologicPy coding helper, not a marketing assistant).
with chat_output:
    print("Welcome! I'm your TopologicPy coding assistant. How can I help you today?")

def _wrap_preserving_lines(text, width=120):
    """Wrap only the lines of ``text`` longer than ``width``, keeping every line break."""
    return "\n".join(
        line if len(line) <= width else textwrap.fill(line, width=width)
        for line in text.splitlines()
    )


def handle_input():
    """Handles user input when Enter is pressed.

    Reads the text box, and then either ends the chat (for "exit"/"quit"),
    ignores empty input, or: echoes the message, obtains the reply from
    process_user_input(), prints it, appends the turn to ``history`` and
    writes it to the log file. The text box is cleared afterwards.
    Any exception raised while producing or logging the reply is printed
    in the chat output instead of propagating.
    """
    user_input = user_input_box.value.strip()

    if not user_input:
        return  # Ignore empty input

    if user_input.lower() in ["exit", "quit"]:
        stop_chat()
        return

    # Display user input
    with chat_output:
        print(f"User: {user_input}")

    # Process response using combined approach
    try:
        response_text = process_user_input(user_input, history)
        # Wrap line by line: re-flowing the whole reply as one paragraph would
        # collapse the newlines and indentation of any Python code in it.
        wrapped_response = _wrap_preserving_lines(response_text, width=120)

        with chat_output:
            print(f"ChatBot: {wrapped_response}")

        # Update conversation history (the unwrapped reply is stored)
        history.append({"user_input": user_input, "assistant": response_text})

        # Log conversation to file
        log_file.write(f"User: {user_input}\n")
        log_file.write(f"ChatBot: {response_text}\n\n")
        log_file.flush()  # Ensure data is written immediately

    except Exception as e:
        with chat_output:
            print(f"Error: {e}")

    # Clear input box for next message
    user_input_box.value = ""

def handle_keypress(change):
    """Submit the message once the text box value ends with a newline.

    ``change`` is the mapping handed to the observer callback; only its
    ``"name"`` and ``"new"`` entries are read.
    """
    if change["name"] != "value":
        return
    if not change["new"].endswith("\n"):
        return
    handle_input()

def stop_chat(_=None):
    """End the chat session.

    Closes the log file, prints the goodbye message in the chat output and
    disables the input controls. The single optional argument is ignored, so
    the function can be called directly or used as a callback that is handed
    one argument.
    """
    # The module-level handle is only read here, so no `global` statement is required.
    log_file.close()

    farewell = "\nGoodbye! Chat history saved to 'rag_chat_history.txt'."
    with chat_output:
        print(farewell)

    disable_input()

def disable_input():
    """Closes the input box and disables the End Chat button after the chat ends."""
    # The text box is closed outright rather than just disabled.
    user_input_box.close()
    # The button is only marked as disabled.
    end_chat_button.disabled = True

# Bind buttons and input events
# stop_chat() accepts one optional argument, so it can be registered directly as the click callback.
end_chat_button.on_click(stop_chat)

# Attach event listener for Enter
# handle_keypress() is registered for changes of the "value" trait only; it submits
# the message when the new value ends with a newline.
user_input_box.observe(handle_keypress, names="value")

# Function to analyze token usage (for debugging/optimization)
def count_tokens(text: str, model: str = "gpt-4o-mini") -> int:
    """Return the number of tokens ``text`` encodes to for ``model``.

    Args:
        text: The text to tokenise.
        model: Model name used to select the tiktoken encoding.

    Returns:
        The length of the encoded token sequence.

    If tiktoken cannot map ``model`` to an encoding (it raises ``KeyError``),
    the ``o200k_base`` encoding is used instead, so the count is then an
    approximation rather than an error.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to a general-purpose encoding.
        encoding = tiktoken.get_encoding("o200k_base")
    return len(encoding.encode(text))

More documentation here:
https://python.langchain.com/docs/integrations/chat/openai/

### Hint: in LangChain you can have RAG via RetrievalQA and use FAISS