In [19]:
# Cell 1: imports and environment setup
import os
from dotenv import load_dotenv
import panel as pn  # GUI toolkit used for the dashboard at the end of the notebook

# Initialise Panel for notebook rendering, requesting the 'tabulator' extension
pn.extension('tabulator')

# Load variables from a .env file into the process environment,
# so credentials are not written into the notebook itself
load_dotenv()

# Azure OpenAI settings read from the environment.
# os.getenv returns None for an unset variable, so a missing entry is not
# reported here; it only surfaces later when a client is built or called.
azure_openai_api_key = os.getenv("AZURE_OPENAI_KEY")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_api_version = os.getenv("AZURE_OPENAI_VERSION")
azure_openai_deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
azure_openai_embedding_deployment_name = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")  # deployment used for embeddings

print("Libraries imported successfully!")

Libraries imported successfully!


In [20]:
# Cell 2: embedding client and persisted Chroma vector store
from langchain_chroma import Chroma  # Chroma vector store integration
from langchain_openai import AzureOpenAIEmbeddings

persist_directory = 'docs/chroma/'

# Embedding client built from the Azure OpenAI settings read in Cell 1
embedding = AzureOpenAIEmbeddings(
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_api_key,
    api_version=azure_openai_api_version,
    azure_deployment=azure_openai_embedding_deployment_name  # name of the embedding deployment
)

# Attach to the store under persist_directory; no documents are added in this cell.
# NOTE(review): presumably the collection was populated by an earlier step — confirm.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding
)

# Sanity check. NOTE: _collection is a private attribute of the Chroma wrapper.
print(f"Vector DB loaded with {vectordb._collection.count()} documents")

Vector DB loaded with 208 documents


In [21]:
# Retrieval sanity check: request the 3 nearest chunks for a sample question
question = "What are major topics for this class?"
docs = vectordb.similarity_search(question,k=3)
# Display how many documents came back
len(docs)

3

In [22]:
# Cell 3: chat model setup
from langchain_openai import AzureChatOpenAI

# Chat model client built from the Azure OpenAI settings read in Cell 1
llm = AzureChatOpenAI(
    azure_endpoint=azure_openai_endpoint,  # Azure resource endpoint
    azure_deployment=azure_openai_deployment_name,
    api_version=azure_openai_api_version,
    api_key=azure_openai_api_key,
    temperature=0  # lowest sampling temperature, for repeatable answers
)

print("LLM initialized successfully")

LLM initialized successfully


In [23]:
# Build prompt
from langchain.prompts import PromptTemplate
# The template exposes two placeholders, {context} and {question},
# matching the input_variables declared on the PromptTemplate below.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)

# Run chain
from langchain.chains import RetrievalQA
question = "Is probability a class topic?"
# The custom prompt is handed to the chain through chain_type_kwargs;
# return_source_documents=True requests the retrieved documents in the result.
qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=vectordb.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})


# The question goes in under the "query" key; the answer is read back from "result"
result = qa_chain.invoke({"query": question})
result["result"]

'Yes, probability is a class topic, as the instructor assumes familiarity with basic probability and statistics and uses probabilistic interpretations in deriving learning algorithms. Refresher sessions on probability are also mentioned for those needing review. Thanks for asking!'

In [24]:
# Add conversation memory for the chatbot
from langchain.memory import ConversationBufferMemory
# memory_key="chat_history" names the variable the history is stored under.
# NOTE(review): return_messages=True presumably yields message objects rather
# than a single string — confirm against the LangChain memory documentation.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

In [25]:
from langchain.chains import ConversationalRetrievalChain
# Conversational chain over the Chroma retriever. The memory object is attached
# here, and the calls below pass only "question" (no explicit chat_history).
retriever=vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory
)

In [10]:
# First turn of the conversation.
question = "Is probability a class topic?"
# Use invoke() rather than calling the chain object directly: Chain.__call__ is
# deprecated in current LangChain, and invoke() matches the RetrievalQA cell above.
result = qa.invoke({"question": question})

In [11]:
# Display the answer text stored under the "answer" key of the result
result['answer']

'Yes, probability is a class topic. The instructor mentions that the class assumes familiarity with basic probability and statistics, including concepts like random variables, expectation, and variance. Additionally, the probabilistic interpretation of linear regression is discussed, and probability is used to derive learning algorithms, such as classification algorithms.'

In [12]:
# Follow-up turn: "those" refers back to the previous exchange, so this call
# exercises the memory attached to the chain.
question = "why are those prerequesites needed?"
# invoke() replaces the deprecated direct call form qa({...}).
result = qa.invoke({"question": question})

In [13]:
# Display the answer text for the follow-up question
result['answer']

'Familiarity with basic probability and statistics, including concepts like random variables, expectation, and variance, is required as prerequisites for the class because these concepts form the foundation for understanding many machine learning algorithms and techniques. Machine learning often involves working with data and making predictions or decisions under uncertainty, which inherently relies on statistical principles.\n\nFor example:\n\n1. **Random Variables**: Machine learning models often deal with data that can be thought of as random variables. Understanding random variables helps in modeling and analyzing data distributions.\n\n2. **Expectation**: The expectation (or mean) is a fundamental concept used in evaluating the performance of models, such as calculating expected error or loss.\n\n3. **Variance**: Variance is crucial for understanding the variability in data and model predictions. It plays a key role in concepts like bias-variance tradeoff, which is central to opti

In [26]:
#Create chatbot for the set of documents now
from langchain_openai import AzureOpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA,  ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import AzureChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader

def load_db(file, chain_type, k):
    """Build a conversational retrieval chain over a single PDF file.

    The PDF is loaded, split into overlapping chunks, embedded with the Azure
    OpenAI embedding deployment and indexed in an in-memory vector store.

    Args:
        file: Path of the PDF to load.
        chain_type: Value forwarded as ``chain_type`` to
            ``ConversationalRetrievalChain.from_llm`` (e.g. ``"stuff"``).
        k: Number of chunks the retriever is asked to return per query.

    Returns:
        A ``ConversationalRetrievalChain`` configured to return the source
        documents and the generated question along with the answer. No memory
        object is attached, so callers pass ``chat_history`` on each call.
    """
    # load documents
    documents = PyPDFLoader(file).load()

    # split documents into 1000-character chunks with 150 characters of overlap
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    docs = text_splitter.split_documents(documents)

    # define embedding; azure_deployment is the keyword used by the other
    # AzureOpenAIEmbeddings constructions in this notebook
    embeddings = AzureOpenAIEmbeddings(
        azure_endpoint=azure_openai_endpoint,
        api_key=azure_openai_api_key,
        api_version=azure_openai_api_version,
        azure_deployment=azure_openai_embedding_deployment_name
    )

    # create vector database from data
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)

    # define retriever
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})

    # Chat model. `deployment=` is not a recognised AzureChatOpenAI field, so the
    # deployment name must be passed as `azure_deployment=`. temperature=0 matches
    # the other chat model instances in this notebook.
    chat_model = AzureChatOpenAI(
        azure_endpoint=azure_openai_endpoint,
        api_key=azure_openai_api_key,
        api_version=azure_openai_api_version,
        azure_deployment=azure_openai_deployment_name,
        temperature=0
    )

    # create a chatbot chain. Memory is managed externally.
    qa = ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
    return qa


In [27]:
# Cell 4: Fixed Chatbot Class
from langchain_openai import AzureOpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import DocArrayInMemorySearch  # Fixed import
from langchain_community.document_loaders import PyPDFLoader  # Fixed import
from langchain.chains import ConversationalRetrievalChain
import panel as pn
import param

class cbfs(param.Parameterized):
    """State holder and callbacks for the "chat with your PDF" Panel dashboard.

    An instance owns the current retrieval chain (``self.qa``), the running
    chat history and the Panel rows that render the conversation. The widgets
    ``file_input`` and ``inp`` are created outside the class and assigned onto
    the instance after construction.
    """
    chat_history = param.List([])  # (question, answer) tuples of the current conversation
    answer = param.String("")      # most recent answer text
    db_query = param.String("")    # "generated_question" returned by the last chain call
    db_response = param.List([])   # "source_documents" returned by the last chain call

    def __init__(self, **params):
        """Create the chatbot and build the chain for the default lecture PDF."""
        super().__init__(**params)
        self.panels = []
        self.loaded_file = "docs/MachineLearning-Lecture01.pdf"
        # load_db returns None on failure; convchain reports that case to the user.
        self.qa = self.load_db(self.loaded_file, "stuff", 4)
        self.file_input = None  # FileInput widget, assigned by the dashboard cell
        self.inp = None  # TextInput widget, assigned by the dashboard cell

    def load_db(self, file, chain_type, k):
        """Build a conversational retrieval chain over a PDF file.

        Args:
            file: Path of the PDF to load.
            chain_type: Value forwarded as ``chain_type`` to
                ``ConversationalRetrievalChain.from_llm``.
            k: Number of chunks the retriever is asked to return per query.

        Returns:
            The chain, or ``None`` if any step raised (the error is printed).
        """
        try:
            # Load documents
            loader = PyPDFLoader(file)
            documents = loader.load()

            # Split documents
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=150
            )
            docs = text_splitter.split_documents(documents)

            # Define embedding
            embeddings = AzureOpenAIEmbeddings(
                azure_endpoint=azure_openai_endpoint,
                api_key=azure_openai_api_key,
                api_version=azure_openai_api_version,
                azure_deployment=azure_openai_embedding_deployment_name
            )

            # Create vector database
            db = DocArrayInMemorySearch.from_documents(docs, embeddings)

            # Define retriever
            retriever = db.as_retriever(
                search_type="similarity",
                search_kwargs={"k": k}
            )

            # Create chatbot chain; no memory is attached, history is passed per call
            qa = ConversationalRetrievalChain.from_llm(
                llm=AzureChatOpenAI(
                    azure_endpoint=azure_openai_endpoint,
                    api_key=azure_openai_api_key,
                    api_version=azure_openai_api_version,
                    azure_deployment=azure_openai_deployment_name,
                    temperature=0
                ),
                chain_type=chain_type,
                retriever=retriever,
                return_source_documents=True,
                return_generated_question=True,
            )
            return qa
        except Exception as e:
            print(f"Error loading database: {e}")
            return None

    def call_load_db(self, count):
        """Load the PDF selected in the file widget and switch the chatbot to it.

        Args:
            count: Click count of the "Load DB" button; 0 means never clicked.

        Returns:
            A Markdown pane describing the currently loaded file or the error.
        """
        nothing_to_load = (
            count == 0
            or self.file_input is None
            or self.file_input.value is None
        )
        if nothing_to_load:
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

        try:
            self.file_input.save("temp.pdf")
            new_chain = self.load_db("temp.pdf", "stuff", 4)
            if new_chain is None:
                # load_db swallows its own exception and returns None. Keep the
                # previous chain and history instead of reporting a false success.
                return pn.pane.Markdown(
                    f"❌ Error loading file: could not build a database from "
                    f"{self.file_input.filename}"
                )
            self.qa = new_chain
            self.loaded_file = self.file_input.filename
            self.clr_history()
            return pn.pane.Markdown(f"✅ Loaded File: {self.loaded_file}")
        except Exception as e:
            return pn.pane.Markdown(f"❌ Error loading file: {e}")

    def convchain(self, query):
        """Send a user query through the chain and render the conversation.

        Args:
            query: Text typed by the user; an empty value renders an empty row.

        Returns:
            A WidgetBox with all exchanges so far, or one describing the error.
        """
        if not query:
            return pn.WidgetBox(
                pn.Row('User:', pn.pane.Markdown("", width=600)),
                scroll=True
            )

        try:
            if self.qa is None:
                # Give a clear message instead of "'NoneType' object is not callable".
                raise RuntimeError(
                    "No document database is loaded; load a PDF on the Configure tab first."
                )

            # invoke() replaces the deprecated direct call form self.qa({...}).
            result = self.qa.invoke({
                "question": query,
                "chat_history": self.chat_history
            })

            self.answer = result['answer']
            # Reassign rather than mutate in place so param emits a change event
            # for chat_history even when the other parameters keep their values.
            self.chat_history = self.chat_history + [(query, self.answer)]
            self.db_query = result.get("generated_question", "")
            self.db_response = result.get("source_documents", [])

            self.panels.extend([
                pn.Row('User:', pn.pane.Markdown(query, width=600)),
                pn.Row('ChatBot:', pn.pane.Markdown(
                    self.answer,
                    width=600,
                    styles={'background-color': '#F6F6F6'}
                ))
            ])

            if self.inp:
                self.inp.value = ''  # Clear input

            return pn.WidgetBox(*self.panels, scroll=True)
        except Exception as e:
            return pn.WidgetBox(
                pn.Row('Error:', pn.pane.Markdown(f"❌ {str(e)}", width=600)),
                scroll=True
            )

    @param.depends('db_query')
    def get_lquest(self):
        """Render the last question that was sent to the vector database."""
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown("Last question to DB:",
                      styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("No DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown("DB query:",
                  styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query)
        )

    @param.depends('db_response')
    def get_sources(self):
        """Render the source documents returned for the last query."""
        if not self.db_response:
            return pn.pane.Markdown("No sources yet")

        rlist = [pn.Row(pn.pane.Markdown("Result of DB lookup:",
                       styles={'background-color': '#F6F6F6'}))]
        rlist.extend(pn.Row(pn.pane.Str(doc)) for doc in self.db_response)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Render the accumulated (question, answer) history."""
        if not self.chat_history:
            return pn.WidgetBox(
                pn.Row(pn.pane.Str("No History Yet")),
                width=600,
                scroll=True
            )

        rlist = [pn.Row(pn.pane.Markdown("Current Chat History",
                       styles={'background-color': '#F6F6F6'}))]
        rlist.extend(pn.Row(pn.pane.Str(exchange)) for exchange in self.chat_history)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        """Clear the chat history and the rendered conversation rows.

        Args:
            count: Unused; accepts the argument passed by a button callback.
        """
        self.chat_history = []
        self.panels = []

In [29]:
# Cell 5: build the dashboard and wire the widgets to the chatbot
# Create chatbot instance (this builds the chain for the default PDF)
cb = cbfs()

# Create widgets
file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
inp = pn.widgets.TextInput(placeholder='Enter text here…')

# Hand the widgets to the chatbot: call_load_db reads file_input, convchain clears inp
cb.file_input = file_input
cb.inp = inp

# Bind callbacks
button_clearhistory.on_click(cb.clr_history)
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)

# Create image pane; fall back to a text note if the pane cannot be built
# (for example when the image file is missing).
try:
    jpg_pane = pn.pane.Image('./img/convchain.jpg')
except Exception:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit still propagate.
    jpg_pane = pn.pane.Markdown("*Image not found*")

# Create tabs
# Conversation: text input plus the rendered exchange
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation, loading_indicator=True, height=300),
    pn.layout.Divider(),
)

# Database: last generated query and the documents it retrieved
tab2 = pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources),
)

# Chat History: accumulated (question, answer) pairs
tab3 = pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)

# Configure: load another PDF or clear the history
tab4 = pn.Column(
    pn.Row(file_input, button_load, bound_button_load),
    pn.Row(button_clearhistory,
           pn.pane.Markdown("Clears chat history. Can use to start a new topic")),
    pn.layout.Divider(),
    pn.Row(jpg_pane.clone(width=400) if jpg_pane else pn.pane.Markdown(""))
)

# Create dashboard
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# ChatWithYourData_Bot')),
    pn.Tabs(
        ('Conversation', tab1),
        ('Database', tab2),
        ('Chat History', tab3),
        ('Configure', tab4)
    )
)

# Mark the dashboard as servable, then display it inline as the cell's output
dashboard.servable()
dashboard

BokehModel(combine_events=True, render_bundle={'docs_json': {'4c1bf69d-2d06-40ec-a3d9-b69a0378ba2b': {'version…