In [1]:
# TODO
import os 
import openai 
from dotenv import load_dotenv 

load_dotenv() 

openai.api_key = os.getenv("OPENAI_API_KEY") 

In [4]:
# Constants 
llm_name = "gpt-3.5-turbo" 
index_name = "deeplearningai-langchain"

In [5]:
from langchain_pinecone import PineconeVectorStore 
from langchain_openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings() 
vectorstore = PineconeVectorStore(embedding=embedding, index_name=index_name)

In [6]:
question = "What are major topics for this class?" 
docs = vectorstore.similarity_search(question, k=3) 
docs

[Document(metadata={'page': 5.0, 'source': 'data/MachineLearning-Lecture01.pdf'}, page_content="So all right, online resources. The class has a home page, so it's in on the handouts. I \nwon't write on the chalkboard — http:// cs229.stanford.edu. And so when there are \nhomework assignments or things like that, we  usually won't sort of — in the mission of \nsaving trees, we will usually not give out many handouts in class. So homework \nassignments, homework solutions will be posted online at the course home page.  \nAs far as this class, I've also written, a nd I guess I've also revised every year a set of \nfairly detailed lecture notes that cover the te chnical content of this  class. And so if you \nvisit the course homepage, you'll also find the detailed lecture notes that go over in detail \nall the math and equations and so on  that I'll be doing in class.  \nThere's also a newsgroup, su.class.cs229, also written on the handout. This is a \nnewsgroup that's sort of a forum for 

In [7]:
from langchain_openai import ChatOpenAI 
llm = ChatOpenAI(model_name=llm_name, temperature=0) 
llm.predict("Hello world!")

'Hello! How can I assist you today?'

In [9]:
# Build prompt
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)

# Run chain
from langchain.chains import RetrievalQA
question = "Is probability a class topic?"
qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=vectorstore.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})


result = qa_chain({"query": question})
result["result"]

'Yes, probability is a class topic. Thanks for asking!'

### Memory

In [11]:
# Implement Memory 
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

### ConversationalRetrievalChain

In [12]:
from langchain.chains import ConversationalRetrievalChain 
retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm, 
    retriever=retriever,
    memory=memory
)

In [13]:
question = "Is probability a class topic?"
result = qa({"question": question})
result

{'question': 'Is probability a class topic?',
 'chat_history': [HumanMessage(content='Is probability a class topic?'),
  AIMessage(content='Yes, probability is a class topic in the course being discussed. The instructor assumes familiarity with basic probability and statistics, so it will likely be covered in the class.')],
 'answer': 'Yes, probability is a class topic in the course being discussed. The instructor assumes familiarity with basic probability and statistics, so it will likely be covered in the class.'}

In [14]:
result['answer']

'Yes, probability is a class topic in the course being discussed. The instructor assumes familiarity with basic probability and statistics, so it will likely be covered in the class.'

In [15]:
question = "Why are those prerequisites needed?"

result = qa({"question": question})
result

{'question': 'Why are those prerequisites needed?',
 'chat_history': [HumanMessage(content='Is probability a class topic?'),
  AIMessage(content='Yes, probability is a class topic in the course being discussed. The instructor assumes familiarity with basic probability and statistics, so it will likely be covered in the class.'),
  HumanMessage(content='Why are those prerequisites needed?'),
  AIMessage(content='The prerequisites for the course being discussed include familiarity with basic probability and statistics, as well as basic linear algebra. For probability and statistics, knowledge of random variables, expectation, variance, and basic concepts is assumed. For linear algebra, understanding matrices, vectors, matrix multiplication, matrix inverse, and eigenvectors is expected. Undergraduate courses like Stat 116 for statistics and Math 51, 103, Math 113, or CS205 for linear algebra are considered sufficient preparation.')],
 'answer': 'The prerequisites for the course being disc

In [16]:
result['answer']

'The prerequisites for the course being discussed include familiarity with basic probability and statistics, as well as basic linear algebra. For probability and statistics, knowledge of random variables, expectation, variance, and basic concepts is assumed. For linear algebra, understanding matrices, vectors, matrix multiplication, matrix inverse, and eigenvectors is expected. Undergraduate courses like Stat 116 for statistics and Math 51, 103, Math 113, or CS205 for linear algebra are considered sufficient preparation.'

### Create a chatbot that works on your documents

In [26]:
# pip install docarray jupyter-bokeh

In [27]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch 
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain 
from langchain.memory import ConversationBufferMemory 

In [28]:
def load_db(file: str, chain_type, k): 
    # Load documents
    loader = PyPDFLoader(file) 
    documents = loader.load() 

    # Split documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    docs = text_splitter.split_documents(documents)

    # Define embeddings 
    embeddings = OpenAIEmbeddings()

    # Create vector database from data 
    db = DocArrayInMemorySearch.from_documents(docs, embedding=embeddings) 

    # Define retriever 
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k}) 

    # Create a chatbot chain. Memory is managed separately 
    qa = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(model_name=llm_name, temperature=0), 
        chain_type=chain_type, 
        retriever=retriever, 
        return_source_documents=True, 
        return_generated_question=True
    )
    return qa
    


In [33]:
import panel as pn
import param

class cbfs(param.Parameterized):
    chat_history = param.List([])
    answer = param.String("")
    db_query  = param.String("")
    db_response = param.List([])
    
    def __init__(self,  **params):
        super(cbfs, self).__init__( **params)
        self.panels = []
        self.loaded_file = "data/MachineLearning-Lecture01.pdf"
        self.qa = load_db(self.loaded_file,"stuff", 4)
    
    def call_load_db(self, count):
        if count == 0 or file_input.value is None:  # init or no file specified :
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")
        else:
            file_input.save("temp.pdf")  # local copy
            self.loaded_file = file_input.filename
            button_load.button_style="outline"
            self.qa = load_db("temp.pdf", "stuff", 4)
            button_load.button_style="solid"
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.extend([(query, result["answer"])])
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer'] 
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, style={'background-color': '#F6F6F6'}))
        ])
        inp.value = ''  #clears loading indicator when cleared
        return pn.WidgetBox(*self.panels,scroll=True)

    @param.depends('db_query ', )
    def get_lquest(self):
        if not self.db_query :
            return pn.Column(
                pn.Row(pn.pane.Markdown(f"Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown(f"DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query )
        )

    @param.depends('db_response', )
    def get_sources(self):
        if not self.db_response:
            return 
        rlist=[pn.Row(pn.pane.Markdown(f"Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history') 
    def get_chats(self):
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist=[pn.Row(pn.pane.Markdown(f"Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self,count=0):
        self.chat_history = []
        return 

In [34]:
cb = cbfs()

file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput( placeholder='Enter text here…')

bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp) 

jpg_pane = pn.pane.Image( './img/convchain.jpg')

tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation,  loading_indicator=True, height=300),
    pn.layout.Divider(),
)
tab2= pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources ),
)
tab3= pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
tab4=pn.Column(
    pn.Row( file_input, button_load, bound_button_load),
    pn.Row( button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic" )),
    pn.layout.Divider(),
    pn.Row(jpg_pane.clone(width=400))
)
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# ChatWithYourData_Bot')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3),('Configure', tab4))
)
pn.extension()
dashboard

BokehModel(combine_events=True, render_bundle={'docs_json': {'7242e05c-8776-43ef-9214-80aba4a61bfe': {'version…

  pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, style={'background-color': '#F6F6F6'}))
  pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, style={'background-color': '#F6F6F6'}))
  pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, style={'background-color': '#F6F6F6'}))
  pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, style={'background-color': '#F6F6F6'}))
