In [1]:
# Import libraries
import os
import chromadb
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.embeddings import OpenAIEmbeddings

# Ollama
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader

from langchain.agents import Tool, AgentExecutor
# from langchain.agents import create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [2]:
# Set the API key when using OpenAI embeddings
# os.environ["OPENAI_API_KEY"] = "sk-..."

# Load the embeddings and the model
This notebook uses the Mistral 7B model together with the Ollama embeddings. You can also use the OpenAI embeddings instead.

In [2]:
# Embedding model: Ollama embeddings backed by the "mistral" model.
# To use OpenAI embeddings instead:
# embedding = OpenAIEmbeddings()
embeddings_open = OllamaEmbeddings(model="mistral")

# Generation model: Ollama "mistral" (alternative model name: 'Llama2').
# The callback manager carries a stdout streaming handler.
llm_open = Ollama(
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    model="mistral",
)

# Load the data 
Load the data from the directory and split it into chunks. 

In [3]:
# Root folder of the input data. The GENAI_DATA_DIR environment variable can
# override it so the notebook runs on another machine without editing paths;
# the default is the original location.
# (DirectoryLoader is already imported in the first cell, so it is not
# re-imported here.)
DATA_DIR = os.environ.get("GENAI_DATA_DIR", "/Users/davenull311/Projects/GenAI/data")

# Load the text files of the small LangChain documentation set
# (files matching the glob "./*.txt" inside the folder).
loader = DirectoryLoader(os.path.join(DATA_DIR, "langchain_doc_small"), glob="./*.txt")

# Alternative: load a single PDF instead of the text files
# loader = PyPDFLoader(os.path.join(DATA_DIR, "PDFs", "How_to_build_your_carreer_in_AI.pdf"))

# Alternative: load another file directly
# loader = DirectoryLoader('/your/path/to/file.txt')

doc = loader.load()

# Number of loaded documents (displayed as the cell output).
len(doc)

41

# Print the first document

In [4]:
print(doc[0])

page_content='Aleph Alpha\n\nAleph Alpha# The Luminous series is a family of large language models. This example goes over how to use LangChain to interact with Aleph Alpha models\n\n# Install the package\n\n!pip install aleph\n\nalpha\n\nclient\n\n# create a new token: https://docs.aleph-alpha.com/docs/account/#create-a-new-token\n\nfrom getpass import getpass\n\nALEPH_ALPHA_API_KEY = getpass()\n\nfrom langchain.llms import AlephAlpha from langchain import PromptTemplate, LLMChain\n\ntemplate = """Q: {question}\n\nA:"""\n\nprompt = PromptTemplate(template=template, input_variables=["question"])\n\nllm = AlephAlpha(model="luminous\n\nextended", maximum_tokens=20, stop_sequences=["Q:"], aleph_alpha_api_key=ALEPH_ALPHA_API_KEY)\n\nllm_chain = LLMChain(prompt=prompt, llm=llm)\n\nquestion = "What is AI?"\n\nllm_chain.run(question)\n\n\' Artificial Intelligence (AI) is the simulation of human intelligence processes by machines, especially computer systems.\\n\'\n\nprevious\n\nAI21\n\nnext\n

# Split the text into chunks

In [5]:
# Chunking configuration: chunk_size 500 with an overlap of 50 between
# neighbouring chunks; separators are not interpreted as regular expressions.
text_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,
    chunk_overlap=50,
    chunk_size=500,
)

# Split every loaded document into chunks.
texts = text_splitter.split_documents(doc)

# Count the number of chunks

In [6]:
len(texts)

377

# Print the first chunk

In [7]:
texts[0]

Document(page_content='Aleph Alpha\n\nAleph Alpha# The Luminous series is a family of large language models. This example goes over how to use LangChain to interact with Aleph Alpha models\n\n# Install the package\n\n!pip install aleph\n\nalpha\n\nclient\n\n# create a new token: https://docs.aleph-alpha.com/docs/account/#create-a-new-token\n\nfrom getpass import getpass\n\nALEPH_ALPHA_API_KEY = getpass()\n\nfrom langchain.llms import AlephAlpha from langchain import PromptTemplate, LLMChain\n\ntemplate = """Q: {question}\n\nA:"""', metadata={'source': '/Users/davenull311/Projects/GenAI/data/langchain_doc_small/20_Aleph_Alpha_Aleph.txt'})

# Embed and store the texts
Supplying a persist_directory will store the embeddings on disk, so that they can be loaded later.

In [8]:
# Choose where the vector store will be persisted. Exactly one
# `persist_directory` assignment should be active.

# PDFs from directory
# persist_directory = 'PDFs_How_to_build_your_carreer_in_AI'
# NOTE(review): this message is printed unconditionally, even though the PDF
# option above is commented out.
print ('PDFs done')

# Langchain documentation
# NOTE(review): the same value is assigned again in a later cell, right before
# the persisted store is created.
persist_directory = '/Users/davenull311/Projects/GenAI/data/'
# NOTE(review): printed right after the assignment; nothing is embedded or
# stored in this cell.
print ('Texts done')

# Your documents
# persist_directory = 'your_new_database'



PDFs done
Texts done


In [10]:
# Create a Chroma client with default settings.
# NOTE(review): `client` is not referenced by any later cell visible here —
# confirm it is still needed.
client = chromadb.Client()

In [11]:
# Build a vector store from the first five chunks; no persist_directory is given.
# NOTE(review): `vectordb1` is not used by any later cell visible here —
# looks like a quick smoke test; confirm.
vectordb1 = Chroma.from_documents(texts[:5], embeddings_open)

In [13]:
# Folder the Chroma store is persisted to.
persist_directory = '/Users/davenull311/Projects/GenAI/data/'

# Embed only the first three chunks and build the store under persist_directory.
vectordb = Chroma.from_documents(
    texts[:3],
    embeddings_open,
    persist_directory=persist_directory,
)

# Save to disk


In [14]:
# Persist the db to disk
vectordb.persist()
# Drop the reference so that the next cell has to load the store from
# persist_directory rather than reuse this object.
vectordb = None

# Now we can load the persisted database from disk, and use it as normal.

In [15]:
# Open the store located at persist_directory, using the Ollama embeddings.
# (When working with OpenAI embeddings, pass embedding_function=embedding instead.)
vectordb = Chroma(
    embedding_function=embeddings_open,
    persist_directory=persist_directory,
)

# Create the retriever

In [17]:
# Expose the vector store as a retriever and run a sample query against it.
retriever = vectordb.as_retriever()
# The recorded output warns that 4 results were requested while the index
# holds 3 elements: the persisted store was built from texts[:3] only.
docs = retriever.get_relevant_documents("What is this document about?")


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


# Print the number of documents that are returned

In [18]:
# Number of documents returned by the retriever. The bare `docs` expression
# that used to precede this line was evaluated and discarded, since only the
# last expression of a cell is displayed.
len(docs)

3

In [19]:
docs

[Document(page_content='template = """Q: {question}\n\nA:"""\n\nprompt = PromptTemplate(template=template, input_variables=["question"])\n\nllm = AlephAlpha(model="luminous\n\nextended", maximum_tokens=20, stop_sequences=["Q:"], aleph_alpha_api_key=ALEPH_ALPHA_API_KEY)\n\nllm_chain = LLMChain(prompt=prompt, llm=llm)\n\nquestion = "What is AI?"\n\nllm_chain.run(question)\n\n\' Artificial Intelligence (AI) is the simulation of human intelligence processes by machines, especially computer systems.\\n\'\n\nprevious\n\nAI21\n\nnext\n\nAnyscale', metadata={'source': '/Users/davenull311/Projects/GenAI/data/langchain_doc_small/20_Aleph_Alpha_Aleph.txt'}),
 Document(page_content='Aleph Alpha\n\nAleph Alpha# The Luminous series is a family of large language models. This example goes over how to use LangChain to interact with Aleph Alpha models\n\n# Install the package\n\n!pip install aleph\n\nalpha\n\nclient\n\n# create a new token: https://docs.aleph-alpha.com/docs/account/#create-a-new-token\n

In [20]:
def process_llm_response(llm_response):
    """Print the answer of a QA chain followed by the sources it relied on.

    Args:
        llm_response: mapping with a 'result' entry (the answer) and a
            'source_documents' entry (an iterable of objects whose
            ``metadata`` mapping has a 'source' key).
    """
    answer = llm_response['result']
    print(answer)

    # Header first, then one source path per line.
    print('\n\nSources:')
    for document in llm_response["source_documents"]:
        origin = document.metadata['source']
        print(origin)

# Create the chain to answer questions

In [21]:
# Question-answering chain of type "stuff" on top of the persisted vector store.
qa_chain = RetrievalQA.from_chain_type(
    retriever=vectordb.as_retriever(),
    llm=llm_open,
    chain_type="stuff",
    # process_llm_response reads the 'source_documents' entry of the result.
    return_source_documents=True,
    verbose=True,
)

# Question

In [22]:
# Ask a question through the chain and print the answer with its sources.
# `invoke` is used instead of calling the chain object directly: the recorded
# output of the direct call starts with a deprecation warning
# (`warn_deprecated(`), presumably for the direct-call form.
query = "What is this document about?"
llm_response = qa_chain.invoke({"query": query})
process_llm_response(llm_response)

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


 This document is about using the LangChain library with the Aleph Alpha Luminous extended model to interact with the Aleph Alpha API and ask questions. It includes instructions on how to install the necessary packages, create an API key, and use a PromptTemplate and LLMChain to run the query. The example question asked in this document is "What is AI?" and the expected answer is "Artificial Intelligence (AI) is the simulation of human intelligence processes by machines, especially computer systems."
[1m> Finished chain.[0m
 This document is about using the LangChain library with the Aleph Alpha Luminous extended model to interact with the Aleph Alpha API and ask questions. It includes instructions on how to install the necessary packages, create an API key, and use a PromptTemplate and LLMChain to run the query. The example question asked in this document is "What is AI?" and the expected answer is "Artificial Intelligence (AI) is the simulation of human intelligence processes by ma

# Create a prompt template to use in the chain 

In [65]:
def build_prompt(template_num="template_1"):
    """Return the PromptTemplate selected by ``template_num``.

    Args:
        template_num: ``"template_1"`` (default) selects the prompt with
            ``{context}`` and ``{question}`` placeholders; ``"template_2"``
            selects the shorter prompt that only has ``{question}``.

    Returns:
        A ``PromptTemplate`` built from the chosen template text.

    Raises:
        ValueError: if ``template_num`` is not a known template name.
            (Previously a message was printed and ``None`` was returned,
            leaving the caller with a ``None`` prompt.)
    """
    template = """ You are a helpful chatbot, named RSLT. You answer the questions of the customers giving a lot of details based on what you find in the context.
Do not say anything that is not in the website
You are to act as though you're having a conversation with a human.
You are only able to answer questions, guide and assist, and provide recommendations to users. You cannot perform any other tasks outside of this.
Your tone should be professional and friendly.
Your purpose is to answer questions people might have, however if the question is unethical you can choose not to answer it.
Your responses should always be one paragraph long or less.
    Context: {context}
    Question: {question}
    Helpful Answer:"""

    template2 = """You are a helpful chatbot, named RSLT. You answer the questions of the customers giving a lot of details based on what you find in the context. 
    Your responses should always be one paragraph long or less.
    Question: {question}
    Helpful Answer:"""

    # Template name -> (template text, input variables it expects).
    # NOTE(review): template_2 has no {context} placeholder, so it is presumably
    # not suitable for a chain that injects retrieved context — confirm.
    known_templates = {
        "template_1": (template, ["context", "question"]),
        "template_2": (template2, ["question"]),
    }

    # Fail loudly on an unknown name instead of silently returning None.
    if template_num not in known_templates:
        raise ValueError(
            f"Please choose a valid template (got {template_num!r}; "
            f"expected one of: {', '.join(known_templates)})"
        )

    text, variables = known_templates[template_num]
    return PromptTemplate(input_variables=variables, template=text)

# Create the chain to answer questions with the custom prompt

In [66]:
# Rebuild the QA chain, this time with the custom prompt from build_prompt.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm_open,
    chain_type="stuff",
    chain_type_kwargs={"prompt": build_prompt("template_1")},
    # process_llm_response reads the 'source_documents' entry of the result.
    return_source_documents=True,
    verbose=True,
)

# Question


In [67]:
# Ask the same question through the chain that uses the custom prompt.
# `invoke` is used instead of the direct call `qa_chain(query)`, which an
# earlier cell's recorded output flags with a deprecation warning.
query = "What is this document about?"
llm_response = qa_chain.invoke({"query": query})
process_llm_response(llm_response)



[1m> Entering new RetrievalQA chain...[0m
This document appears to be a guide for users who want to quickly get started with a specific software or system. The guide is divided into several sections, including Getting Started, Modules, Use Cases, Reference Docs, Ecosystem, and Additional Resources. It seems that the guide provides an overview of the features and functionalities of the software, as well as some practical examples of how to use it in different contexts. The author of the guide is Harrison Chase, and it was last updated on June 14, 2023.
[1m> Finished chain.[0m
This document appears to be a guide for users who want to quickly get started with a specific software or system. The guide is divided into several sections, including Getting Started, Modules, Use Cases, Reference Docs, Ecosystem, and Additional Resources. It seems that the guide provides an overview of the features and functionalities of the software, as well as some practical examples of how to use it in d

# Follow-up question

In [68]:
# Follow-up question. The query previously misspelled "LangChain" as
# "Lanchain"; the text is embedded for retrieval and sent to the model, so
# the spelling matters. `invoke` is used instead of the direct call
# `qa_chain(query)`, which an earlier cell's recorded output flags with a
# deprecation warning.
query = "What is LangChain?"
llm_response = qa_chain.invoke({"query": query})
process_llm_response(llm_response)



[1m> Entering new RetrievalQA chain...[0m
LangChain is a framework for developing applications powered by language models. It enables developers to build data-aware and agentic applications that can call out to a language model, connect it to other sources of data, and allow it to interact with its environment. LangChain includes a variety of modules, use cases, reference documents, an ecosystem, and additional resources to support the development of these applications.
[1m> Finished chain.[0m
LangChain is a framework for developing applications powered by language models. It enables developers to build data-aware and agentic applications that can call out to a language model, connect it to other sources of data, and allow it to interact with its environment. LangChain includes a variety of modules, use cases, reference documents, an ecosystem, and additional resources to support the development of these applications.


Sources:
/Users/erictak/PycharmProjects/freya/data/langchain