In [1]:
from langchain.document_loaders import WebBaseLoader


In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Web page used as the knowledge source for the retrieval examples below.
SOURCE_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"

# Fetch the page and load it as LangChain documents.
loader = WebBaseLoader(SOURCE_URL)
data = loader.load()

# Split the loaded documents into chunks (chunk_size=500, no overlap) so
# that each chunk can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
all_splits = text_splitter.split_documents(data)

In [3]:

from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma

# Embed every chunk with the GPT4All embedding model and index the
# resulting vectors in a Chroma vector store for similarity search.
embedding_model = GPT4AllEmbeddings()
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding_model,
)

100%|████████████████████████████████████████████████████████████████████| 45.5M/45.5M [00:00<00:00, 98.9MiB/s]


Model downloaded at:  /home/brettin/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin


In [4]:
# Look up the stored chunks closest to the question and report how many
# documents the search returned.
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(query=question)
len(docs)

4

In [5]:
# Display the first document returned by the similarity search
# (its page_content and metadata) to sanity-check the retrieval.
docs[0]

Document(page_content='Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"})

In [6]:
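### Model
### Download a GGML-converted model file (for example a Llama 2 chat model)
### and note its local path; the LlamaCpp cell below loads it from that path.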

In [7]:
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [8]:
import os

# Number of model layers to offload to the GPU. 1 is the usual setting for
# Apple Metal builds.
# NOTE(review): the load log recorded for this cell lists CUDA devices; on a
# CUDA build this value presumably offloads only a single layer, so consider
# raising it (the log reports n_layer = 32) -- confirm against the LlamaCpp docs.
n_gpu_layers = 1
# Should be between 1 and n_ctx; size it to the memory available on your device.
n_batch = 512
# Stream generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Location of the GGML model file. Set the LLAMA_MODEL_PATH environment
# variable to point at your own copy; the default is the path this notebook
# was originally run with.
model_path = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/rbscratch/brettin/.cache/llama-2-7b-chat.ggmlv3.q5_1.bin",
)

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=2048,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    callback_manager=callback_manager,
    verbose=True,
)

ggml_init_cublas: found 8 CUDA devices:
  Device 0: NVIDIA A100-SXM4-40GB, compute capability 8.0
  Device 1: NVIDIA A100-SXM4-40GB, compute capability 8.0
  Device 2: NVIDIA A100-SXM4-40GB, compute capability 8.0
  Device 3: NVIDIA A100-SXM4-40GB, compute capability 8.0
  Device 4: NVIDIA A100-SXM4-40GB, compute capability 8.0
  Device 5: NVIDIA A100-SXM4-40GB, compute capability 8.0
  Device 6: NVIDIA A100-SXM4-40GB, compute capability 8.0
  Device 7: NVIDIA A100-SXM4-40GB, compute capability 8.0
llama.cpp: loading model from /rbscratch/brettin/.cache/llama-2-7b-chat.ggmlv3.q5_1.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 12

In [None]:
# Send a free-form prompt directly to the model. The generated tokens are
# streamed to stdout by the callback handler configured on `llm`.
prompt = "\nQuestion: A rap battle between Stephen Colbert and John Oliver\n"
llm(prompt)



Stephen Colbert:
Yo, John, you're a funny guy
But when it comes to politics, you're as dry as the Sahara
You talk about issues with a straight face
While I bring the laughs and the heat in this race

John Oliver:
Oh, Stephen, you think you're

In [None]:
#
# TODO: add a future demo that uses a different local model running on CPU only:
# from langchain.llms import GPT4All
#

In [None]:
# Run an LLMChain (see the LangChain LLMChain documentation) with either model by passing in the retrieved docs and a simple prompt.

An LLMChain is a simple chain that adds some functionality around language models. It is used widely throughout LangChain, including in other chains and agents.

An LLMChain consists of a PromptTemplate and a language model (either an LLM or a chat model). It formats the prompt template using the input key values provided (and also memory key values, if available), passes the formatted string to the LLM, and returns the LLM output.

In [None]:
from langchain import PromptTemplate, LLMChain

# Prompt: a single-variable template; `{docs}` is replaced by the text of the
# retrieved chunks.
prompt = PromptTemplate.from_template(
    "Summarize the main themes in these retrieved docs: {docs}"
)

# Chain: formats the prompt and sends it to the local model.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Retrieve the chunks relevant to the question.
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(question)

# Run: pass only the chunk text. Interpolating the raw list of Document
# objects would put their full repr (including the repeated page metadata)
# into the prompt, wasting the model's limited context window.
docs_text = "\n\n".join(doc.page_content for doc in docs)
result = llm_chain({"docs": docs_text})

# Output: the generated summary is stored under the "text" key.
result["text"]

In [15]:
from langchain.retrievers import PubMedRetriever

In [18]:
# Search PubMed through LangChain's retriever and display the documents
# it returns for the query.
pubmed_query = "deep learning approaches to antibiotic discovery"
retriever = PubMedRetriever(doc_content_chars_max=10000)
retriever.get_relevant_documents(pubmed_query)

Too Many Requests, waiting for 0.20 seconds...


[Document(page_content='', metadata={'uid': '37493633', 'Title': {'i': 'Pseudomonas aeruginosa', '#text': 'Molecular determinants of avoidance and inhibition of  MexB efflux pump.'}, 'Published': '2023-07-26', 'Copyright Information': ''}),
 Document(page_content='', metadata={'uid': '37369638', 'Title': 'iAMPCN: a deep-learning approach for identifying antimicrobial peptides and their functional activities.', 'Published': '--', 'Copyright Information': '© The Author(s) 2023. Published by Oxford University Press. All rights reserved. For Permissions, please email: journals.permissions@oup.com.'}),
 Document(page_content='', metadata={'uid': '37368803', 'Title': 'DeepTPpred: A Deep Learning Approach with Matrix Factorization for Predicting Therapeutic Peptides by Integrating Length Information.', 'Published': '2023-06-27', 'Copyright Information': ''})]

In [None]:
retriever.