From e6af460153bf9e26aad9481b611e659663c81160 Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Tue, 14 Nov 2023 08:54:17 -0800 Subject: [PATCH 1/2] proposed new format for integrations --- docs/integrations/langchain.md | 102 ++++++++++++++++++-------------- docs/integrations/langchain2.md | 56 ++++++++++++++++++ 2 files changed, 112 insertions(+), 46 deletions(-) create mode 100644 docs/integrations/langchain2.md diff --git a/docs/integrations/langchain.md b/docs/integrations/langchain.md index c5d9eb1..b68c4d1 100644 --- a/docs/integrations/langchain.md +++ b/docs/integrations/langchain.md @@ -3,54 +3,64 @@ slug: /integrations/langchain title: 🦜️🔗 Langchain --- -## Langchain - Python +LangChain is a popular open-source framework for developing applications powered by language models. -- [LangChain + Chroma](https://blog.langchain.dev/langchain-chroma/) on the LangChain blog -- [Harrison's `chroma-langchain` demo repo](https://github.com/hwchase17/chroma-langchain) - - [question answering over documents](https://github.com/hwchase17/chroma-langchain/blob/master/qa.ipynb) - ([Replit version](https://replit.com/@swyx/LangChainChromaStarter#main.py)) - - [to use Chroma as a persistent database](https://github.com/hwchase17/chroma-langchain/blob/master/persistent-qa.ipynb) -- Tutorials - - [Chroma and LangChain tutorial](https://github.com/grumpyp/chroma-langchain-tutorial) - The demo showcases how to pull data from the English Wikipedia using their API. The project also demonstrates how to vectorize data in chunks and get embeddings using OpenAI embeddings model. - - [Create a Voice-based ChatGPT Clone That Can Search on the Internet and local files](https://betterprogramming.pub/how-to-create-a-voice-based-chatgpt-clone-that-can-search-on-the-internet-24d7f570ea8) -- [LangChain's Chroma Documentation](https://python.langchain.com/en/latest/modules/indexes/vectorstores.html?highlight=chroma#langchain.vectorstores.Chroma) +> Insert star counter... 
other stuff like that + +### Install + +`pip install langchain` / `yarn add langchain` + +### Main Benefits + +- 1 +- 2 +- 3 + +### Simple Example + +#### Python + +```python +import chromadb +from langchain.vectorstores import Chroma +from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings + +# Chroma code +persistent_client = chromadb.PersistentClient() +collection = persistent_client.get_or_create_collection("collection_name") +collection.add(ids=["1", "2", "3"], documents=["a", "b", "c"]) +# LangChain Code +embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2") -## Langchain - JS - -Here is an [example in LangChainJS](https://github.com/hwchase17/langchainjs/blob/main/examples/src/chains/chat_vector_db_chroma.ts) - -```javascript -import { OpenAI } from "langchain/llms/openai"; -import { ConversationalRetrievalQAChain } from "langchain/chains"; -import { Chroma } from "langchain/vectorstores/chroma"; -import { OpenAIEmbeddings } from "langchain/embeddings/openai"; -import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; -import * as fs from "fs"; - -// to run this first run a chroma server with `chroma run --path /path/to/data` - -export const run = async () => { - /* Initialize the LLM to use to answer the question */ - const model = new OpenAI(); - /* Load in the file we want to do question answering over */ - const text = fs.readFileSync("state_of_the_union.txt", "utf8"); - /* Split the text into chunks */ - const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); - const docs = await textSplitter.createDocuments([text]); - /* Create the vectorstore */ - const vectorStore = await Chroma.fromDocuments(docs, new OpenAIEmbeddings(), { - collectionName: "state_of_the_union", - }); - /* Create the chain */ - const chain = ConversationalRetrievalQAChain.fromLLM( - model, - vectorStore.asRetriever() - ); - /* Ask it a question */ - const question = "What did the president 
say about Justice Breyer?"; - const res = await chain.call({ question, chat_history: [] }); - console.log(res); -}; +langchain_chroma = Chroma( + client=persistent_client, + collection_name="collection_name", + embedding_function=embedding_function, +) +# Important! - the embedding function passed to langchain is their wrapper, not Chroma's + + +print("There are", langchain_chroma._collection.count(), "in the collection") ``` -- [LangChainJS Chroma Documentation](https://js.langchain.com/docs/modules/indexes/vector_stores/integrations/chroma) \ No newline at end of file +#### Javascript + +```js +// stuff goes here +``` + +### Resources + +- [LangChain + Chroma Announcement Post](https://blog.langchain.dev/langchain-chroma/) on the LangChain blog +- [LangChain's Chroma Documentation](https://python.langchain.com/en/latest/modules/indexes/vectorstores.html?highlight=chroma#langchain.vectorstores.Chroma) + +#### Tutorials + + - [Chroma and LangChain tutorial](https://github.com/grumpyp/chroma-langchain-tutorial) - The demo showcases how to pull data from the English Wikipedia using their API. The project also demonstrates how to vectorize data in chunks and get embeddings using OpenAI embeddings model. 
+ - [Create a Voice-based ChatGPT Clone That Can Search on the Internet and local files](https://betterprogramming.pub/how-to-create-a-voice-based-chatgpt-clone-that-can-search-on-the-internet-24d7f570ea8) +- [Harrison's `chroma-langchain` demo repo](https://github.com/hwchase17/chroma-langchain) + - [question answering over documents](https://github.com/hwchase17/chroma-langchain/blob/master/qa.ipynb) - ([Replit version](https://replit.com/@swyx/LangChainChromaStarter#main.py)) + - [to use Chroma as a persistent database](https://github.com/hwchase17/chroma-langchain/blob/master/persistent-qa.ipynb) + \ No newline at end of file diff --git a/docs/integrations/langchain2.md b/docs/integrations/langchain2.md new file mode 100644 index 0000000..c5d9eb1 --- /dev/null +++ b/docs/integrations/langchain2.md @@ -0,0 +1,56 @@ +--- +slug: /integrations/langchain +title: 🦜️🔗 Langchain +--- + +## Langchain - Python + +- [LangChain + Chroma](https://blog.langchain.dev/langchain-chroma/) on the LangChain blog +- [Harrison's `chroma-langchain` demo repo](https://github.com/hwchase17/chroma-langchain) + - [question answering over documents](https://github.com/hwchase17/chroma-langchain/blob/master/qa.ipynb) - ([Replit version](https://replit.com/@swyx/LangChainChromaStarter#main.py)) + - [to use Chroma as a persistent database](https://github.com/hwchase17/chroma-langchain/blob/master/persistent-qa.ipynb) +- Tutorials + - [Chroma and LangChain tutorial](https://github.com/grumpyp/chroma-langchain-tutorial) - The demo showcases how to pull data from the English Wikipedia using their API. The project also demonstrates how to vectorize data in chunks and get embeddings using OpenAI embeddings model. 
+ - [Create a Voice-based ChatGPT Clone That Can Search on the Internet and local files](https://betterprogramming.pub/how-to-create-a-voice-based-chatgpt-clone-that-can-search-on-the-internet-24d7f570ea8) +- [LangChain's Chroma Documentation](https://python.langchain.com/en/latest/modules/indexes/vectorstores.html?highlight=chroma#langchain.vectorstores.Chroma) + + +## Langchain - JS + +Here is an [example in LangChainJS](https://github.com/hwchase17/langchainjs/blob/main/examples/src/chains/chat_vector_db_chroma.ts) + +```javascript +import { OpenAI } from "langchain/llms/openai"; +import { ConversationalRetrievalQAChain } from "langchain/chains"; +import { Chroma } from "langchain/vectorstores/chroma"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import * as fs from "fs"; + +// to run this first run a chroma server with `chroma run --path /path/to/data` + +export const run = async () => { + /* Initialize the LLM to use to answer the question */ + const model = new OpenAI(); + /* Load in the file we want to do question answering over */ + const text = fs.readFileSync("state_of_the_union.txt", "utf8"); + /* Split the text into chunks */ + const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); + const docs = await textSplitter.createDocuments([text]); + /* Create the vectorstore */ + const vectorStore = await Chroma.fromDocuments(docs, new OpenAIEmbeddings(), { + collectionName: "state_of_the_union", + }); + /* Create the chain */ + const chain = ConversationalRetrievalQAChain.fromLLM( + model, + vectorStore.asRetriever() + ); + /* Ask it a question */ + const question = "What did the president say about Justice Breyer?"; + const res = await chain.call({ question, chat_history: [] }); + console.log(res); +}; +``` + +- [LangChainJS Chroma Documentation](https://js.langchain.com/docs/modules/indexes/vector_stores/integrations/chroma) \ No newline at 
end of file From a7100999aeb9d768b631e3e540e9b17e75cc63cb Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Tue, 19 Dec 2023 14:45:49 -0800 Subject: [PATCH 2/2] more work --- docs/integrations/langchain.md | 49 +++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/docs/integrations/langchain.md b/docs/integrations/langchain.md index b68c4d1..e389ff1 100644 --- a/docs/integrations/langchain.md +++ b/docs/integrations/langchain.md @@ -1,11 +1,18 @@ --- slug: /integrations/langchain -title: 🦜️🔗 Langchain +title: 🦜️🔗 LangChain --- LangChain is a popular open-source framework for developing applications powered by language models. -> Insert star counter... other stuff like that + + +[MIT License](https://github.com/langchain-ai/langchain/blob/master/LICENSE)  • [Site](https://www.langchain.com/) + +| Languages | Docs | GitHub | +|---|---|---| +|Python | [Docs](https://python.langchain.com/docs/get_started/introduction) | [Code](https://github.com/langchain-ai/langchain) +|JS | [Docs](https://js.langchain.com/docs/get_started/introduction) | [Code](https://github.com/langchain-ai/langchainjs) ### Install @@ -13,9 +20,9 @@ LangChain is a popular open-source framework for developing applications powered ### Main Benefits -- 1 -- 2 -- 3 +- Common Patterns for chain-of-thought and prompt templating +- Many integrations and data loaders +- Deep integration with LangSmith monitoring (developed by the same team) ### Simple Example @@ -48,7 +55,37 @@ print("There are", langchain_chroma._collection.count(), "in the collection") #### Javascript ```js -// stuff goes here +import { OpenAI } from "langchain/llms/openai"; +import { ConversationalRetrievalQAChain } from "langchain/chains"; +import { Chroma } from "langchain/vectorstores/chroma"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import * as fs from "fs"; + +// to run this first run a 
chroma server with `chroma run --path /path/to/data` + +export const run = async () => { + /* Initialize the LLM to use to answer the question */ + const model = new OpenAI(); + /* Load in the file we want to do question answering over */ + const text = fs.readFileSync("state_of_the_union.txt", "utf8"); + /* Split the text into chunks */ + const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); + const docs = await textSplitter.createDocuments([text]); + /* Create the vectorstore */ + const vectorStore = await Chroma.fromDocuments(docs, new OpenAIEmbeddings(), { + collectionName: "state_of_the_union", + }); + /* Create the chain */ + const chain = ConversationalRetrievalQAChain.fromLLM( + model, + vectorStore.asRetriever() + ); + /* Ask it a question */ + const question = "What did the president say about Justice Breyer?"; + const res = await chain.call({ question, chat_history: [] }); + console.log(res); +}; ``` ### Resources