In [2]:
# 安装 langchain-airbyte 包
# 使用 %pip install -qU 命令来安装，-q 参数表示安静模式，不显示安装过程中的详细信息
# -U 参数表示升级已安装的包到最新版本
%pip install -qU langchain-airbyte

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Import the getpass module, used to read a secret from the user.
import getpass

# Ask the user for a GitHub personal access token and store it in GITHUB_TOKEN.
# An explicit prompt is passed because getpass's default prompt is
# "Password: ", which does not tell the user which secret is expected.
GITHUB_TOKEN = getpass.getpass("GitHub personal access token: ")

In [12]:
from langchain_airbyte import AirbyteLoader
from langchain_core.prompts import PromptTemplate

# Configuration handed to the Airbyte "source-github" connector: the
# personal access token as credentials, plus the list of repositories.
github_source_config = {
    "credentials": {"personal_access_token": GITHUB_TOKEN},
    "repositories": ["langchain-ai/langchain"],
}

# Template passed to the loader; it references the record's title,
# the login of its user, and its body.
pull_request_template = PromptTemplate.from_template(
    """# {title}
by {user[login]}

{body}"""
)

# Build the loader for the "pull_requests" stream and load all documents.
loader = AirbyteLoader(
    source="source-github",
    stream="pull_requests",
    config=github_source_config,
    template=pull_request_template,
    include_metadata=False,
)
docs = loader.load()

In [19]:
# Print the page content of the second-to-last document in docs.
second_to_last_doc = docs[-2]
print(second_to_last_doc.page_content)

# Updated partners/ibm README
by williamdevena

## PR title
partners: changed the README file for the IBM Watson AI integration in the libs/partners/ibm folder.

## PR message
Description: Changed the README file of partners/ibm following the docs on https://python.langchain.com/docs/integrations/llms/ibm_watsonx

The README includes:

- Brief description
- Installation
- Setting-up instructions (API key, project id, ...)
- Basic usage:
  - Loading the model
  - Direct inference
  - Chain invoking
  - Streaming the model output
  
Issue: https://github.com/langchain-ai/langchain/issues/17545

Dependencies: None

Twitter handle: None


In [39]:
# Number of entries in the docs list; displayed as the cell's result.
doc_count = len(docs)
doc_count

10283

In [29]:
import tiktoken
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Encoding whose special-token set is used to build the disallow list.
enc = tiktoken.get_encoding("cl100k_base")

# Disallow every special token of the encoding except "<|endofprompt|>".
blocked_special_tokens = enc.special_tokens_set - {"<|endofprompt|>"}
embedding_model = OpenAIEmbeddings(disallowed_special=blocked_special_tokens)

# Build the Chroma vector store from docs using the embedding model above.
vectorstore = Chroma.from_documents(docs, embedding=embedding_model)

In [40]:
# Obtain a retriever object from the vector store via as_retriever().
retriever = vectorstore.as_retriever()

In [42]:

# Call the retriever's invoke method with the query text; the returned
# value is the cell's last expression, so the notebook displays it.
query = "pull requests related to IBM"
retriever.invoke(query)

[Document(page_content='# Updated partners/ibm README\nby williamdevena\n\n## PR title\r\npartners: changed the README file for the IBM Watson AI integration in the libs/partners/ibm folder.\r\n\r\n## PR message\r\nDescription: Changed the README file of partners/ibm following the docs on https://python.langchain.com/docs/integrations/llms/ibm_watsonx\r\n\r\nThe README includes:\r\n\r\n- Brief description\r\n- Installation\r\n- Setting-up instructions (API key, project id, ...)\r\n- Basic usage:\r\n  - Loading the model\r\n  - Direct inference\r\n  - Chain invoking\r\n  - Streaming the model output\r\n  \r\nIssue: https://github.com/langchain-ai/langchain/issues/17545\r\n\r\nDependencies: None\r\n\r\nTwitter handle: None'),
 Document(page_content='# Updated partners/ibm README\nby williamdevena\n\n## PR title\r\npartners: changed the README file for the IBM Watson AI integration in the `libs/partners/ibm` folder. \r\n\r\n\r\n\r\n## PR message\r\n- **Description:** Changed the README fi