## VectorDB

In [2]:
from dotenv import load_dotenv

# Read environment variables from the .env file one directory up.
# override=True asks python-dotenv to replace variables that are already set.
ENV_FILE = "../.env"
load_dotenv(dotenv_path=ENV_FILE, override=True)

True

In [3]:
%load_ext autoreload
%autoreload 2
import os
import sys

# 获取当前 notebook 的工作目录（通常是 .ipynb 所在目录）
notebook_dir = os.getcwd()
target_path = os.path.abspath(os.path.join(notebook_dir, "../"))
if target_path not in sys.path:
    sys.path.append(target_path)

from src.vectorstore import get_vector_store
from src.configuration import Configuration
from src.embeddings import get_embeddings_model

collection_name = "langchain"


In [4]:
# Build the vector store from the project configuration and report how many
# vectors the selected collection currently holds.
config = Configuration()
embedding = get_embeddings_model(config.embedding_model)

store_kwargs = {
    "provider": config.retriever_provider,
    "storage_type": config.storage_type,
    "collection_name": collection_name,
    "embedding": embedding,
}
store = get_vector_store(**store_kwargs)

# NOTE: `_collection` is a private attribute of the store object.
collection = store._collection
vector_count = collection.count()
print("Number of vectors in the collection: ", vector_count)

Number of vectors in the collection:  32038


In [7]:
# Print the name of every collection known to the store's underlying client.
# NOTE: `_client` is a private attribute of the store object.
collections = store._client.list_collections()

# The loop variable is deliberately not named `collection`: that name is bound
# to `store._collection` in an earlier cell and must not be overwritten here.
for listed_collection in collections:
    print(listed_collection.name)

langchain
test_collection


In [6]:
# Fetch the single best match for the query, restricted to documents whose
# metadata "type" is "code". Each result is a (document, score) pair.
query_text = "return_uuids from Weaviate"
metadata_filter = {"type": "code"}
results = store.similarity_search_with_score(query_text, k=1, filter=metadata_filter)

In [23]:
from langchain_core.documents.base import Document

# Each search result is a (document, score) pair; take the document of the
# first hit and display its attributes as a plain dict.
first_hit = results[0]
doc: Document = first_hit[0]
vars(doc)

{'id': 'e12ff880-c500-5114-865e-06c3c865b703',
 'metadata': {'title': 'Source code for langchain_community.retrievers.weaviate_hybrid_search',
  'type': 'code',
  'source': 'https://python.langchain.com/api_reference/_modules/langchain_community/retrievers/weaviate_hybrid_search.html',
  'lang': 'python'},
 'page_content': '# Source code for langchain_community.retrievers.weaviate_hybrid_search\n\n```\n\nfrom __future__ import annotations\n\nfrom typing import Any, Dict, List, Optional, cast\nfrom uuid import uuid4\n\nfrom langchain_core._api import deprecated\nfrom langchain_core.callbacks import CallbackManagerForRetrieverRun\nfrom langchain_core.documents import Document\nfrom langchain_core.retrievers import BaseRetriever\nfrom pydantic import ConfigDict, model_validator\n\n@deprecated(\n    since="0.3.18",\n    removal="1.0",\n    alternative_import="langchain_weaviate.WeaviateVectorStore",\n)\nclass WeaviateHybridSearchRetriever(BaseRetriever):\n    """`Weaviate hybrid search` re

In [15]:
from rich.console import Console
from rich.markdown import Markdown
from rich.pretty import pprint

# Pretty-print the top search hit: id, score, metadata, and finally the page
# content rendered as Markdown.
console = Console()
for res, score in results[:1]:
    pprint(res.id)
    # ".3f" prints three decimal places. The previous "3f" spec only set a
    # minimum field width of 3 and still printed the default six decimals.
    pprint(f"Similarity: {score:.3f}")
    pprint(res.metadata)
    retrieved_md = Markdown(res.page_content)
    console.print(retrieved_md)

## LangChain / LangGraph / LangSmith

### LangGraph Loader Test

In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
import requests
from bs4 import BeautifulSoup

# 获取当前 notebook 的工作目录（通常是 .ipynb 所在目录）
notebook_dir = os.getcwd()
target_path = os.path.abspath(os.path.join(notebook_dir, "../"))
if target_path not in sys.path:
    sys.path.append(target_path)
from src.ingest.parsers.langgraph_recursive_url import (
    langgraph_recursive_url_extractor,
    langgraph_recursive_url_metadata_extractor,
)


url = "https://langchain-ai.github.io/langgraph/reference/graphs/"

response = requests.get(url)
soup = BeautifulSoup(response.text, "lxml")
metadata = langgraph_recursive_url_metadata_extractor(
    raw_html=response.text,
    url=url,
    response=response,
    type="api",
    lang="python",
)
print(metadata)
doc = langgraph_recursive_url_extractor(soup)
print(doc)

{'source': 'https://langchain-ai.github.io/langgraph/reference/graphs/', 'title': 'Graphs', 'type': 'api', 'lang': 'python'}
Table of contents
- [class StateGraph](#langgraph.graph.state.StateGraph)
  - [meth add_node](#langgraph.graph.state.StateGraph.add_node)
  - [meth add_edge](#langgraph.graph.state.StateGraph.add_edge)
  - [meth add_conditional_edges](#langgraph.graph.state.StateGraph.add_conditional_edges)
  - [meth add_sequence](#langgraph.graph.state.StateGraph.add_sequence)
  - [meth compile](#langgraph.graph.state.StateGraph.compile)
- [class CompiledStateGraph](#langgraph.graph.state.CompiledStateGraph)
  - [meth stream](#langgraph.graph.state.CompiledStateGraph.stream)
  - [meth astream](#langgraph.graph.state.CompiledStateGraph.astream)
  - [meth invoke](#langgraph.graph.state.CompiledStateGraph.invoke)
  - [meth ainvoke](#langgraph.graph.state.CompiledStateGraph.ainvoke)
  - [meth get_state](#langgraph.graph.state.CompiledStateGraph.get_state)
  - [meth aget_state](#lang

### LangChain API Loader Test

Exclude the directory https://python.langchain.com/api_reference/_modules/

In [8]:
%load_ext autoreload
%autoreload 2
import os
import sys
import requests
from bs4 import BeautifulSoup

# 获取当前 notebook 的工作目录（通常是 .ipynb 所在目录）
notebook_dir = os.getcwd()
target_path = os.path.abspath(os.path.join(notebook_dir, "../"))
if target_path not in sys.path:
    sys.path.append(target_path)
from src.ingest.parsers.langchain_recursive_url import (
    langchain_recursive_url_extractor,
    langchain_recursive_url_metadata_extractor,
)


url = "https://python.langchain.com/api_reference/deepseek/chat_models/langchain_deepseek.chat_models.ChatDeepSeek.html#langchain_deepseek.chat_models.ChatDeepSeek"

response = requests.get(url)
soup = BeautifulSoup(response.text, "lxml")
metadata = langchain_recursive_url_metadata_extractor(
    raw_html=response.text,
    url=url,
    response=response,
    type="api",
    lang="python",
)
print(metadata)
doc = langchain_recursive_url_extractor(soup)
print(doc)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
{'source': 'https://python.langchain.com/api_reference/deepseek/chat_models/langchain_deepseek.chat_models.ChatDeepSeek.html#langchain_deepseek.chat_models.ChatDeepSeek', 'title': 'ChatDeepSeek — 🦜🔗 LangChain  documentation', 'type': 'api', 'lang': 'python'}
# ChatDeepSeek#

class langchain_deepseek.chat_models.ChatDeepSeek[source]#

Bases: [BaseChatOpenAI](../../openai/chat_models/langchain_openai.chat_models.base.BaseChatOpenAI.html#langchain_openai.chat_models.base.BaseChatOpenAI)

DeepSeek chat model integration to access models hosted in DeepSeek’s API.

Setup:
Install langchain-deepseek and set environment variable DEEPSEEK_API_KEY.

```bash
pip install -U langchain-deepseek
export DEEPSEEK_API_KEY="your-api-key"

```

Key init args — completion params:

model: str
Name of DeepSeek model to use, e.g. “deepseek-chat”.

temperature: float
Sampling temperature.

max_tokens: Optional[int]
Max numb

### LangChain Doc Loader Test

In [None]:
%load_ext autoreload
%autoreload 2
import os
import sys
import requests
from bs4 import BeautifulSoup

# 获取当前 notebook 的工作目录（通常是 .ipynb 所在目录）
notebook_dir = os.getcwd()
target_path = os.path.abspath(os.path.join(notebook_dir, "../"))
if target_path not in sys.path:
    sys.path.append(target_path)
from src.ingest.parsers.langchain_recursive_url import (
    langchain_recursive_url_extractor,
    langchain_recursive_url_metadata_extractor,
)


url = "https://python.langchain.com/docs/integrations/chat/"
response = requests.get(url)
soup = BeautifulSoup(response.text, "lxml")
metadata = langchain_recursive_url_metadata_extractor(
    raw_html=response.text,
    url=url,
    response=response,
    type="doc",
    lang="python",
)
print(metadata)
doc = langchain_recursive_url_extractor(soup)
print(doc)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
{'source': 'https://python.langchain.com/docs/integrations/chat/', 'title': 'Chat models | 🦜️🔗 LangChain', 'type': 'api', 'lang': 'python'}
[](https://github.com/langchain-ai/langchain/blob/master/docs/docs/integrations/chat/index.mdx)# Chat models

[Chat models](/docs/concepts/chat_models/) are language models that use a sequence of [messages](/docs/concepts/messages/) as inputs and return messages as outputs (as opposed to using plain text). These are generally newer models.

infoIf you'd like to write your own chat model, see [this how-to](/docs/how_to/custom_chat_model/).
If you'd like to contribute an integration, see [Contributing integrations](/docs/contributing/how_to/integrations/).

 
Select [chat model](/docs/integrations/chat/):Google Gemini▾OpenAIAnthropicAzureGoogle GeminiGoogle VertexAWSGroqCohereNVIDIAFireworks AIMistral AITogether AIIBM watsonxDatabricksxAIPerplexity
```bash
pip ins

### LangChain Code Loader Test

In [11]:
%load_ext autoreload
%autoreload 2
import os
import sys
import requests
from bs4 import BeautifulSoup

# 获取当前 notebook 的工作目录（通常是 .ipynb 所在目录）
notebook_dir = os.getcwd()
target_path = os.path.abspath(os.path.join(notebook_dir, "../"))
if target_path not in sys.path:
    sys.path.append(target_path)
from src.ingest.parsers.langchain_recursive_url import (
    langchain_recursive_url_extractor,
    langchain_recursive_url_metadata_extractor,
)


url = "https://python.langchain.com/api_reference/_modules/langchain/agents/conversational_chat/output_parser.html"
response = requests.get(url)
soup = BeautifulSoup(response.text, "lxml")
metadata = langchain_recursive_url_metadata_extractor(
    raw_html=response.text,
    url=url,
    response=response,
    type="code",
    lang="python",
)
print(metadata)
doc = langchain_recursive_url_extractor(soup)
print(doc)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
{'source': 'https://python.langchain.com/api_reference/_modules/langchain/agents/conversational_chat/output_parser.html', 'title': 'langchain.agents.conversational_chat.output_parser — 🦜🔗 LangChain  documentation', 'type': 'code', 'lang': 'python'}
# Source code for langchain.agents.conversational_chat.output_parser

```

from __future__ import annotations

from typing import Union

from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.exceptions import OutputParserException
from langchain_core.utils.json import parse_json_markdown

from langchain.agents import AgentOutputParser
from langchain.agents.conversational_chat.prompt import FORMAT_INSTRUCTIONS

# Define a class that parses output for conversational agents

class ConvoOutputParser(AgentOutputParser):
    """Output parser for the conversational agent."""

    format_instructions: str = FORMAT_INSTRUCTIONS
    """Defa

### LangSmith

doc

api

code

## Shadcn UI test

## Tailwind CSS test

## PyTorch

## Prisma