In [1]:
from dotenv import load_dotenv
import os

# Load environment variables from the .env file. The call is the last
# expression of the cell, so its return value is what the cell displays.
load_dotenv()

True

In [2]:
import certifi

# Point both CA-bundle environment variables at certifi's bundle.
os.environ['SSL_CERT_FILE'] = certifi.where()
os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()

# Inject truststore into the ssl module before `requests` is imported below.
import truststore
truststore.inject_into_ssl()

import requests
url = "https://example.com"
# Connectivity smoke test. requests applies no timeout unless one is given, so
# bound the wait instead of letting a stalled connection hang the kernel.
requests.get(url, timeout=10)

<Response [200]>

In [4]:
from langchain_openai import ChatOpenAI
import os

# Chat client for the "qwen-plus" model, reached through the DashScope
# compatible-mode endpoint; the API key comes from the environment.
tongyi_chat = ChatOpenAI(
    model="qwen-plus",
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    api_key=os.environ.get("DASHSCOPE_API_KEY"),
)

In [5]:
# Prompt as (role, content) pairs. The system message (written in Chinese)
# tells the model it is a professional translator that turns the user's Chinese
# into English; the human message is the sentence to translate
# ("I like programming.").
messages = [
    ("system", "你是一名专业的翻译家，可以将用户的中文翻译为英文。"),
    ("human", "我喜欢编程。"),
]
# Last expression of the cell, so the returned message is shown as the output.
tongyi_chat.invoke(messages)


AIMessage(content='I like programming.', response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 30, 'total_tokens': 34, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'qwen-plus', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-632b0e8b-611c-402f-9264-eb3bdf567ed5-0')

In [6]:
import hashlib
import os
from typing import Callable, TypedDict

from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Ollama
from langchain.vectorstores import FAISS
from langgraph.graph import StateGraph
from sentence_transformers import SentenceTransformer

try:
    # langgraph does not export this name at package level; tolerate its
    # absence so one bad import does not abort the whole cell.
    from langgraph import LangGraph
except ImportError:
    pass

# Step 1: Abstract Component Interfaces
class EmbeddingModel:
    """Embeds documents with a sentence-transformers model and builds FAISS stores.

    Attributes are set in ``__init__``:
      * ``model_name`` - the model identifier passed in by the caller.
      * ``embedding_model`` - the LangChain embeddings object handed to FAISS,
        both when building a store here and when a saved store is reloaded.
    """

    def __init__(self, model_name: str):
        """Create the embeddings object for ``model_name``.

        Args:
            model_name: sentence-transformers model identifier.
        """
        self.model_name = model_name
        # FAISS expects a LangChain Embeddings object (embed_documents /
        # embed_query). A raw SentenceTransformer only offers encode() and
        # takes no `model_name=` keyword, so use the LangChain wrapper around
        # the same sentence-transformers model instead.
        self.embedding_model = HuggingFaceEmbeddings(model_name=model_name)

    def generate_embeddings(self, documents):
        """Embed ``documents`` and return a new FAISS vector store holding them.

        Args:
            documents: LangChain documents to index.

        Returns:
            The store produced by ``FAISS.from_documents``.
        """
        return FAISS.from_documents(documents, self.embedding_model)

class LocalLLM:
    """Wrapper around a LangChain ``Ollama`` LLM reachable at a given address.

    Attributes are set in ``__init__``:
      * ``model_name`` - the model name passed in by the caller.
      * ``llm`` - the underlying ``Ollama`` object (used directly by chains).
    """

    def __init__(self, model_name: str, host: str):
        """Create the Ollama client.

        Args:
            model_name: Name of the model to request from Ollama.
            host: Address of the Ollama server, e.g. "http://localhost:11434".
        """
        self.model_name = model_name
        # The LangChain Ollama wrapper names its server-address field
        # `base_url`; it has no `host` field.
        self.llm = Ollama(model=model_name, base_url=host)

    def generate_response(self, prompt: str) -> str:
        """Send ``prompt`` to the model and return the generated text."""
        # Use the same `invoke` entry point the chat-model cell uses rather
        # than calling the LLM object directly.
        return self.llm.invoke(prompt)


# Step 2: Incremental Update Logic
class KnowledgeLibrary:
    def __init__(self, vectorstore_path: str, embedding_model: EmbeddingModel):
        self.vectorstore_path = vectorstore_path
        self.embedding_model = embedding_model
        self.vectorstore = self.load_vectorstore()

    def load_vectorstore(self):
        if os.path.exists(self.vectorstore_path):
            return FAISS.load_local(self.vectorstore_path, self.embedding_model.embedding_model)
        return None

    def hash_document(self, content: str) -> str:
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    def build_or_update(self, documents):
        updated = False
        hashes = set()
        new_docs = []

        # Load existing hashes
        if self.vectorstore:
            hashes = set(self.vectorstore.docstore.get_all_metadata())

        # Check for new or updated documents
        for doc in documents:
            doc_hash = self.hash_document(doc.page_content)
            if doc_hash not in hashes:
                new_docs.append(doc)
                updated = True

        if updated:
            new_store = self.embedding_model.generate_embeddings(new_docs)
            if self.vectorstore:
                self.vectorstore.merge_from(new_store)
            else:
                self.vectorstore = new_store
            self.vectorstore.save_local(self.vectorstore_path)

    def get_retriever(self):
        return self.vectorstore.as_retriever() if self.vectorstore else None


# Step 3: Workflow and Summarization
# Name of the single node in the Q&A graph.
_QA_NODE = "retrieve_and_qa"


class _QAState(TypedDict, total=False):
    """State carried through the Q&A graph."""

    question: str
    chat_history: list
    answer: str


class _QAWorkflow:
    """Compiled Q&A graph exposing a ``run(node_name, query=...)`` call.

    ``app`` is the compiled graph itself, for callers that want to invoke it
    directly with a state dict.
    """

    def __init__(self, app, node_names):
        self.app = app
        self._node_names = frozenset(node_names)

    def run(self, node_name: str, query: str, chat_history=None) -> str:
        """Answer ``query`` and return the answer text.

        Args:
            node_name: Must name a node of the graph; the graph is always
                executed from its entry point.
            query: The question to answer.
            chat_history: Optional prior turns forwarded to the chain.

        Raises:
            KeyError: If ``node_name`` is not a node of this workflow.
        """
        if node_name not in self._node_names:
            raise KeyError(f"Unknown workflow node: {node_name!r}")
        final_state = self.app.invoke(
            {"question": query, "chat_history": list(chat_history or [])}
        )
        return final_state["answer"]


def build_workflow(retriever, llm):
    """Build a one-node graph that answers questions over ``retriever``.

    Args:
        retriever: Retriever handed to ``ConversationalRetrievalChain``.
        llm: Wrapper whose ``.llm`` attribute is the LangChain LLM to use.

    Returns:
        A ``_QAWorkflow``; call ``.run("retrieve_and_qa", query=...)`` on it.
    """
    chain = ConversationalRetrievalChain.from_llm(llm.llm, retriever=retriever)

    def retrieve_and_qa(state):
        # The chain reads "question" and "chat_history" and reports its result
        # under "answer"; only that key is written back into the graph state.
        result = chain.invoke(
            {
                "question": state["question"],
                "chat_history": state.get("chat_history", []),
            }
        )
        return {"answer": result["answer"]}

    # langgraph has no `LangGraph` class; graphs are declared with StateGraph
    # and must be compiled before they can be executed.
    graph = StateGraph(_QAState)
    graph.add_node(_QA_NODE, retrieve_and_qa)
    graph.set_entry_point(_QA_NODE)
    graph.set_finish_point(_QA_NODE)
    return _QAWorkflow(graph.compile(), [_QA_NODE])


def summarize_domain_fsd(retriever, llm, domain: str) -> str:
    """Summarize every document retrieved for ``domain``.

    Each retrieved document is summarized with its own LLM call; the
    individual summaries are returned joined by blank lines, in retrieval
    order.
    """
    pieces = []
    for doc in retriever.get_relevant_documents(domain):
        pieces.append(llm.generate_response(f"Summarize this: {doc.page_content}"))
    return "\n\n".join(pieces)

ImportError: cannot import name 'SentenceTransformers' from 'langchain.embeddings' (C:\Users\feng.z\AppData\Roaming\Python\Python310\site-packages\langchain\embeddings\__init__.py)

In [None]:
# Step 4: Main Function
def main():
    """Run the FSD pipeline end to end and print its results.

    Builds the embedding model, the local LLM and the knowledge library, feeds
    the *.txt files from the FSD folder into the library, then prints a domain
    summary and the answer to one example question. When the library yields no
    retriever, a hint is printed instead.
    """
    # Fixed configuration for this run.
    business_domain = "Retail Lending"
    source_dir = "./fsd_documents"
    index_dir = "./vectorstore/faiss_index"

    # Components.
    embedder = EmbeddingModel(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    llm = LocalLLM(model_name="llama", host="http://localhost:11434")
    library = KnowledgeLibrary(vectorstore_path=index_dir, embedding_model=embedder)

    # Index whatever the FSD folder currently contains.
    library.build_or_update(DirectoryLoader(source_dir, glob="*.txt").load())
    retriever = library.get_retriever()

    if not retriever:
        print("Knowledge library is empty. Please add documents.")
        return

    graph = build_workflow(retriever, llm)

    # Domain summary first, then one example question through the workflow.
    summary = summarize_domain_fsd(retriever, llm, business_domain)
    print(f"Summary for {business_domain}:\n{summary}")

    answer = graph.run("retrieve_and_qa", query="What are the key requirements for retail lending?")
    print(f"Q&A Result:\n{answer}")

# Run the pipeline when this cell is executed.
main()
