In [1]:
from qdrant_client import QdrantClient, models
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter, HTMLSectionSplitter
from langchain.storage import LocalFileStore
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage._lc_store import create_kv_docstore
from astro_chart import full_chart_generation


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Search FastEmbed's supported models for one whose description mentions Chinese.
# Kept commented out as a one-off lookup. NOTE(review): `TextEmbedding` is not
# among the imports in the first cell -- presumably it needs
# `from fastembed import TextEmbedding`; confirm before re-enabling.
# import json
# for model in TextEmbedding.list_supported_models():
#     if "Chinese" in model["description"]:
#         print(json.dumps(model, indent=2))


# Embedding models shared by the cells below:
#   - `embeddings`: dense embeddings from the model named in `model_handle`
#   - `sparse_embeddings`: sparse embeddings from the "Qdrant/BM25" model
# Both are passed to the vector store, which is configured for hybrid retrieval.
model_handle = "jinaai/jina-embeddings-v2-base-zh"
embeddings = FastEmbedEmbeddings(model_name=model_handle)
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/BM25")

In [None]:
# Vector store (Qdrant) and parent-document store (local files).
url = "http://localhost:6333"
collection_name = "zhiwei_DAG"

# First run only: build the collection from documents instead of attaching to it.
# docs = []  # put docs here
# client  = QdrantClient('http://localhost:6333')
#
# vectorstore = QdrantVectorStore.from_documents(
#      docs,
#      embeddings,
#      sparse_embedding=sparse_embeddings,
#      url=url,
#      prefer_grpc=True,
#      collection_name=collection_name,
#      retrieval_mode=RetrievalMode.HYBRID
#  )

# Later runs: wrap the collection on the server (assumes it was already created
# by the first-run snippet above).
client = QdrantClient(prefer_grpc=True, url=url)
vectorstore = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    retrieval_mode=RetrievalMode.HYBRID,
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
)

# Key-value document store backed by files under ./store_location; it is handed
# to the retriever as its docstore.
fs = LocalFileStore("./store_location")
doc_store = create_kv_docstore(fs)

In [None]:
# --- Chunking ---
# Load the book's HTML as text (reading it by hand is not strictly needed; done as practice).
with open('ziweidoushu book2.html', mode='r', encoding='utf-8') as f_in:
    text_html = f_in.read()

# Parent chunks: split the HTML on <h1> headings; each section's heading text
# is stored in the document metadata under "Header 1".
headers_to_split_on = [("h1", "Header 1")]
html_splitter = HTMLSectionSplitter(headers_to_split_on)
parent_docs = html_splitter.split_text(text_html)
# parent_docs = parent_docs[0:10]

# Child chunks: recursive character splitter with chunk_size=400
# (chunk overlap is left at the library default).
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
# splits = child_splitter.split_documents(parent_docs)

# Retriever wiring: the vector store, the docstore, and the child splitter;
# `id_key` is the metadata field name the retriever uses for the parent id,
# and each vector search asks for k=10 results.
retriever = ParentDocumentRetriever(
    docstore=doc_store,
    vectorstore=vectorstore,
    child_splitter=child_splitter,
    id_key="source_id",
    search_kwargs={"k": 10},
)

In [None]:
# Tag every parent document with a short, deterministic id derived from its heading.
import hashlib

for doc in parent_docs:
    # id = first 8 hex characters of the MD5 digest of the stripped "Header 1" text
    heading = doc.metadata['Header 1'].strip()
    doc.metadata["id"] = hashlib.md5(heading.encode()).hexdigest()[:8]

[Document(metadata={'Header 1': '☆1、紫微星 (命宫)(命宫)', 'id': '5e1ed843'}, page_content='☆1、紫微星 (命宫)(命宫) \n △ \\triangle \n星性解释：紫微星，属己土，象阴，为南北斗中至尊至贵之星，故名帝座，即古时的皇帝星，在现代为国家元首；为官禄主（即事业主管之星，亦即官和禄的主管，入官禄宫较入命宫为好，入命宫不一定能贵显），专司官贵地位，掌造化之机枢，人生之主宰。紫微星代表尊贵、权贵、首脑、高尚，有化解刑灾之功，落诸宫均能降福消灾，解诸星之恶，威制七杀为权，降服火铃为善（旺吉有力才论）。限年逢之，凶则减凶，吉则更吉，或遇高人贵人。代表人物：周文王。 \n 紫微星虽具有以上高贵的功能，但必须有得力的“百官朝拱”才能显达，且功能的强弱还与落宫的庙旺程度有关。紫微以天府为财禄的府库，天相为行使命令的印绶（府相为成对星），以辅佐星曜的左右为宰相（在百官中最为得力，直接影响紫微格局的高低），昌曲为随从文官，魁钺为太监传令官，日月为照明分司，禄马为掌管爵禄的机关。至少得上述诸对星中的一对在三方四正 \n 守照会拱或在左右邻宫相夹者即为得“百官朝拱”，无上述诸对星或仅得其中之一单星守照会拱夹者则为“孤君”。紫微星既有属于精神的性质（如重荣誉、主观、自尊、多疑等），又有属于物质的性质（如爱物质享受、有领导和组织能力等），本质上偏向于精神性；在分析与周围及行运星曜相配合的时候，就看是加强精神性还是加强物质性；由于本质的关系，一般不喜增强精神性，尤其是原官为精神性偏强时，喜增强物质性的星曜来调和。武、贪、府属物质性（化吉增物质性，化忌则相反而增精神性），廉相属精神性（化忌增精神性，化吉及成财荫夹则增物质性）；逢百官朝拱，尤其得左右加会者，多能加强物质性；逢昌曲、空亡星和桃花星者则增加精神性。 \n 紫微星须入命宫的三方四正，且三方四正左右邻宫有“百官朝拱”，即有府相及六吉星（左右、昌曲、魁钺）等对星出现时才为得力显达，可有坚定的立场和骨气，一生必有作为，有贵气成就，即使同时加会一点煞星亦仅主事业上有些阻力及为人较奸滑而已。 \n 紫微星不喜入兄弟、子女、奴仆、疾厄、父母等闲宫，主为人势利逢迎，一生辛劳奔波，作事难成，纵遇吉星，六亲富贵，虽然得助，为福也不大；但命宫及三方吉者仍有富贵，唯较辛