## Load data from a directory or a specified list of file paths

In [37]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage
from llama_index.core.settings import Settings
from llama_index.vector_stores.chroma import ChromaVectorStore               
from llama_index.llms.siliconflow import SiliconFlow
from llama_index.embeddings.siliconflow import SiliconFlowEmbedding
import os
import chromadb

# Configure the global embedding model and LLM used by the indexes and
# workflows below. The API key is read from the environment; the variable name
# (spelled with a double "L") must match what is exported in the shell.
Settings.embed_model = SiliconFlowEmbedding(api_key=os.getenv("SILLICONFLOW_API_KEY"),
                                            model="BAAI/bge-m3",
                                            embed_batch_size=100)

Settings.llm = SiliconFlow(api_key=os.getenv("SILLICONFLOW_API_KEY"),
                           model="Qwen/Qwen3-8B",
                           temperature=0.0)

# Load the source PDF into a list of Document objects.
# NOTE(review): the IDs printed further down differ from run to run, so the
# "already in index" check by document id only works within one kernel
# session. SimpleDirectoryReader(filename_as_id=True) would give stable IDs —
# confirm whether that is wanted.
documents = SimpleDirectoryReader(input_files=["./data/AI_Information.pdf"]).load_data()

# Open (or create) the on-disk Chroma collection that backs the vector index.
path = "./database/chroma_db"
db = chromadb.PersistentClient(path=path)
chroma_collection = db.get_or_create_collection("ai_info")
vector_store = ChromaVectorStore(chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# The collection is persistent: calling from_documents() on every run would
# re-embed the PDF and append duplicate nodes. Reuse the stored vectors when
# the collection is already populated and only ingest when it is empty.
# (Delete ./database/chroma_db to force a rebuild after the PDF changes.)
if chroma_collection.count() > 0:
    index = VectorStoreIndex.from_vector_store(vector_store)
else:
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)


# Create a query engine
# query_engine = index.as_query_engine()
# response = query_engine.query("What are applications of AI?")
# print(response)

In [38]:
# Elasticsearch connection settings. Each value can be overridden through an
# environment variable so credentials do not have to live in the notebook;
# the fallbacks keep the original local-development values working.
elasticsearch_url = os.getenv("ELASTICSEARCH_URL", "http://localhost:9200")
elasticsearch_index_name = os.getenv("ELASTICSEARCH_INDEX", "ai_info")
elasticsearch_username = os.getenv("ELASTICSEARCH_USERNAME", "elastic")
# Local-development default only — set ELASTICSEARCH_PASSWORD for any real deployment.
elasticsearch_password = os.getenv("ELASTICSEARCH_PASSWORD", "infini_rag_flow")

In [39]:
from elasticsearch import AsyncElasticsearch
from llama_index.vector_stores.elasticsearch import AsyncBM25Strategy, ElasticsearchStore

# Authenticated async client; it is shared by the vector store and by the raw
# search calls in later cells.
es_client = AsyncElasticsearch(
    elasticsearch_url,
    basic_auth=(elasticsearch_username, elasticsearch_password),
)

# Select BM25 through the documented `retrieval_strategy` argument.
# The previous `es_strategy="bm25"` / `es_username=` keywords are not
# ElasticsearchStore parameters (the documented names are `retrieval_strategy`
# and `es_user`), so the BM25 request did not take effect. Credentials are not
# repeated here because the already-authenticated client is passed in.
# NOTE(review): if the "ai_info" index was created by an earlier run, it keeps
# its existing mapping — drop and recreate it if a BM25-specific mapping is
# required.
es_vector_store = ElasticsearchStore(
    es_client=es_client,
    index_name=elasticsearch_index_name,
    retrieval_strategy=AsyncBM25Strategy(),
)
es_storage_context = StorageContext.from_defaults(vector_store=es_vector_store)
# Ingest the loaded documents into the Elasticsearch-backed index.
es_index = VectorStoreIndex.from_documents(documents, storage_context=es_storage_context)


In [40]:
# Print the ID of every loaded Document, one per line.
# Note: at notebook scope `doc` stays bound to the last document after the loop.
for doc in documents:
    print(doc.id_)


2566f879-342e-4cc9-8376-111eeb3e1a6f
c847bff2-55c9-467a-9469-da240d3593fb
cf6e162b-0c59-448c-b4f1-0f621d33cb4d
24aff263-5e9d-4255-976e-33094ebe48a3
b07c317b-d69e-4ba6-8ec7-75438616b7a6
f5414305-488c-41da-9c69-af978dda7f3d
6baac9b1-1ed3-4aaf-87af-3fe82a325833
2c60107b-4e7e-4bba-b658-f89584a2b282
77344f17-ac8a-43a0-a93b-a8a1e4921c4f
6f7d5825-d770-420c-9565-232d08947672
f77c89de-14cc-4a96-b76d-9b82d5946e01
8993d2e5-f28d-4f9d-97ff-be572ddf5f56
9d273d12-77ff-4336-8d15-94d4785f6a27
acf8a880-3ba8-4627-a06c-aa8bfc3289fa
fe93ad16-cd04-4d21-a84d-17b4b19df1db


In [32]:
search_id = "ff741968-4667-4efe-a2e2-7e7567f08c90"

print("###### search_id:", search_id)
result = await es_client.search(
    index = elasticsearch_index_name, 
    body = {
        "query": {
            "match": {
                "metadata.document_id": search_id
            }
        }
    }
)
print("result:", result)
if result['hits']['total']['value'] > 0:
    print(f"Document {doc.doc_id} already in index")

###### search_id: ff741968-4667-4efe-a2e2-7e7567f08c90
result: {'took': 5, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 2.3671236, 'hits': [{'_index': 'ai_info', '_id': 'c5415be4-fcfc-47d6-b9d8-0f8f46e88bc0', '_score': 2.3671236, '_ignored': ['content.keyword', 'metadata._node_content.keyword'], '_source': {'content': 'AI for Social Good \nAI is increasingly being used to address social and environmental challenges, such as climate \nchange, poverty, and healthcare disparities. AI for social good initiatives aim to leverage AI for \npositive impact. \nRegulation and Governance \nAs AI becomes more pervasive, there will be a growing need for regulation and governance to \nensure responsible development and deployment. This includes establishing ethical guidelines, \naddressing bias and fairness, and protecting privacy and security.  \nInternational collaborations on standards will 

## Add documents to an existing index

In [33]:
import asyncio


index_exists = await es_client.indices.exists(index = elasticsearch_index_name)
if index_exists:
    # check documents already in index
    for doc in documents:
        # check if document already in index
        #2ba3fae2-78fe-4807-9535-588e275428cf
        search_id = doc.doc_id
        print("###### search_id:", search_id)
        result = await es_client.search(
            index = elasticsearch_index_name, 
            body = {
                "query": {
                    "match": {
                        "metadata.document_id": search_id
                    }
                }
            }
        )
        print("result:", result)
        if result['hits']['total']['value'] > 0:
            print(f"Document {doc.doc_id} already in index")
        else:
            # 添加新文档
            print("insert doc:", doc)
            es_index.insert(doc)
else:
    # 创建新索引
    es_index = VectorStoreIndex.from_documents(documents, storage_context=es_storage_context)


###### search_id: 5b6794ed-c8e0-4777-afd8-a8ffa214e7c7
result: {'took': 6, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 2.3671236, 'hits': [{'_index': 'ai_info', '_id': 'aa2edf93-3307-4766-8e4c-23a2cc5ec5f6', '_score': 2.3671236, '_ignored': ['content.keyword', 'metadata._node_content.keyword'], '_source': {'content': 'Understanding Artificial Intelligence \nChapter 1: Introduction to Artificial Intelligence \nArtificial intelligence (AI) refers to the ability of a digital computer or computer -controlled robot \nto perform tasks commonly associated with intelligent beings. The term is frequently applied to \nthe project of developing systems endowed with the intelle ctual processes characteristic of \nhumans, such as the ability to reason, discover meaning, generalize, or learn from past \nexperience. Over the past few decades, advancements in computing power and data availabilit

## Workflow examples in the LlamaIndex framework

In [42]:
from llama_index.core.workflow import (
    Event,
    StartEvent,
    StopEvent,
    Workflow,
    step,
)

In [48]:
class JokeEvent(Event):
    """Workflow event that carries a generated joke from one step to the next."""
    # Text of the joke, as returned by the generating step.
    joke: str

class JokeWorkflow(Workflow):
    """Two-step workflow: generate a joke about a topic, then critique it.

    Run with ``await JokeWorkflow(...).run(topic="...")``; the final result is
    the critique text.
    """

    # LLM shared by both steps; bound to Settings.llm when the class is defined.
    llm = Settings.llm

    @step
    async def generate_joke(self, event: StartEvent) -> JokeEvent:
        """Ask the LLM for a joke about ``event.topic`` and emit it as a JokeEvent."""
        topic = event.topic
        prompt = f"Generate a joke about {topic}"
        # Await the async completion API: the synchronous `complete()` would
        # block the event loop that drives the workflow for the whole LLM call.
        response = await self.llm.acomplete(prompt)
        return JokeEvent(joke=str(response))

    @step
    async def critique_joke(self, event: JokeEvent) -> StopEvent:
        """Ask the LLM to critique the joke and finish the workflow with its answer."""
        joke = event.joke
        prompt = f"Give thorough analysis and critique of the joke: {joke}"
        response = await self.llm.acomplete(prompt)
        return StopEvent(result=str(response))
    
        
        

In [None]:
# Instantiate the workflow (timeout=60 — presumably seconds; confirm) with verbose logging off.
w = JokeWorkflow(timeout=60, verbose=False)
# `topic` is what generate_joke reads as `event.topic` on the StartEvent.
result = await w.run(topic="chicken")
# Print the workflow's final result (the critique returned via StopEvent).
print(result)





**Thorough Analysis and Critique of the Joke:**  
*"What do you call fake chicken? A *fowl* imitation!"*  

---

### **1. Structure and Mechanism**  
The joke follows a classic **pun structure**, leveraging **homonyms** (words with identical pronunciation but different meanings) to create surprise and humor. The setup ("What do you call fake chicken?") primes listeners to expect an answer involving deception or mimicry (since fake implies imitation). The punchline ("a *fowl* imitation!") subverts expectations by using **dual meanings** of *fowl*:  
- **Literal**: A bird (chicken).  
- **Figurative**: Dishonesty (*e.g., foul play*, *foul language*).  

This juxtaposition creates irony: *fake chicken* becomes *a fowl imitation*, suggesting both literal mimicry (*imitation chicken*) and moral impropriety (*a foul act*).  

---

### **2. Humor Mechanics**  
- **Wordplay**: The core humor hinges on **homophonic ambiguity**, requiring listeners to recognize both definitions of *fowl*. This