Skip to content

Commit

Permalink
Merge pull request #6 from felipearosr/1.Streaming-Memory-Sources
Browse files Browse the repository at this point in the history
1.Streaming - Memory - Sources
  • Loading branch information
felipearosr committed Apr 12, 2024
2 parents e714718 + 4d0b5b8 commit 89f94b8
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 16 deletions.
4 changes: 1 addition & 3 deletions 1.Streaming - Memory - Sources/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,7 @@ Sources refer to the documents or materials returned by the retrieval system, wh
This is a basic implementation of sources; you can also separate them by file type using the metadata of the source_nodes.

```python
@cl.on_message
async def main(message: cl.Message):
# rest of your code
async def set_sources(response, response_message):
label_list = []
count = 1

Expand Down
27 changes: 14 additions & 13 deletions 5.Intent Detection Agent/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core.extractors import (
TitleExtractor,
#QuestionsAnsweredExtractor,
#SummaryExtractor,
#KeywordExtractor,
# QuestionsAnsweredExtractor,
# SummaryExtractor,
# KeywordExtractor,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_parse import LlamaParse
Expand Down Expand Up @@ -53,7 +53,9 @@ def create_pinecone_pod(pc, index_name):


def get_documents(input_dir):
llama_parser = LlamaParse(api_key=llama_parse_api_key, result_type="markdown", verbose=True)
llama_parser = LlamaParse(
api_key=llama_parse_api_key, result_type="markdown", verbose=True
)

UnstructuredReader = download_loader("UnstructuredReader")

Expand All @@ -75,20 +77,19 @@ def run_pipeline(documents, vector_store, llm, num_workers):
pipeline = IngestionPipeline(
transformations=[
SentenceSplitter(chunk_size=512, chunk_overlap=126),

TitleExtractor(llm=llm, num_workers=num_workers),
#QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers),
#SummaryExtractor(
# QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers),
# SummaryExtractor(
# summaries=["prev", "self"], llm=llm, num_workers=num_workers
#),
#KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers),
# ),
# KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers),
OpenAIEmbedding(model=EMBEDDING),
],
vector_store=vector_store,
)
for doc in documents: # Small patch to remove last_accessed_date from metadata
k=vars(doc)
del k['metadata']['last_accessed_date']
for doc in documents: # Small patch to remove last_accessed_date from metadata
k = vars(doc)
del k["metadata"]["last_accessed_date"]
pipeline.run(documents=documents, show_progress=True, num_workers=num_workers)


Expand Down Expand Up @@ -117,4 +118,4 @@ async def main():


if __name__ == "__main__":
asyncio.run(main())
asyncio.run(main())

0 comments on commit 89f94b8

Please sign in to comment.