In [None]:
from unstructured.partition.auto import partition

# Partition the sample e-mail and print each resulting element's string
# form, with a blank line between consecutive elements.
elements = partition(filename="example-docs/eml/fake-email.eml")
rendered = "\n\n".join(str(element) for element in elements)
print(rendered)

In [None]:
import os

from unstructured_ingest.v2.pipeline.pipeline import Pipeline
from unstructured_ingest.v2.interfaces import ProcessorConfig
from unstructured_ingest.v2.processes.connectors.fsspec.s3 import (
    S3IndexerConfig,
    S3DownloaderConfig,
    S3ConnectionConfig,
    S3AccessConfig
)
from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
from unstructured_ingest.v2.processes.chunker import ChunkerConfig
from unstructured_ingest.v2.processes.embedder import EmbedderConfig
from unstructured_ingest.v2.processes.connectors.local import LocalUploaderConfig

from load_dotenv import load_dotenv
load_dotenv()
# Chunking and embedding are optional.


if __name__ == "__main__":
    # Echo the connection settings read from the environment so that a
    # missing or misconfigured value is visible before the pipeline starts.
    print(os.getenv("AWS_S3_URL"))
    print(os.getenv("AWS_ACCESS_KEY_ID"))
    # Never echo the secret itself: printed output can be persisted (saved
    # notebook cells, logs). Report only whether a value is present.
    print("<set>" if os.getenv("AWS_SECRET_ACCESS_KEY") else "<not set>")
    print(os.getenv("MINIO_API_ENDPOINT"))

    Pipeline.from_configs(
        context=ProcessorConfig(),
        # Source: the S3 location given by AWS_S3_URL; downloaded files are
        # placed in LOCAL_FILE_DOWNLOAD_DIR.
        indexer_config=S3IndexerConfig(remote_url=os.getenv("AWS_S3_URL")),
        downloader_config=S3DownloaderConfig(
            download_dir=os.getenv("LOCAL_FILE_DOWNLOAD_DIR"),
        ),
        source_connection_config=S3ConnectionConfig(
            access_config=S3AccessConfig(
                key=os.getenv("AWS_ACCESS_KEY_ID"),
                secret=os.getenv("AWS_SECRET_ACCESS_KEY"),
            ),
            # Custom S3 endpoint; presumably a MinIO server standing in for
            # AWS S3 -- confirm against the deployment.
            endpoint_url=os.getenv("MINIO_API_ENDPOINT"),
        ),
        # Partitioning is requested through the API at UNSTRUCTURED_API_URL
        # (partition_by_api=True) using the "hi_res" strategy.
        partitioner_config=PartitionerConfig(
            partition_by_api=True,
            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
            partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
            strategy="hi_res",
            additional_partition_args={
                "split_pdf_page": True,
                "split_pdf_allow_failed": True,
                "split_pdf_concurrency_level": 15,
            },
        ),
        chunker_config=ChunkerConfig(chunking_strategy="by_title"),
        # Embedding is currently disabled; uncomment to enable it.
        # embedder_config=EmbedderConfig(embedding_provider="huggingface"),
        # Destination: results are written to LOCAL_FILE_OUTPUT_DIR.
        uploader_config=LocalUploaderConfig(
            output_dir=os.getenv("LOCAL_FILE_OUTPUT_DIR"),
        ),
        # Alternative destination: to upload to S3 instead, replace the
        # uploader_config above with the two arguments below.
        # NOTE: S3UploaderConfig is not among the names imported from the s3
        # connector module in this file and would need to be imported first.
        # destination_connection_config=S3ConnectionConfig(
        #     access_config=S3AccessConfig(
        #         key=os.getenv("AWS_ACCESS_KEY_ID"),
        #         secret=os.getenv("AWS_SECRET_ACCESS_KEY"),
        #     ),
        # ),
        # uploader_config=S3UploaderConfig(remote_url=os.getenv("AWS_S3_URL")),
    ).run()