Skip to content

Commit

Permalink
Fix #294 (tested)
Browse files Browse the repository at this point in the history
  • Loading branch information
mdeweerd committed May 19, 2023
1 parent a862ff2 commit 4cda348
Showing 1 changed file with 22 additions and 18 deletions.
40 changes: 22 additions & 18 deletions ingest.py
Expand Up @@ -24,6 +24,28 @@
from constants import CHROMA_SETTINGS


class MyElmLoader(UnstructuredEmailLoader):
    """Email loader that retries with the text/plain part when no HTML part exists.

    Any failure is re-raised with the offending file path in the message so
    that a bulk ingest run reports which file could not be loaded.
    """

    def load(self) -> List[Document]:
        """Load the email, falling back to plain text when HTML is missing.

        Returns:
            The documents produced by ``UnstructuredEmailLoader.load``.

        Raises:
            Exception: An exception of the same type as the underlying
                failure, with ``self.file_path`` prefixed to its message.
                If that type cannot be rebuilt from a single message string,
                the original exception is re-raised unchanged (with the path
                attached as a note on Python 3.11+).
        """
        try:
            try:
                return super().load()
            except ValueError as e:
                if 'text/html content not found in email' not in str(e):
                    raise
                # No HTML part: switch the content source and try plain text.
                self.unstructured_kwargs["content_source"] = "text/plain"
                return super().load()
        except Exception as e:
            message = f"{self.file_path}: {e}"
            try:
                wrapped = type(e)(message)
            except Exception:
                # Some exception classes (e.g. UnicodeDecodeError) cannot be
                # constructed from one string; building the wrapper must not
                # mask the real error.
                wrapped = None
            if wrapped is not None:
                # Add file_path to exception message
                raise wrapped from e
            if hasattr(e, "add_note"):  # BaseException.add_note: Python 3.11+
                e.add_note(f"While loading {self.file_path}")
            raise


# Map file extensions to document loaders and their arguments
LOADER_MAPPING = {
".csv": (CSVLoader, {}),
Expand All @@ -47,24 +69,6 @@
# NOTE(review): presumably python-dotenv's load_dotenv, which reads a .env file
# into the process environment; its import is outside this hunk, so confirm.
load_dotenv()


class MyElmLoader(UnstructuredEmailLoader):
    """Email loader that retries with the text/plain part when no HTML part exists."""

    def load(self) -> List[Document]:
        """Load the email, falling back to plain text when HTML is missing.

        Returns:
            The documents produced by ``UnstructuredEmailLoader.load``.

        Raises:
            ValueError: Re-raised unchanged when the parent loader fails for
                any reason other than a missing text/html part.
        """
        try:
            # Call through the instance: the bare class-level call
            # ``UnstructuredEmailLoader.load()`` has no ``self`` and raises
            # TypeError before any loading happens.
            doc = super().load()
        except ValueError as e:
            if 'text/html content not found in email' in str(e):
                # Try plain text
                self.unstructured_kwargs["content_source"] = "text/plain"
                doc = super().load()
            else:
                raise

        return doc


def load_single_document(file_path: str) -> Document:
ext = "." + file_path.rsplit(".", 1)[-1]
if ext in LOADER_MAPPING:
Expand Down

0 comments on commit 4cda348

Please sign in to comment.