# `TextFileToDocument`


In [2]:
# Load IPython's autoreload extension and switch it to mode 2, so that edited
# modules are re-imported automatically before code is executed
# (see the IPython autoreload documentation for the exact semantics of each mode).
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## On its own


In [7]:
from pathlib import Path
from haystack.components.converters import TextFileToDocument

# Build the converter with its default settings.
converter = TextFileToDocument()

# `run` takes a list of sources; the mapping it returns is kept in `docs`
# so it can be inspected in the following cell.
sample_path = Path("sample.txt")
docs = converter.run(sources=[sample_path])

In [8]:
# Display the converter's return value: a dict whose 'documents' entry lists
# the Document objects produced from the given sources.
docs

{'documents': [Document(id=0e5c05f81234ecd65fbe5caa7c5eddecec37e836c752d67c04088b89fa977887, content: 'Cuong Duong Manh', meta: {'file_path': 'sample.txt'})]}

## In a Pipeline

In [9]:
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.converters import TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter

In [10]:
# Fresh in-memory store; the writer stage at the end of the pipeline targets it.
document_store = InMemoryDocumentStore()

pipeline = Pipeline()

# Stages in execution order: convert -> clean -> split -> write.
stages = {
    "converter": TextFileToDocument(),
    "cleaner": DocumentCleaner(),
    "splitter": DocumentSplitter(split_by="sentence", split_length=5),
    "writer": DocumentWriter(document_store=document_store),
}
for stage_name, stage in stages.items():
    pipeline.add_component(stage_name, stage)

# Chain every stage to the one that follows it.
stage_names = list(stages)
for upstream, downstream in zip(stage_names, stage_names[1:]):
    pipeline.connect(upstream, downstream)

# Only the first stage needs explicit input; the cell's last expression
# displays the result returned by the run.
file_names = ["sample.txt"]
pipeline.run({"converter": {"sources": file_names}})

{'writer': {'documents_written': 1}}

In [11]:
# Print at most the first 100 characters of every document held in the store.
stored_docs = document_store.filter_documents()
for doc in stored_docs:
    print(doc.content[:100])

Cuong Duong Manh
