# `CSVToDocument`

In [1]:
%load_ext autoreload
%autoreload 2

## On its own

In [2]:
from haystack.components.converters.csv import CSVToDocument
from datetime import datetime

In [3]:
# Run the converter directly on the sample CSV, passing a `date_added`
# timestamp (ISO 8601 string of the current local time) as extra metadata.
converter = CSVToDocument()
results = converter.run(
    sources=["sample.csv"],
    meta={"date_added": datetime.now().isoformat()},
)

# The converter's output dict exposes the converted documents under "documents".
documents = results["documents"]

In [4]:
# Print (rather than echo) the first document's content so the CSV text is
# shown with real line breaks instead of an escaped string repr.
print(documents[0].content)

Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_code,Variable_name,Variable_category,Value,Industry_code_ANZSIC06
2023,Level 1,99999,All industries,Dollars (millions),H01,Total income,Financial performance,930995,"ANZSIC06 divisions A-S (excluding classes K6330, L6711, O7552, O760, O771, O772, S9540, S9601, S9602, and S9603)"
2023,Level 1,99999,All industries,Dollars (millions),H04,"Sales, government funding, grants and subsidies",Financial performance,821630,"ANZSIC06 divisions A-S (excluding classes K6330, L6711, O7552, O760, O771, O772, S9540, S9601, S9602, and S9603)"
2023,Level 1,99999,All industries,Dollars (millions),H05,"Interest, dividends and donations",Financial performance,84354,"ANZSIC06 divisions A-S (excluding classes K6330, L6711, O7552, O760, O771, O772, S9540, S9601, S9602, and S9603)"
2023,Level 1,99999,All industries,Dollars (millions),H07,Non-operating income,Financial performance,25010,"ANZSIC06 divisions A-S (excluding cl

## In a pipeline

In [5]:
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.converters import CSVToDocument
from haystack.components.preprocessors import DocumentCleaner
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter

In [6]:
# In-memory store handed to the pipeline's DocumentWriter below.
document_store = InMemoryDocumentStore()

In [7]:
# Indexing pipeline: CSV file -> cleaned text -> sentence-based splits -> document store.
pipeline = Pipeline()

# Register each stage under the name used for wiring and for run-time inputs.
pipeline.add_component(name="converter", instance=CSVToDocument())
pipeline.add_component(name="cleaner", instance=DocumentCleaner())
pipeline.add_component(
    name="splitter",
    instance=DocumentSplitter(split_by="sentence", split_length=5),
)
pipeline.add_component(
    name="writer",
    instance=DocumentWriter(document_store=document_store),
)

# Chain the stages in order: converter -> cleaner -> splitter -> writer.
pipeline.connect(sender="converter", receiver="cleaner")
pipeline.connect(sender="cleaner", receiver="splitter")
pipeline.connect(sender="splitter", receiver="writer")

# The converter is the only component given external input here; the value
# returned by `run` is left as the last expression so it is shown as cell output.
file_names = ["sample.csv"]
pipeline.run(data={"converter": {"sources": file_names}})

{'writer': {'documents_written': 1}}

In [8]:
# Called without filters to inspect what the pipeline run wrote to the store.
document_store.filter_documents()

[Document(id=8d6cd0ef6a7cd57e83f6e67ec44a961b43a6eb8757f89f7b47e885d8dd5cc927, content: 'Year,Industry_aggregation_NZSIOC,Industry_code_NZSIOC,Industry_name_NZSIOC,Units,Variable_code,Varia...', meta: {'file_path': 'sample.csv', 'source_id': '85482f6b1972fc1e44702312a32d6dd75c168e8fab1bb96cf08135f025f8dcf2', 'page_number': 1, 'split_id': 0, 'split_idx_start': 0})]