## CSV

https://python.langchain.com/v0.1/docs/modules/data_connection/document_loaders/csv/


In [1]:
import sys
sys.path.append('../../')

from document_loaders.load_document import load_document, DocumentLoader

### Using load_document

In [2]:
# Load the CSV through the convenience function, letting it pick the splitter.
docs = load_document(
    './files/mlb_teams_2012.csv',
    text_splitter="auto",
)

# Number of documents produced, followed by the first one for inspection.
print(len(docs))

docs[0]

30


Document(metadata={'source': './files/mlb_teams_2012.csv', 'row': 0}, page_content='Team: Nationals\n"Payroll (millions)": 81.34\n"Wins": 98')

#### Customizing the CSV parsing and loading

See the csv module documentation for more information on which csv args are supported:
https://docs.python.org/3/library/csv.html

In [3]:
# Pass CSV parsing options and replace the column names with custom ones.
# NOTE: in the recorded output, supplying `fieldnames` makes the file's own
# header line come back as the first document (31 documents rather than 30).
docs = load_document(
    './files/mlb_teams_2012.csv',
    csv_args=dict(
        delimiter=',',
        quotechar='"',
        fieldnames=['MLB Team', 'Payroll in millions', 'Wins'],
    ),
)

print(len(docs))

docs[0]

31


Document(metadata={'source': './files/mlb_teams_2012.csv', 'row': 0}, page_content='MLB Team: Team\nPayroll in millions: "Payroll (millions)"\nWins: "Wins"')

#### Specify a column to identify the document source

In [4]:
# Use the "Team" column, rather than the file path, as each document's
# `source` metadata value.
docs = load_document(
    './files/mlb_teams_2012.csv',
    source_column="Team",
)

print(len(docs))

docs[0]

30


Document(metadata={'source': 'Nationals', 'row': 0}, page_content='Team: Nationals\n"Payroll (millions)": 81.34\n"Wins": 98')

### Using DocumentLoader

In [5]:
# Build a loader object for the same CSV; documents are read in the next cell.
loader = DocumentLoader(
    './files/mlb_teams_2012.csv',
    text_splitter="auto",
)

In [6]:
# Iterate the loader lazily, keeping only a running count and the last document.
count = 0
doc = None  # stays None if the loader yields nothing, so the final `doc` cannot raise NameError
for doc in loader.lazy_load():
    count += 1
print(count)

# Display the last document that was yielded.
doc

30


Document(metadata={'source': './files/mlb_teams_2012.csv', 'row': 29}, page_content='Team: Astros\n"Payroll (millions)": 60.65\n"Wins": 55')

#### Customizing the CSV parsing and loading

See the csv module documentation for more information on which csv args are supported: https://docs.python.org/3/library/csv.html

In [7]:
# Build a loader with CSV parsing options and custom column names.
loader = DocumentLoader(
    './files/mlb_teams_2012.csv',
    csv_args=dict(
        delimiter=',',
        quotechar='"',
        fieldnames=['MLB Team', 'Payroll in millions', 'Wins'],
    ),
)

In [8]:
# Iterate the loader lazily, keeping only a running count and the last document.
# NOTE(review): the recorded output of this cell (30 documents, original
# "Team"/"Payroll (millions)"/"Wins" column names) does not reflect the custom
# `fieldnames` given to the loader in the previous cell, whereas the
# `load_document` call with the same csv_args produced 31 documents with the
# renamed columns. TODO confirm that DocumentLoader forwards `csv_args`.
count = 0
doc = None  # stays None if the loader yields nothing, so the final `doc` cannot raise NameError
for doc in loader.lazy_load():
    count += 1
print(count)

# Display the last document that was yielded.
doc

30


Document(metadata={'source': './files/mlb_teams_2012.csv', 'row': 29}, page_content='Team: Astros\n"Payroll (millions)": 60.65\n"Wins": 55')

#### Specify a column to identify the document source

In [9]:
# Build a loader that is asked to take each document's source from the "Team" column.
loader = DocumentLoader(
    './files/mlb_teams_2012.csv',
    source_column="Team",
)

In [10]:
# Iterate the loader lazily, keeping only a running count and the last document.
# NOTE(review): the recorded output of this cell shows
# metadata['source'] == './files/mlb_teams_2012.csv', not the Team value,
# whereas the `load_document` call with source_column="Team" produced
# source == 'Nationals'. TODO confirm that DocumentLoader forwards `source_column`.
count = 0
doc = None  # stays None if the loader yields nothing, so the final `doc` cannot raise NameError
for doc in loader.lazy_load():
    count += 1
print(count)

# Display the last document that was yielded.
doc

30


Document(metadata={'source': './files/mlb_teams_2012.csv', 'row': 29}, page_content='Team: Astros\n"Payroll (millions)": 60.65\n"Wins": 55')