## CSV Processing

In [1]:
import pandas as pd
import os
from langchain_community.document_loaders import (
    CSVLoader, 
    UnstructuredCSVLoader
)
from typing import List, Any
from langchain_core.documents import Document

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
## Method 1- Using csvloader
csv_loader = CSVLoader(
    file_path="data/structured_files/products.csv",
    encoding='utf-8',
    csv_args={
        'delimiter': ',',
        'quotechar': '"'
    }
)
csv_docs = csv_loader.load()
print(f"length: {len(csv_docs)}")
print(f"content previre: {csv_docs[0].page_content[:200]}")
print(f"metadata: {csv_docs[0].metadata}")

length: 5
content previre: Product: Laptop
Category: Electronics
Price: 999.99
Stock: 50
Description: High-performance laptop with 16GB RAM and 512GB SSD
metadata: {'source': 'data/structured_files/products.csv', 'row': 0}


In [3]:
## Method 2- Custom csv

def process_CSV(filepath:str) -> List[Document]:
    df = pd.read_csv(filepath)
    documents = []

    for index, row in df.iterrows():
        content = f"""
        Name: {row['Product']},
        Category: {row['Category']},
        Price: {row['Price']},
        Stock: {row['Stock']},
        Description: {row['Description']}
        """

        doc = Document(
            page_content=content,
            metadata={
                'source': filepath,
                'row_index': index,
                'product_name': row['Product'],
                'category': row['Category'],
                'rpice': row['Price'],
                'data_type': 'product_info'
            }
        )
        documents.append(doc)
    return documents

In [4]:
process_CSV(filepath="data/structured_files/products.csv")

[Document(metadata={'source': 'data/structured_files/products.csv', 'row_index': 0, 'product_name': 'Laptop', 'category': 'Electronics', 'rpice': 999.99, 'data_type': 'product_info'}, page_content='\n        Name: Laptop,\n        Category: Electronics,\n        Price: 999.99,\n        Stock: 50,\n        Description: High-performance laptop with 16GB RAM and 512GB SSD\n        '),
 Document(metadata={'source': 'data/structured_files/products.csv', 'row_index': 1, 'product_name': 'Mouse', 'category': 'Accessories', 'rpice': 29.99, 'data_type': 'product_info'}, page_content='\n        Name: Mouse,\n        Category: Accessories,\n        Price: 29.99,\n        Stock: 200,\n        Description: Wireless optical mouse with ergonomic design\n        '),
 Document(metadata={'source': 'data/structured_files/products.csv', 'row_index': 2, 'product_name': 'Keyboard', 'category': 'Accessories', 'rpice': 79.99, 'data_type': 'product_info'}, page_content='\n        Name: Keyboard,\n        Catego