# LangChain Data Loader

```shell
pip install langchain-community beautifulsoup4
```

In [1]:
from langchain_openai import ChatOpenAI
from langchain_ollama.chat_models import ChatOllama
from os import getenv
from dotenv import load_dotenv

load_dotenv()

llm = ChatOpenAI(
    openai_api_key=getenv("OPENROUTER_API_KEY"),
    openai_api_base="https://openrouter.ai/api/v1",
    model="meta-llama/llama-3.2-3b-instruct:free",
    temperature=0.5,
    # max_tokens=512,

)
# llm = ChatOllama(model='qwen2.5:0.5b', temperature=0.5, max_tokens=512)

result = llm.invoke("Hello, how are you today?")
print(result.content)

I'm just a language model, so I don't have emotions or feelings like humans do, but I'm functioning properly and ready to help you with any questions or tasks you have. How can I assist you today?


# CSV Data Loader



In [8]:
from langchain.document_loaders import CSVLoader
# https://support.staffbase.com/hc/en-us/article_attachments/360009197031/username.csv
loader = CSVLoader('data/sample.csv')

data = loader.load()

print(type(data))

print(data[0])

print(data[1].page_content)

<class 'list'>
page_content='Username; Identifier;First name;Last name: booker12;9012;Rachel;Booker' metadata={'source': 'data/sample.csv', 'row': 0}
Username; Identifier;First name;Last name: grey07;2070;Laura;Grey


# Web Loaders
## HTML Data Loader
``` shell
pip install beautifulsoup4
```

In [14]:
from langchain.document_loaders import BSHTMLLoader
from pprint import pp, pprint
# http://help.websiteos.com/websiteos/example_of_a_simple_html_page.htm

loader = BSHTMLLoader('data/sample.html')

data = loader.load()

print(type(data))

print(data)

print(data[0].page_content)

<class 'list'>
[Document(metadata={'source': 'data/sample.html', 'title': 'HTML Element Selection Example'}, page_content='\n\n\n\nHTML Element Selection Example\n\n\n\n\nHTML Element Selection Demo\n\n\nSection 1\nSection 2\nSection 3\n\n\n\n\n\nSection 1\nThis is a highlighted paragraph.\nClick Me\n\n\nSection 2\n\nItem 1\nItem 2\nItem 3\n\n\nSubmit\n\n\nSection 3\n\nBox 1\nBox 2\nBox 3\n\n\n\n\nCreated by Your Name. Follow me on Twitter.\n\n\n')]




HTML Element Selection Example




HTML Element Selection Demo


Section 1
Section 2
Section 3





Section 1
This is a highlighted paragraph.
Click Me


Section 2

Item 1
Item 2
Item 3


Submit


Section 3

Box 1
Box 2
Box 3




Created by Your Name. Follow me on Twitter.





In [45]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(['https://python.langchain.com/docs', 'https://docs.python.org/3.9'])

docs = loader.load()

print(type(docs))

print(docs[0].metadata)
print(docs[1].metadata)
print(docs[0].page_content)

<class 'list'>
{'source': 'https://python.langchain.com/docs', 'title': 'Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain', 'description': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'language': 'en'}
{'source': 'https://docs.python.org/3.9', 'title': '3.9.20 Documentation', 'language': 'No language found.'}





Introduction | ü¶úÔ∏èüîó LangChain






Skip to main contentIntegrationsAPI ReferenceMoreContributingPeopleError referenceLangSmithLangGraphLangChain HubLangChain JS/TSv0.3v0.3v0.2v0.1üí¨SearchIntroductionTutorialsBuild a Question Answering application over a Graph DatabaseTutorialsBuild a Simple LLM ApplicationBuild a Query Analysis SystemBuild a ChatbotConversational RAGBuild an Extraction ChainBuild an AgentTaggingdata_generationBuild a Local RAG ApplicationBuild a PDF ingestion and Question/Answering systemBuild a Retrieval Augmented Generation (RAG) AppVector stores and retrieversBuild a Question/Answering system over

## RecursiveUrlLoader Data Loader


In [39]:
from langchain_community.document_loaders import RecursiveUrlLoader
loader = RecursiveUrlLoader(
    "https://docs.python.org/3.9/",
    # max_depth=2,
    # use_async=False,
    # extractor=None,
    # metadata_extractor=None,
    # exclude_dirs=(),
    # timeout=10,
    # check_response_status=True,
    # continue_on_failure=True,
    # prevent_outside=True,
    # base_url=None,
    # ...
)

docs = loader.load()
print(docs[0].metadata)
print(print(docs[0].page_content[:300]))


  k = self.parse_starttag(i)


{'source': 'https://docs.python.org/3.9/', 'content_type': 'text/html', 'title': '3.9.20 Documentation', 'language': None}

<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta charset="utf-8" /><title>3.9.20 Documentation</title><meta name="viewport" content="width=device-width, initial-scale=1.0">
    
    <link rel="stylesheet" href="_static/pydoctheme.css" type="text/css" />
    <link rel=
None


# PDF Data Loader
## PyPDF Data Loader
``` shell
pip install pypdf
```

In [36]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader('data/constitution.pdf')

data = loader.load()


print(type(data))

# print(data)


print(data[0].metadata)
print(data[1].page_content[:200])



<class 'list'>
{'source': 'data/constitution.pdf', 'page': 0}
C O N S T I T U T I O N O F T H E U N I T E D S T A T E S   
 
 
 
We the People of the United States, in Order to form a 
more perfect Union, establish Justice, insure domestic 
Tranquility, provide 


In [None]:
pages = []
for doc in loader.lazy_load():
    pages.append(doc)
    if len(pages) >= 10:
        # do some paged operation, e.g.
        # index.upsert(page)

        pages = []


print(pages[0].metadata)


{'source': 'data/constitution.pdf', 'page': 10}
C O


C O N S T I T U T I O N O F T H E U N I T E D S T A T E S  Amendment   VI. 
In all criminal prosecutions, the accused shall enjoy the  
right to a speedy and public  trial, by an impartial jury of 
th


## PyPDFDirectoryLoader Data Loader



In [46]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

loader = PyPDFDirectoryLoader('data/')


data = loader.load()

print(type(data))


print(data[0].metadata)


print(data[1].page_content[:200])



<class 'list'>
{'source': 'data/constitution.pdf', 'page': 0}
C O N S T I T U T I O N O F T H E U N I T E D S T A T E S   
 
 
 
We the People of the United States, in Order to form a 
more perfect Union, establish Justice, insure domestic 
Tranquility, provide 
