In [None]:
## Data ingestion: load a local plain-text file
from langchain_community.document_loaders import TextLoader

# `loader` is rebound by each ingestion cell; the loaded result is kept in
# its own, source-specific variable.
loader = TextLoader(file_path="prompt_engineering.txt")
text_documents = loader.load()

In [22]:
## Load environment variables (e.g. API keys) from a local .env file
import os
import warnings

from dotenv import load_dotenv

# Loads the entries of a .env file into the process environment.
load_dotenv()

# os.environ only accepts string values, so the previous
# `os.environ[key] = os.getenv(key)` raised TypeError whenever the key was
# missing, and was a no-op whenever it was present (it wrote the value back
# to the place it was just read from). Check for the key instead and warn,
# so this cell never crashes a fresh "Restart & Run All".
if "OPENAI_API_KEY" not in os.environ:
    warnings.warn(
        "OPENAI_API_KEY is not set; add it to your .env file or environment "
        "before running any cell that calls OpenAI."
    )

In [None]:
## Data ingestion: load a web page
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Only the HTML elements carrying these CSS classes are handed to the parser
# (title, header and body of the post); everything else on the page is skipped.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-title", "post-content", "post-header")
        ),
    },
)
web_documents = loader.load()

In [18]:
## Data ingestion: load a local PDF file
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(
    file_path="Introduction to Machine Learning with Python ( PDFDrive.com )-min.pdf"
)
pdf_documents = loader.load()

In [49]:
## Split the PDF documents into overlapping chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks are capped at 100 with an overlap of 20 between neighbours.
# Only the PDF documents are split here; the text-file and web documents
# loaded earlier are not part of `documents`.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
documents = text_splitter.split_documents(pdf_documents)

In [None]:
## Vector embeddings and vector store
## Embedding model: llama3.2:1b, downloaded locally using ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Embed every chunk with the Ollama model and build a FAISS store from them.
vector_db = FAISS.from_documents(
    documents=documents,
    embedding=OllamaEmbeddings(model="llama3.2:1b"),
)

In [None]:
## Query the vector store
query = "Who was the author of the book : Introduction to Machine Learning with Python?"

# NOTE(review): `retireved_results` is a misspelling of "retrieved"; the name
# is kept as-is because other cells may reference it.
retireved_results = vector_db.similarity_search(query=query)

# Show the text of the first returned chunk.
print(retireved_results[0].page_content)