### **Import required libraries**

In [None]:
import os
import shutil
import pickle

from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

from langchain_community.vectorstores import FAISS

### **Approach 1: Saving the FAISS Vector Store as a File**

#### **Load and Split Documents**

In [3]:
def load_and_split_documents(directory_path: str = "data", chunk_size: int = 500, chunk_overlap: int = 0):
    """
    Read the PDF files under a directory and cut them into text chunks.

    Args:
        directory_path (str): Directory searched with the glob ``**/*.pdf``.
        chunk_size (int): Size of each chunk, forwarded to the splitter.
        chunk_overlap (int): Overlap between chunks, forwarded to the splitter.

    Returns:
        list: Document chunks produced by the text splitter.

    Raises:
        ValueError: If ``directory_path`` is not an existing directory.
    """
    # Guard clause: fail early with a clear message before building a loader.
    directory_exists = os.path.isdir(directory_path)
    if not directory_exists:
        raise ValueError(f"Directory path not found: {directory_path}")

    loaded_documents = DirectoryLoader(
        directory_path,
        glob="**/*.pdf",
        loader_cls=PyPDFLoader,
    ).load()

    # add_start_index=True asks the splitter to record each chunk's start
    # offset alongside the chunk.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        add_start_index=True,
    )
    return splitter.split_documents(loaded_documents)

#### **Save to FAISS**

In [9]:
def save_to_faiss(document_chunks: list, vectorstore_filename: str = "faiss_vectorstore.pkl"):
    """
    Embed document chunks into a FAISS vector store and pickle it to a file.

    Args:
        document_chunks (list): List of document chunks to index.
        vectorstore_filename (str): Path of the pickle file to write.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': "cpu"},
    )
    faiss_store = FAISS.from_documents(document_chunks, embedding_model)

    # The whole vector store object is serialized with pickle in one call.
    with open(vectorstore_filename, mode="wb") as pickle_file:
        pickle.dump(faiss_store, pickle_file)

#### **Load FAISS Vector Store**

In [10]:
def load_faiss_vector_store(vectorstore_filename: str = "faiss_vectorstore.pkl"):
    """
    Load a pickled FAISS vector store and expose it as a retriever.

    Args:
        vectorstore_filename (str): Path of the pickle file to read.

    Returns:
        The retriever returned by the unpickled store's ``as_retriever()``.

    Raises:
        FileNotFoundError: If ``vectorstore_filename`` does not exist.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only open files written by a trusted source.
    with open(vectorstore_filename, "rb") as f:
        vector_store = pickle.load(f)

    # Errors propagate unchanged; the previous catch-and-re-raise wrapper
    # added nothing.
    return vector_store.as_retriever()

In [11]:
# Build a retriever using load_faiss_vector_store's default path argument.
retriever = load_faiss_vector_store()

In [12]:
# Bare expression: the notebook shows the retriever's repr as the cell output.
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001C2FE305300>, search_kwargs={})

### **Approach 2: Saving the FAISS Vector Store as a Directory**

#### **Save to FAISS**

In [None]:
def save_to_faiss(document_chunks: list, db_path: str = "faiss_db"):
    """
    Embed document chunks into a FAISS vector store and save it to a directory.

    Any existing directory at ``db_path`` is deleted first so the saved
    database contains only the chunks passed in this call.

    Args:
        document_chunks (list): List of document chunks to index.
        db_path (str): Directory in which to store the FAISS database locally.
    """
    # Clear out the existing database directory if it exists.
    # The original line read `shutil.rmtre`, which raised AttributeError
    # instead of removing anything.
    if os.path.isdir(db_path):
        shutil.rmtree(db_path)

    embeddings = HuggingFaceEmbeddings(
        model_name="BAAI/bge-small-en-v1.5",
        model_kwargs={'device': "cpu"},
    )
    faiss_vector_database = FAISS.from_documents(document_chunks, embeddings)

    faiss_vector_database.save_local(db_path)

In [None]:
# Load and split PDFs using the default arguments ("data" directory), then
# hand the chunks to save_to_faiss with its default output location.
chunks = load_and_split_documents()
save_to_faiss(chunks)

#### **Load FAISS Vector Store**

In [13]:
def load_faiss_vector_store(db_directory_path: str = "faiss_db"):
    """
    Load a FAISS database saved in a directory and expose it as a retriever.

    Args:
        db_directory_path (str): Path to the FAISS database directory.

    Returns:
        The retriever returned by the loaded store's ``as_retriever()``.
    """
    # Use the same embedding model name that save_to_faiss uses for the
    # directory-based store ("BAAI/bge-small-en-v1.5").
    embeddings = HuggingFaceEmbeddings(
        model_name="BAAI/bge-small-en-v1.5",
        model_kwargs={'device': "cpu"},
    )

    # SECURITY: allow_dangerous_deserialization=True opts in to pickle-based
    # loading, which can execute arbitrary code. Only point this at a
    # directory written by a trusted source.
    vector_store = FAISS.load_local(
        db_directory_path,
        embeddings,
        allow_dangerous_deserialization=True,
    )

    # Errors propagate unchanged; the previous catch-and-re-raise wrapper
    # added nothing.
    return vector_store.as_retriever()

In [14]:
# Build a retriever using load_faiss_vector_store's default path argument,
# then display its repr as the cell output.
retriever = load_faiss_vector_store()
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001C2CAC96020>, search_kwargs={})