# Retriever and Chain in LangChain

In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

## Build the vector DB
# Load the source PDF into `docs`
loader = PyPDFLoader("2D3MF.pdf")
docs = loader.load()

# Split the loaded documents into chunks using the splitter settings below
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
docs_split = text_splitter.split_documents(docs)

# Embed the chunks with OpenAIEmbeddings and index them in a FAISS vector store
# NOTE(review): presumably needs OpenAI credentials available in the
# environment — confirm before running on a fresh kernel
db = FAISS.from_documents(
    docs_split,
    OpenAIEmbeddings(),
)


In [5]:
# Query test: run a similarity search against the vector store
query = "What is 2D3MF model"
result = db.similarity_search(query)
# Display the text of the first returned document
result[0].page_content

'Fig. 2 . High-level overview of our 2D3MF with audio and video inputs fused using Self and Cross-Attention middle fusion via\ntransformer attention.\nand speaker identification. These networks extract useful fa-\ncial and speech representations, proving essential for the ef-\nfectiveness of DVD tasks.\nIn this work, we study the utility of audio-visual emo-\ntion speaker embeddings, representations extracted from pre-\ntrained audio and video networks, as robust features for the\nDVD task. To the best of our knowledge, we are the first\nto leverage abstract representations of emotions in the audio-\nvisual domain to highlight and detect inconsistencies in fake\nvideos. We propose 2D3MF (Deepfake Detection with Multi\nModal Middle Fusion), which is a novel middle fusion strat-\negy where audio and visual data are synergistically analyzed\nto capture discrepancies in emotional expressions, and vocal\ntones. These features reveal the subtle yet critical flaws inher-'

In [8]:
# Define LLM
from langchain_community.llms import Ollama

# Create the Ollama LLM object configured for the "llama3" model
# NOTE(review): presumably requires a reachable Ollama server with the
# llama3 model available — confirm
llm = Ollama(model="llama3")

In [9]:
## Design ChatPrompt Template
from langchain_core.prompts import ChatPromptTemplate

# The template text is written flush-left on purpose: everything inside the
# triple-quoted string, indentation and trailing spaces included, is part of
# the prompt text, so aligning it with the surrounding code would pad every
# line of the prompt with whitespace.
# Placeholders used by the template: {context} and {input}.
prompt = ChatPromptTemplate.from_template(
    """Answer the following question based only on the provided context.
Think step by step before providing a detailed answer.
I will tip you $1000 if the user finds the answer helpful.
<context>
{context}
</context>
Question: {input}"""
)

In [10]:
## Chain Introduction
## Create Stuff Document Chain

from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the LLM and the prompt template into a single document chain.
# NOTE(review): presumably the chain inserts the supplied documents into the
# prompt's {context} placeholder — confirm against the LangChain docs.
document_chain = create_stuff_documents_chain(llm, prompt)

In [12]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Obtain a retriever object from the FAISS vector store `db`
retriever = db.as_retriever()
# Bare expression: shows the retriever's repr as the cell output
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000220D3618E50>)

In [13]:
"""
Retrieval chain: This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents
(and original inputs) are then passed to an LLM to generate a response.
https://python.langchain.com/docs/modules/chains/
"""
from langchain.chains import create_retrieval_chain

# Build the retrieval chain from the retriever and the document chain
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [15]:
# Ask the retrieval chain a question; it is passed under the "input" key,
# the same name as the prompt template's {input} placeholder
response = retrieval_chain.invoke(
    {"input": "What does a 2D3MF model do?"}
)
# Display only the "answer" entry of the response
response["answer"]

'A well-crafted question!\n\nAfter carefully reading the provided context, I will answer your question step by step:\n\nThe text describes a novel model called 2D3MF (Deepfake Detection with Multi-Modal Middle Fusion). According to the abstract and subsequent sections, this model is designed for **deepfake detection**, which is the task of detecting videos that have been generated or manipulated using deep learning.\n\nIn more detail, the 2D3MF model uses a middle fusion strategy, fusing audio and visual data synergistically to capture discrepancies in emotional expressions and vocal tones. This fusion is achieved through Self-Attention and Cross-Attention transformer blocks.\n\nTherefore, I conclude that a 2D3MF model **detects deepfakes** by leveraging the relationship between emotions conveyed in audio and video for multi-modal deepfake detection.\n\nI hope this answer meets your expectations!'