## First Document (ml.pdf)

In [1]:
from langchain_community.document_loaders import PDFMinerLoader


pdf_loader = PDFMinerLoader("./data/ml.pdf")
documents = pdf_loader.load()

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
chunked_documents = text_splitter.split_documents(documents)

len(chunked_documents)

2460

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS



embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [4]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from uuid import uuid4

index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

uuids = [str(uuid4()) for _ in range(len(chunked_documents))]
vector_store.add_documents(documents=chunked_documents, ids=uuids)
vector_store.save_local("faiss_index1")

## Second Document (ml2.pdf)

In [5]:
from langchain_community.document_loaders import PDFMinerLoader


pdf_loader2 = PDFMinerLoader("./data/xai.pdf")
documents2 = pdf_loader2.load()

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter2 = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
chunked_documents2 = text_splitter2.split_documents(documents2)

len(chunked_documents2)

704

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS



embeddings2 = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [8]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from uuid import uuid4

index2 = faiss.IndexFlatL2(len(embeddings2.embed_query("hello world")))

vector_store2 = FAISS(
    embedding_function=embeddings2,
    index=index2,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

uuids2 = [str(uuid4()) for _ in range(len(chunked_documents2))]
vector_store2.add_documents(documents=chunked_documents2, ids=uuids2)
vector_store2.save_local("faiss_index2")

## Generation Phase

In [9]:
vector_store = FAISS.load_local("faiss_index1", embeddings, allow_dangerous_deserialization=True)
vector_store2 = FAISS.load_local("faiss_index2", embeddings2, allow_dangerous_deserialization=True)

In [10]:
question = "How to add explainability to convolution and attention layers?"
results = vector_store.similarity_search(
    question,
    k=400
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

print("="*50)

results2 = vector_store2.similarity_search(
    question,
    k=400
)
for res in results2:
    print(f"* {res.page_content} [{res.metadata}]")

* In  some  applications,  explainability  is  not  just  a  tool  to  debug  a  model;  it  can  be  a
legal  requirement  (think  of  a  system  deciding  whether  or  not  it  should  grant  you  a
loan).

18 This is a part of figure 3 from the paper. It is reproduced with the kind authorization of the authors.

19 Marco Tulio Ribeiro et al., “‘Why Should I Trust You?’: Explaining the Predictions of Any Classifier,” Proceed‐
ings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (2016):
1135–1144.

Attention Mechanisms 

| 

553

Attention  mechanisms  are  so  powerful  that  you  can  actually  build  state-of-the-art
models using only attention mechanisms. [{'source': './data/ml.pdf', 'start_index': 1201072}]
* Explainability
One extra benefit of attention mechanisms is that they make it easier to understand
what led the model to produce its output. This is called explainability. It can be espe‐
cially useful when the model makes a mistake: f

In [210]:
results = "\n".join(map(lambda i: i.page_content, results))
results2 = "\n".join(map(lambda i: i.page_content, results2))

In [None]:
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline


model_id = "gpt2"
hf_pipeline = pipeline("text-generation", model=model_id, max_new_tokens=1000000)

hf = HuggingFacePipeline(pipeline=hf_pipeline)


Device set to use cpu


In [212]:
import json

template = f"""SYSTEM: 
{results}
RAG 2 Answers 2:
{results2}

USER: {question}

ASSISTANT:
"""

result = hf.invoke(template)

print(result['result'])
print("Source Documentsresult:", result['source_documents'])
result

## GPT‌4o Tests

In [213]:
from dotenv import load_dotenv


load_dotenv()

True

In [None]:
vector_store = FAISS.load_local("faiss_index1", embeddings, allow_dangerous_deserialization=True)
vector_store2 = FAISS.load_local("faiss_index2", embeddings2, allow_dangerous_deserialization=True)

In [None]:
question = "How to add explainability to convolution and attention layers?"
results = vector_store.similarity_search(
    question,
    k=400
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

print("="*50)

results2 = vector_store2.similarity_search(
    question,
    k=400
)
for res in results2:
    print(f"* {res.page_content} [{res.metadata}]")

In [None]:
results = "\n".join(map(lambda i: i.page_content, results2))
results2 = "\n".join(map(lambda i: i.page_content, results2))

In [214]:
from openai import OpenAI


client = OpenAI()
template = f"""
Answer the request using only the context below.
Context:
{results}

{results2}


Request: {question}
"""

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    store=True,
    messages=[
        {"role": "system", "content": "Combine only Context section provided and answer based on it only, summerizer it and dont use of your knowlegde in answer."},
        {"role": "user", "content": template}
    ]
)

BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 128000 tokens. However, your messages resulted in 150850 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}

In [208]:
print(completion.choices[0].message.content)

Incorporating explainability into convolutional and attention layers can be achieved through various techniques. For convolutional layers, attention mechanisms enhance interpretability by highlighting which parts of an input image contributed most to the network's predictions. Techniques such as visual attention can be used to track what the model focuses on when making predictions, generating heat maps that indicate areas of importance.

For attention layers, models can utilize integrated gradients or saliency maps to provide insights into how input features influence the model’s output. Additionally, gradient-based methods can be employed to visualize the contribution of different input components to the final decision made by the model.

Incorporating these techniques effectively aids in understanding the decision-making process of the neural network, making it easier to identify why specific inputs lead to certain outputs. This is particularly useful in debugging models and ensurin

## Generation phase of new architecture

<img src='./preface.jpg'>

In [11]:
from dotenv import load_dotenv


load_dotenv()

True

In [12]:
vector_store = FAISS.load_local("faiss_index1", embeddings, allow_dangerous_deserialization=True)
vector_store2 = FAISS.load_local("faiss_index2", embeddings2, allow_dangerous_deserialization=True)

In [34]:
question = "How to add explainability to convolution and attention layers?"
results = vector_store.similarity_search(
    question,
    k=40
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

print("="*50)

results2 = vector_store2.similarity_search(
    question,
    k=40
)
for res in results2:
    print(f"* {res.page_content} [{res.metadata}]")

* In  some  applications,  explainability  is  not  just  a  tool  to  debug  a  model;  it  can  be  a
legal  requirement  (think  of  a  system  deciding  whether  or  not  it  should  grant  you  a
loan).

18 This is a part of figure 3 from the paper. It is reproduced with the kind authorization of the authors.

19 Marco Tulio Ribeiro et al., “‘Why Should I Trust You?’: Explaining the Predictions of Any Classifier,” Proceed‐
ings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (2016):
1135–1144.

Attention Mechanisms 

| 

553

Attention  mechanisms  are  so  powerful  that  you  can  actually  build  state-of-the-art
models using only attention mechanisms. [{'source': './data/ml.pdf', 'start_index': 1201072}]
* Explainability
One extra benefit of attention mechanisms is that they make it easier to understand
what led the model to produce its output. This is called explainability. It can be espe‐
cially useful when the model makes a mistake: f

In [40]:
from openai import OpenAI


def query(question, results, client):
    template = f"""
    Answer the request using only the context below.
    Context:
    
    {"\n".join(results)}


    Request: {question}
    """

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        store=True,
        messages=[
            {"role": "system", "content": "Combine only Context section provided and answer based on it only, summerizer it and dont use of your knowlegde in answer."},
            {"role": "user", "content": template}
        ]
    )
    return completion.choices[0].message.content

In [41]:
import time

client = OpenAI()


def group_elements(input_list, group_size):
    return ["\n".join(map(lambda l: l.page_content, input_list[i:i + group_size])) for i in range(0, len(input_list), group_size)]


input1 = group_elements(results, 10)
input2 = group_elements(results2, 10)
output = []
for res in zip(input1, input2):
    output.append(query(question, res, client))
    time.sleep(30)

print(output)

["To add explainability to convolution and attention layers, the following strategies can be employed:\n\n1. **Attention Mechanisms**: Incorporate attention mechanisms in the model, as they enhance understanding by showing which parts of the input the model focuses on when making predictions. For example, in visual tasks, attention helps identify which areas of an image are influencing the model's output, allowing for insights into model behavior during errors.\n\n2. **Contrastive Explanations**: Utilize contrastive explanations that highlight differences between an instance and a hypothetical scenario where a different prediction might occur. This helps clarify why specific outputs were generated based on particular input features.\n\n3. **Model-Agnostic Methods**: Apply model-agnostic interpretability techniques that can provide insights into any machine learning model. These methods can help in understanding the relationships between input data and model predictions across various l

In [52]:
import time

client = OpenAI()


def group_elements(input_list, group_size):
    return ["\n".join(map(lambda l: l.page_content, input_list[i:i + group_size])) for i in range(0, len(input_list), group_size)]


input1 = group_elements(results, 10)
input2 = group_elements(results2, 10)
input1.extend(input2)
output = []
for i in range(0, len(input1), 2):
    output.append(query(question, input1[i: i + 2], client))
    time.sleep(10)

print(output)

["Explainability in models utilizing convolutional and attention layers can be enhanced through attention mechanisms. These mechanisms help in understanding model outputs by identifying which parts of the input (images or textual data) the model focused on when making predictions. For example, in a scenario where an image of a dog is incorrectly labeled as a wolf, attention mechanisms allow the user to analyze the model's focus, revealing its reasoning behind the classification.\n\nIn practice, when applying attention mechanisms, you can implement methods such as visual attention, where a convolutional neural network processes an image and provides feature maps to a decoder. The decoder, equipped with an attention mechanism, generates outputs (like captions) one word at a time while focusing on relevant parts of the input image.\n\nAdditionally, models like the Transformer architecture, which relies solely on attention mechanisms without using recurrent or convolutional layers, showcas

In [54]:
query(question, output, client)

"To enhance explainability in convolutional and attention layers, attention mechanisms can be applied. These mechanisms allow for an understanding of model outputs by identifying which parts of the input (such as images or text) the model concentrated on during predictions. For instance, if an image is misclassified, attention mechanisms can help analyze the model's focus, clarifying its reasoning.\n\nIn practice, visual attention can be incorporated, where a convolutional neural network processes an image and delivers feature maps to a decoder. The decoder, using an attention mechanism, generates outputs (like captions) sequentially while emphasizing relevant aspects of the input image.\n\nAdditionally, the Transformer architecture exemplifies the effectiveness of solely using attention mechanisms for understanding and explaining model predictions. By including attention layers, models become more interpretable, offering insights into their decision-making processes."

In [55]:
from pprint import pprint
pprint("To enhance explainability in convolutional and attention layers, attention mechanisms can be applied. These mechanisms allow for an understanding of model outputs by identifying which parts of the input (such as images or text) the model concentrated on during predictions. For instance, if an image is misclassified, attention mechanisms can help analyze the model's focus, clarifying its reasoning.\n\nIn practice, visual attention can be incorporated, where a convolutional neural network processes an image and delivers feature maps to a decoder. The decoder, using an attention mechanism, generates outputs (like captions) sequentially while emphasizing relevant aspects of the input image.\n\nAdditionally, the Transformer architecture exemplifies the effectiveness of solely using attention mechanisms for understanding and explaining model predictions. By including attention layers, models become more interpretable, offering insights into their decision-making processes.")

('To enhance explainability in convolutional and attention layers, attention '
 'mechanisms can be applied. These mechanisms allow for an understanding of '
 'model outputs by identifying which parts of the input (such as images or '
 'text) the model concentrated on during predictions. For instance, if an '
 "image is misclassified, attention mechanisms can help analyze the model's "
 'focus, clarifying its reasoning.\n'
 '\n'
 'In practice, visual attention can be incorporated, where a convolutional '
 'neural network processes an image and delivers feature maps to a decoder. '
 'The decoder, using an attention mechanism, generates outputs (like captions) '
 'sequentially while emphasizing relevant aspects of the input image.\n'
 '\n'
 'Additionally, the Transformer architecture exemplifies the effectiveness of '
 'solely using attention mechanisms for understanding and explaining model '
 'predictions. By including attention layers, models become more '
 'interpretable, offering ins