# Import Packages

In [3]:
from uuid import uuid4
import os
from dotenv import load_dotenv

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Qdrant
from langchain import hub
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate
from langchain.callbacks import StdOutCallbackHandler

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
from qdrant_client.models import PointStruct

from crewai_tools import BaseTool
from crewai import Agent, Task, Crew

In [18]:
# Load environmental variables from a .env file
load_dotenv()

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
QDRANT_API_KEY = os.getenv('QDRANT_API_KEY')
QDRANT_URL = os.getenv('QDRANT_URL')

# Load Documents

In [12]:
# Load documents

loader = PyPDFLoader("attention.pdf")

In [13]:
# Split documents

text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
document = loader.load_and_split(text_splitter)

In [14]:
document[0]

Document(page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗', metadata={'source': 'attention.pdf', 'page': 0})

In [15]:
len(document)

92

In [48]:
document[10]

Document(page_content='tion models in various tasks, allowing modeling of dependencies without regard to their distance in\nthe input or output sequences [ 2,19]. In all but a few cases [ 27], however, such attention mechanisms\nare used in conjunction with a recurrent network.\nIn this work we propose the Transformer, a model architecture eschewing recurrence and instead\nrelying entirely on an attention mechanism to draw global dependencies between input and output.', metadata={'source': 'attention.pdf', 'page': 1})

# Vector Store

In [24]:
# Create collection in vector store

client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY)


client.create_collection(
    collection_name="attention",
    vectors_config={
        "content": VectorParams(size=1536, distance=Distance.COSINE)
        }
    )

In [25]:
# Upsert documents in vector store

embedding = OpenAIEmbeddings()

def chunked_metadata(data, client=client, collection_name="attention"):

    chunked_metadata = []

    for item in data:

      id = str(uuid4())
      content = item.page_content
      source = item.metadata["source"]
      page = item.metadata["page"]

      content_vector = embedding.embed_documents([content])[0]
      vector_dict = {"content": content_vector}

      payload = {
           "page_content": content,
           "metadata": {
                        "id": id,
                        "page_content": content,
                        "source": source,
                        "page": page,
                        }
            }


      metadata = PointStruct(id=id, vector=vector_dict, payload=payload)
      chunked_metadata.append(metadata)

    client.upsert(
                  collection_name=collection_name,
                  wait=True,
                  points=chunked_metadata)

In [26]:
chunked_metadata(document[:10])

In [27]:
client.get_collections()

CollectionsResponse(collections=[CollectionDescription(name='attention')])

In [28]:
attention_collection = client.get_collection("attention")

print(f"Points: {attention_collection.points_count} ")

Points: 90 


In [12]:
# client.delete_collection(collection_name="attention")

In [29]:
# Search most similar documents to "Positional Encoding"

client.search(
    collection_name="attention",
    query_vector=("content", embedding.embed_documents(["Positional Encoding"])[0]),
    with_payload=["page_content", "source"],
    limit=5
)

[ScoredPoint(id='511d4b4b-72ea-42e4-954a-d5fa8decc60c', version=0, score=0.77504903, payload={'page_content': 'architectures [38, 24, 15].\nRecurrent models typically factor computation along the symbol positions of the input and output\nsequences. Aligning the positions to steps in computation time, they generate a sequence of hidden\nstates ht, as a function of the previous hidden state ht−1and the input for position t. This inherently\nsequential nature precludes parallelization within training examples, which becomes critical at longer'}, vector=None, shard_key=None),
 ScoredPoint(id='0b73789c-151b-42e8-bc58-57aa00c299dd', version=1, score=0.77504903, payload={'page_content': 'architectures [38, 24, 15].\nRecurrent models typically factor computation along the symbol positions of the input and output\nsequences. Aligning the positions to steps in computation time, they generate a sequence of hidden\nstates ht, as a function of the previous hidden state ht−1and the input for positio

# Initialize Vector Store

In [30]:
# Initialize vector store

vectorstore = Qdrant(client=client,
                     collection_name="attention",
                     embeddings=embedding,
                     vector_name="content")

In [31]:
# Downlad prompt template

prompt = hub.pull("rlm/rag-prompt")

In [32]:
print(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]


In [33]:
print(prompt.messages[0].prompt.template)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:


# Define RAG Pipeline

In [34]:
# Main parameters

template = """

You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \


Question: {question}
Context: {context}

Answer:

"""

retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_template(template)

handler =  StdOutCallbackHandler()

In [37]:
# LLM gpt-3.5-turbo

llm35 = ChatOpenAI(temperature=0.0,
                 model="gpt-3.5-turbo",
                 max_tokens=512)


# Retrieval qa chain
qa_d35 = RetrievalQA.from_chain_type(
                                    llm=llm35,
                                    chain_type="stuff",
                                    callbacks=[handler],
                                    retriever=retriever)

In [38]:
# Invoke qa_chain gpt-3.5-turbo

qa_d35.invoke({"query": "What is the attention mechanism?"})["result"]



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'The attention mechanism is a component used in sequence modeling and transduction tasks. It allows models to focus on different parts of the input sequence when making predictions, giving more weight to relevant information. This mechanism has become essential for improving the performance of models dealing with sequential data.'

In [39]:
# LLM gpt-4o

llm4o = ChatOpenAI(temperature=0.0,
                 model="gpt-4o",
                 max_tokens=512)

# Retrieval qa chain
qa_db4o = RetrievalQA.from_chain_type(
                                    llm=llm4o,
                                    chain_type="stuff",
                                    callbacks=[handler],
                                    retriever=retriever)

In [40]:
print(qa_db4o.invoke({"query": "What is the attention mechanism?"})["result"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
The attention mechanism is a technique used in sequence modeling and transduction tasks, such as machine translation, text summarization, and image captioning. It allows the model to focus on different parts of the input sequence when generating each part of the output sequence. This mechanism helps the model to selectively concentrate on relevant parts of the input, thereby improving performance and efficiency.

In essence, the attention mechanism assigns different weights to different parts of the input sequence, indicating their importance for the current step of the output sequence generation. This way, the model can dynamically adjust its focus based on the context, leading to better handling of long-range dependencies and more accurate predictions.

Attention mechanisms have become a fundamental component in many state-of-the-art models, including the Transformer architecture, which has been highly successfu

In [41]:
print(qa_db4o.invoke({"query": "Can you analyze the research paper?"})["result"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Certainly! The research paper "Attention Is All You Need" by Ashish Vaswani et al. introduces the Transformer model, which has become a foundational architecture in the field of natural language processing (NLP). Here are some key points and analysis of the paper:

### Key Contributions:
1. **Transformer Architecture**:
   - The paper introduces the Transformer model, which relies entirely on self-attention mechanisms, dispensing with recurrence and convolutions entirely.
   - This architecture allows for more parallelization during training, leading to significant speed improvements.

2. **Self-Attention Mechanism**:
   - The core innovation is the self-attention mechanism, which computes the representation of a word in a sequence by attending to all other words in the sequence.
   - This mechanism allows the model to capture long-range dependencies more effectively than RNNs or CNNs.

3. **Positional Encoding**:

# CrewAI Agents and Tools

The following steps are performed:

- Search the research paper in the vector store as tool 
- Provide a detailed report with the researcher agent
- Provide a short summary with the writer agent

In [57]:
from crewai_tools import BaseTool

In [62]:
# Create custom tool to use the vector store

class MyCustomTool(BaseTool):
    name: str = "Research Paper"
    description: str = "Research Paper"

    def _run(self, argument: str) -> str:
        return qa_db4o.run("Can you analyze the research paper?")

tool_instance = MyCustomTool()

In [63]:
# Agents and Tools
from crewai import Agent, Task, Crew

### RESEARCHER ###

# Agent
reasarcher = Agent(
    role="Computer Science Researcher",
    goal='Understanding the published research paper and extract the main insights',
    backstory="""You work at a Computer Science school.
                  A new paper has been published and you need to extract
                  the main information and provide the main insights to
                  the students.You have a knack for dissecting complex
                  research papers""",
    verbose=True,
    tools=[tool_instance],
    allow_delegation=False
    )


# Tasks
task_research = Task(
    description="""Conduct an in-depth analysis of the published research paper.
                    Identify the key components and try to understand how each works.""",
    expected_output="""Your final answer must be a full analysis report of the research paper""",
    agent=reasarcher,
    output_file='researcher_analysis.md' 

)


### WRITER ###

# Agent
writer = Agent(
    role="Computer Science Writer",
    goal= 'Craft easy to understand content for Computer Science students',
    backstory="""You are working on writing a summary of a research paper.
                  Use the content of the Computer Science Researcher to develop
                  a short comprehensive summary of the research paper""",
    verbose=True,
    allow_delegation=False,
)

# Tasks
task_writer = Task(
    description="""Extract the most important topics and key components of the report
                    developed by the Computer Science Researcher.""",
    expected_output="""Your final answer must be a short comprehensive summary 
                        to understand report for Computer Science students""",
    agent=writer,
    output_file='writer_summary.md' 

)

# CREWAI KICKOFF

Crew = Crew(
    agents=[reasarcher, writer],
    tasks=[task_research, task_writer],
    verbose=2)

result = Crew.kickoff()



[1m[95m [DEBUG]: == Working Agent: Computer Science Researcher[00m
[1m[95m [INFO]: == Starting Task: Conduct an in-depth analysis of the published research paper.
                    Identify the key components and try to understand how each works.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mFirst, I need to read the research paper thoroughly to understand its content and key points.

Action: Research Paper
Action Input: {"argument": "string"}
[0m

[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
[95m 

I can provide an analysis of the research paper "Attention Is All You Need" by Ashish Vaswani et al. Here is a brief overview and analysis:

### Overview
"Attention Is All You Need" is a seminal paper in the field of natural language processing (NLP) and machine learning, introducing the Transformer model. The paper was authored by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, and Łukasz Ka

In [64]:
print(result)

The research paper titled "Attention Is All You Need" by Ashish Vaswani and team from Google Brain, Google Research, and the University of Toronto introduces a revolutionary model known as the Transformer model. This model is entirely based on self-attention mechanisms, eliminating the need for recurrent or convolutional layers.

The researchers introduced the self-attention mechanism, a novel method that allows the model to assign weights to different words when encoding a word, enabling it to capture long-range dependencies more effectively. This approach led to more parallelization during training by removing recurrence, which resulted in faster and more efficient training processes on modern hardware.

The paper provides a detailed description of the Transformer architecture, which includes the encoder-decoder structure, multi-head self-attention, and position-wise feed-forward networks. It also explains the self-attention mechanism and the addition of positional encodings to input