In [7]:
from src import get_azure_openai_model, get_azure_openai_chat_model, create_vector_store

model1 = get_azure_openai_model()
model2 = get_azure_openai_chat_model()
vector_store = create_vector_store()

In [2]:
# Load PDFs
from langchain_community.document_loaders import PyPDFLoader
import os

folder_path = "papers"
pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]

pages = []
for pdf_file in pdf_files:
    loader = PyPDFLoader(os.path.join(folder_path, pdf_file))
    async for page in loader.alazy_load():
        pages.append(page)

In [3]:
# Add docs to vector store
# Index chunks
_ = vector_store.add_documents(documents=pages)

# Retrival
retriever = vector_store.as_retriever()

### experiment with query construction

In [4]:
# messages
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

# output parsers
from langchain_core.output_parsers import StrOutputParser

# prompts
from langchain_core.prompts import (
    AIMessagePromptTemplate,
    ChatPromptTemplate,
    FewShotPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    PromptTemplate,
    SystemMessagePromptTemplate,
)

In [5]:

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to understand and generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_perspectives 
    | model2 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [6]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is deeplearning?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union

# Print generated queries
generated_queries_list = generate_queries.invoke({"question": question})
print("Generated Queries:")
for query in generated_queries_list:
    print(query)
    
docs = retrieval_chain.invoke({"question":question})

print(len(docs))
for doc in docs:
    print(f'Page {doc.metadata["page"]}: {doc.page_content[:300]}\n')


Generated Queries:
What does deep learning refer to in the context of artificial intelligence?  
Can you explain the concept of deep learning and its applications?  
What are the key principles and techniques involved in deep learning?  
How does deep learning differ from traditional machine learning methods?  
What are some examples of deep learning in practice today?  
10
Page 42: 21Summary
generative adversarial networks, evolutionary methods, meta-learning, and transfer
learning. Again, this is all in line with our skills-focused mode of teaching, so the par-
ticulars of these advances is not what’s important.
Summary
 Reinforcement learning is a subclass of machine learni

Page 24: 3
What is reinforcement
learning?
Computer languages of the future will be more concerned with goals and less with
procedures specified by the programmer.
—Marvin Minksy, 1970 ACM Turing Lecture
If you’re reading this book, you are probably familiar with how deep neural net-
works are used for thing

P

  return [loads(doc) for doc in unique_docs]


Everything is is fed to the model without filtering. 

In [31]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# RAG
# template = """Answer the following question based on this context:

# {context}

# Question: {question}
# """

template = """
You are a friendly assistant that helps students.
Answer the query using only the sources provided below in a friendly and concise bulleted manner.
Answer ONLY with the facts listed in the list of sources below.
If there isn't enough information below, say you don't know.
Do not generate answers that don't use the sources below.
Query: {question}
Sources:\n{context}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | model2
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'- Deep learning is a subfield of machine learning that utilizes deep neural networks to process complex data efficiently.\n- It is characterized by its ability to learn layered representations of input data, allowing for compositional representations of complexity.\n- Deep learning models are particularly effective for tasks like image classification and prediction, where traditional automated image processing was limited.\n- The "deep" in deep learning refers to the multiple layers in neural networks, which help in learning intricate patterns and features from data.\n- Deep learning has been instrumental in the success of deep reinforcement learning (DRL), which combines deep learning with reinforcement learning tasks.'

### Reranking the retrieved documents:

In [None]:
def reciprocal_rank_fusion(retrieved_docs: list[list], k: int = 70):
    """ Reciprocal Rank Fusion """
    # Get unique union of retrieved docs
    unique_docs = get_unique_union(retrieved_docs)
    
    # Initialize a dictionary to store the RRF scores
    rrf_scores = {dumps(doc): 0 for doc in unique_docs}
    
    # Calculate RRF scores
    for docs in retrieved_docs:
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str in rrf_scores:
                rrf_scores[doc_str] += 1 / (k + rank + 1)
    
    # Sort documents by their RRF scores in descending order
    sorted_docs = sorted(unique_docs, key=lambda doc: rrf_scores[dumps(doc)], reverse=True)
    
    # Return sorted documents and their scores
    return sorted_docs, [rrf_scores[dumps(doc)] for doc in sorted_docs]

In [50]:
reciprocal_rank_fusion([docs])

[Document(id='9d90014e-0d81-4e8b-bfcc-a8f36f65e2b4', metadata={'page': 27, 'source': 'papers\\Alexander Zai, Brandon Brown - Deep Reinforcement Learning in Action-Manning Publications (2020).pdf'}, page_content='6 CHAPTER 1 What is reinforcement learning?\nwhich are composed into elementary shapes, and so on, until you get the complete,\ncomplex image. This ability to handle complexity with compositional representations\nis largely what makes deep learning so powerful.\n1.2 Reinforcement learning\nIt is important to distinguish between problems and their solutions, or in other words,\nbetween the tasks we wish to solve and the algorithms we design to solve them. Deep\nlearning algorithms can be applied to many problem types and tasks. Image classifica-\ntion and prediction tasks are common applications of deep learning because auto-\nmated image processing before deep learning was very limited, given the complexity\nof images. But there are many other kinds of tasks we might wish to au

In [54]:
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
docs


[Document(id='751dfe74-6c27-44e6-8182-5e9300db5269', metadata={'page': 357, 'source': 'papers\\Alexander Zai, Brandon Brown - Deep Reinforcement Learning in Action-Manning Publications (2020).pdf'}, page_content='336\nappendix\nMathematics,\ndeep learning, PyTorch\nThis appendix offers a rapid review of deep learning, the relevant mathematics we\nuse in this book, and how to implement deep learning models in PyTorch. We’ll\ncover these topics by demonstrating how to implement a deep learning model in\nPyTorch to classify images of handwritten digits from the famous MNIST dataset.\nDeep learning algorithms , which are also called artificial neural networks , are rela -\ntively simple mathematical functions and mostly just require an understanding of\nvectors and matrices. Training a neural network, however, requires an understand -\ning of the basics of calculus, namely the derivative. The fundamentals of applied\ndeep learning therefore require only knowing how to multiply vectors and 

In [55]:
template = """
You are a friendly assistant that helps students.
Answer the query using only the sources provided below in a friendly and concise bulleted manner.
Answer ONLY with the facts listed in the list of sources below.
If there isn't enough information below, say you don't know.
Do not generate answers that don't use the sources below.
Query: {question}
Sources:\n{context}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": itemgetter("question")} 
    | prompt
    | model2
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'- Deep learning algorithms, also known as artificial neural networks, are relatively simple mathematical functions that primarily require an understanding of vectors and matrices.\n- Training a neural network necessitates knowledge of calculus, particularly derivatives.\n- Deep learning models are a subset of machine learning models that can classify images and handle complex data efficiently.\n- They utilize layered representations of data, allowing them to learn compositional structures, which is a key aspect of their power.\n- Deep learning is particularly effective for tasks like image classification and prediction, as it can abstract important features from raw data.'

### PLaying with llm judge

##### self rag
##### llm jude

2 types of retriver: re-ranker and llm-grader