In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.readers.file import PyMuPDFReader
from llama_index.core.node_parser import HierarchicalNodeParser

from IPython.display import Markdown, display

In [32]:
import glob
import os

# Reader whose load_data() results are accumulated into `documents` below.
pdf_loader = PyMuPDFReader()
base_path = '../articles'

# `recursive=True` only has an effect when the pattern contains `**`.
# With `**`, PDFs directly inside `base_path` and in any sub-folder are matched.
pdf_pattern = os.path.join(base_path, '**', '*.pdf')

documents = []
# glob returns paths in arbitrary order; sort so every run loads the
# documents in the same order.
for file in sorted(glob.glob(pdf_pattern, recursive=True)):
    documents.extend(pdf_loader.load_data(file))

# main.pdf is the paper that the query further down asks the model to critique.
documents.extend(pdf_loader.load_data('../monografia/main.pdf'))

In [33]:
# Model identifiers, pulled out so they are easy to spot and swap.
EMBED_MODEL_NAME = "BAAI/bge-m3"
LLM_MODEL_NAME = "llama3"

# Set the embedding model and the LLM on the shared llama_index Settings object.
Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
Settings.llm = Ollama(model=LLM_MODEL_NAME, request_timeout=360.0)



In [34]:
# NOTE(review): HierarchicalNodeParser is already imported in the first cell, and
# SentenceSplitter is not used in any of the cells visible here — consider
# dropping this import once the rest of the notebook has been checked.
from llama_index.core.node_parser import SentenceSplitter, HierarchicalNodeParser

# Parser built with the library defaults (no chunk sizes are passed explicitly).
node_parser = HierarchicalNodeParser.from_defaults()

In [35]:
# Parse every loaded document into nodes; the later cells split this list into
# leaf and root nodes, so it is expected to contain all hierarchy levels.
nodes = node_parser.get_nodes_from_documents(documents)

In [36]:
from llama_index.core import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore

# Build the document store and register every parsed node in it.
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)

# Wrap the docstore in a storage context (a vector store is included by
# default as well).
storage_context = StorageContext.from_defaults(docstore=docstore)

In [37]:
## Build the vector index from the leaf nodes
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes

# Split the parsed nodes into the two ends of the hierarchy.
root_nodes = get_root_nodes(nodes)
leaf_nodes = get_leaf_nodes(nodes)

# Only the leaf nodes are handed to the index; the storage context's docstore
# was populated with the full node list.
base_index = VectorStoreIndex(nodes=leaf_nodes, storage_context=storage_context)

In [38]:
from llama_index.core.retrievers import AutoMergingRetriever

# Plain vector retriever over the index, returning the 6 most similar nodes.
base_retriever = base_index.as_retriever(similarity_top_k=6)

# Auto-merging retriever layered on top of the base retriever, using the same
# storage context; verbose=True makes it log what it does.
retriever = AutoMergingRetriever(
    vector_retriever=base_retriever,
    storage_context=storage_context,
    verbose=True,
)

In [39]:
from llama_index.core.query_engine import RetrieverQueryEngine

# Two engines over the same index so their answers can be compared:
# one backed by the plain vector retriever, one by the auto-merging retriever.
base_query_engine = RetrieverQueryEngine.from_args(retriever=base_retriever)
query_engine = RetrieverQueryEngine.from_args(retriever=retriever)

In [44]:
# Prompt sent to both query engines. Each sentence keeps its trailing space,
# so the joined string is identical to the original concatenation.
query_str = "".join(
    [
        "You are an academic reviewer and you'll be helping me critique a paper. ",
        "Using main.pdf as the paper, please provide a critique of the paper. ",
    ]
)

In [45]:
# Run the same prompt through both engines: `response` comes from the
# auto-merging retriever engine, `base_response` from the plain vector one.
response = query_engine.query(query_str)
base_response = base_query_engine.query(query_str)

In [46]:
# Show both answers, separated by a '---' line; print() applies str() to each
# argument, so the output is the same as three separate print calls.
print(response, '---', base_response, sep='\n')

Based on the provided manuscript (main.pdf), I will offer my critique.

Overall, the manuscript is well-organized and easy to follow. The authors' core contributions are evident in their comprehensive survey of large language models. One of the most notable strengths is the thoroughness with which they cover the current state of research in this field. Multiple reviewers praised the paper's clarity and coherence, making it accessible to a broad audience.

However, some common weaknesses mentioned by reviewers include the lack of depth in certain sections and the need for more nuanced analysis in others. To improve the paper, I would suggest providing more concrete examples to illustrate key concepts and expanding on the implications of the findings.

As for missing references, one reviewer noted that the authors could have incorporated additional sources to further support their claims. Another reviewer suggested exploring similar topics in other fields or disciplines to provide a more