In [1]:
import torch
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.huggingface import HuggingFaceInferenceAPI  
from llama_index.core import Settings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.llms.llama_cpp.llama_utils import messages_to_prompt,completion_to_prompt
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.core import PromptTemplate

  from .autonotebook import tqdm as notebook_tqdm


In [18]:
os.environ["HUGGINGFACEHUB_API_TOKEN"]  = "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"

In [26]:
documents = SimpleDirectoryReader("./pdfs/").load_data()

In [20]:
query_str = "I'm providing you with a research paper your job is to summarizes the information within it."

query_wrapper_prompt = PromptTemplate(
    "Your job is to summarize different sections of the document given to you."
    "Write a response that appropriately completes the request given to you.\n\n"
    "### Instruction:\n{query_str}\n\n### Response:"
)

In [21]:
llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha")


In [22]:
embed_model = LangchainEmbedding(
  HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
)

In [27]:
Settings.llm = llm
Settings.node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=5,
    window_metadata_key="window",
    original_text_metadata_key="original_text").get_nodes_from_documents(documents)
Settings.text_splitter = SentenceSplitter(chunk_size=128,chunk_overlap=20)
Settings.embed_model = embed_model

In [28]:
index = VectorStoreIndex.from_documents(documents)

In [32]:
query_engine = index.as_query_engine(similarity_top_k=20,
    verbose=True,
    response_mode="tree_summarize",
    node_postprocessor=[MetadataReplacementPostProcessor("window")])
response = query_engine.query("Generate a summary about the paper like abstract, methodology and results and output them.")
print(F"Response: \n {response}")

Response: 
 

The paper presents the Inception architecture, a new approach to deep neural networks that aims to
reduce the computational cost while maintaining or improving the accuracy. The architecture is based
on the concept of inception modules, which are designed to perform computations at multiple scales
simultaneously. The paper also introduces the GoogLeNet model, which is a particular implementation
of the Inception architecture used in the ImageNet Large Scale Visual Recognition Challenge (ILSVRC)
2014. The model achieved state-of-the-art results with a computational budget of 1.5 billion multiply-
adds at inference time. The paper also discusses the use of ensemble prediction and other techniques
during testing to obtain higher performance. The results show a significant improvement in accuracy
compared to the previous edition of the detection task, with all top performing teams using Convolutional
Networks. The paper also provides a comparison with other approaches and dis