In [1]:
import torch
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.huggingface import HuggingFaceInferenceAPI, HuggingFaceLLM
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core import Settings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.llms.llama_cpp.llama_utils import messages_to_prompt,completion_to_prompt
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.core import PromptTemplate

In [2]:
documents = SimpleDirectoryReader("./pdfs/").load_data()

In [3]:
query_str = "I'm providing you with a research paper your job is to summarizes the information within it."

query_wrapper_prompt = PromptTemplate(
    "Your job is to summarize different sections of the document given to you."
    "Write a response that appropriately completes the request given to you.\n\n"
    "### Instruction:\n{query_str}\n\n### Response:"
)

In [4]:
llm = LlamaCPP(
    # You can pass in the URL to a GGML model to download it automatically
    #You can also use others LLMs of bigger size by using Quantization through bitsandbytes
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    #model_path="C:\Users\manje\AppData\Local\llama_index\models\mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    #How creative the llm can be while generating responses
    temperature=0.2, 
    max_new_tokens=256,
    context_window=4096,
    generate_kwargs={},
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into Llama2 format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from C:\Users\manje\AppData\Local\llama_index\models\mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_mo

llama_model_loader: - kv  14:                      tokenizer.ggml.scores arr[f32,32000]   = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,32000]   = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
llama_model_loader: - kv  16:                tokenizer.ggml.bos_token_id u32              = 1
llama_model_loader: - kv  17:                tokenizer.ggml.eos_token_id u32              = 2
llama_model_loader: - kv  18:            tokenizer.ggml.unknown_token_id u32              = 0
llama_model_loader: - kv  19:               general.quantization_version u32              = 2
llama_model_loader: - type  f32:   65 tensors
llama_model_loader: - type q4_K:  193 tensors
llama_model_loader: - type q6_K:   33 tensors
llm_load_vocab: special tokens definition check successful ( 259/32000 ).
llm_load_print_meta: format           = GGUF V2
llm_load_print_meta: arch             = llama
llm_load_print_meta: vocab type       = SPM
llm_load

In [5]:
embed_model = LangchainEmbedding(
  HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
)

In [6]:
Settings.llm = llm
Settings.node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=5,
    window_metadata_key="window",
    original_text_metadata_key="original_text").get_nodes_from_documents(documents)
Settings.text_splitter = SentenceSplitter(chunk_size=128,chunk_overlap=20)
Settings.embed_model = embed_model

In [7]:
index = VectorStoreIndex.from_documents(documents)

In [8]:
query_engine = index.as_query_engine(similarity_top_k=5,
    verbose=True,
    node_postprocessor=[MetadataReplacementPostProcessor("window")])
response = query_engine.query("Generate a summary about the abstract")
print(F"Response: \n {response}")


llama_print_timings:        load time =   45912.61 ms
llama_print_timings:      sample time =      31.14 ms /   178 runs   (    0.17 ms per token,  5716.49 tokens per second)
llama_print_timings: prompt eval time =   58500.72 ms /   700 tokens (   83.57 ms per token,    11.97 tokens per second)
llama_print_timings:        eval time =   23716.12 ms /   177 runs   (  133.99 ms per token,     7.46 tokens per second)
llama_print_timings:       total time =   82774.94 ms /   877 tokens


Response: 
  The abstract describes a study that uses reinforcement learning (RL) to predict the outcome of critically ill patients with sepsis. The study first selects a cohort of patients that meet sepsis-3 criteria and aggregates their data to form an MDP (Markov decision process) that can be learned by RL algorithms. The MDP includes state representation, action formulation, and reward design. The study uses the DDPG (Deep Deterministic Policy Gradient) algorithm with specific hyperparameters to train the model. The authors note that extrapolation errors in offline RL may be a potential issue in this study, which they discuss further in the text. The study cites previous work on the use of the sofa score to predict outcome in critically ill patients and on addressing function approximation error in actor-critic methods.


In [9]:
# response = query_engine.query("Generate a summary about the Methodology")
# print(F"Response: \n {response}")
# response = query_engine.query("Generate a summary about the Results and conclusion")
# print(F"Response: \n {response}") 

In [10]:
from PyPDF2 import PdfReader

In [11]:
import os

def extract_images_from_pdf(pdf_path, output_folder):
    # Create the output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    with open(pdf_path, "rb") as f:
        reader = PdfReader(f)
        for page_num in range(len(reader.pages)):
            selected_page = reader.pages[page_num]
            for img_file_obj in selected_page.images:
                # Construct the output file path
                output_path = os.path.join(output_folder, img_file_obj.name)
                with open(output_path, "wb") as out:
                    out.write(img_file_obj.data)


In [12]:
extract_images_from_pdf("./pdfs/93+Reinforcement_learning_for_sepsis+(2).pdf","images")

FileNotFoundError: [Errno 2] No such file or directory: 'requirements.txt'