#### Load the required functionality from the 'packages.py' and 'model.py' files.

In [1]:
from packages import * 
from model import *

  from .autonotebook import tqdm as notebook_tqdm
2024-05-22 18:00:54.635407: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Your GPU supports bfloat16: accelerate training with bf16=True


`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.15s/it]


#### Rietrieval Augmentation: Define context's input, embedding and database construction 

- The context file used to apply the RAG support to the LLM is not provided as it containts institutional data not to be shared.

In [2]:

# Define the path and file with the context information to be used by the RAG search engine
# when assisting the LLM with institute-specific information. 
# This file is not provided here because of copyright issues with the included information.
path = "/home/saltas/Project_LLM"
loader = TextLoader(path + "/Output2.txt")

# Loads the text file with the context and divides it into chunks 
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=10, add_start_index=True)
docs = text_splitter.split_documents(documents)

# Define how to construct the embedings of the context file and the data base algorithm
# I use the FAISS algorithm for the data base 
embeddings = HuggingFaceEmbeddings() 
db = FAISS.from_documents(docs, embeddings)


#### Define the LLM chain model to answer context-based questions (trained on internal wiki data, etc)
##### Notes: 
 -  The context file used to apply the RAG support to the LLM is not provided as it containts institutional data not to be shared.
 - The LLM model is defined in the 'model.py' file.
 - The temperature option can be tweaked accordingly. 
 - For the moment the user input is an one-time input, cell needs to be re-run for new input. 


In [5]:
# Define the LLM chain: model + tokenizer + options
text_generation_pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
                                                )

## Define the prompt template for the LLM. This will provide to it generic instructions
## and the relevant context derived from the RAG process, to help it answer the question.
prompt_template = """
### [INST] 
Instruction: Answer the question based on context knowledge. Do not make up new information or things which are not based on context. 
Here is context:

{context}

### QUESTION:
{question} 

[/INST]
 """

# Compile eveything into a LLM pipeline. 
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)


# Create prompt from prompt template 
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain 
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)


#### Define the user input and the print out of an answer. 
##### Note: The contextual information to help the LLM is found through the database defined earlier.


In [6]:
import warnings
warnings.filterwarnings('ignore') # ignore warnings 

question = input("Please ask me a question... ") # Define user prompt 

if question: 
    
    retriever = db.as_retriever()
    docs = retriever.invoke(question)
    context = docs[0].page_content
    
    rag_chain = (
        {"context": lambda x: context, "question": RunnablePassthrough()} | llm_chain
    )
    
    blah = rag_chain.invoke(question)
    blah_answer = blah['text']
    blah_answer_only = blah_answer.split("[/INST]",1)[1]
    
    print(blah_answer_only)
else:
    print("No question? That's fine.")


Please ask me a question...  


No question? That's fine.


#### This is a different application of the LLM. One provides a pre-print arXiv number (https://arxiv.org/), the code loads the pdf article from online, and the network provides a summary of the paper.

##### Notes:
- The code has not been tested on many papers yet. It can happen that sometimes the output is erroneous. This is a matetr of fine-tuning.
- The summary is provided based on the first few pages of the article. I do not include all the article to avoid overloading the network.


In [7]:
arXiv_number = input("Provide me the arXiv number of the paper (e.g 2405.12685) and press Enter")
text_splitter = CharacterTextSplitter()
pages = load_doc('https://arxiv.org/pdf/' + str(arXiv_number))

pages_all = pages[0].page_content # all pages 
max_pages = 4 #len(pages) # max number of pages. 
# I set the max_pages to the first few to avoid overloading the model. 
# This should naturally include abstract + introduction of the paper.

for i in range(1,max_pages): # construct a big string with all the pages 
    pages_all = pages_all + pages[i].page_content
docs = pages_all
docs = doc(pages_all)

Provide me the arXiv number of the paper (e.g 2405.12685) and press Enter 2405.12685


[1mDocument loaded OK.[0m


In [9]:
#### As usual, define the prompt template.
prompt_template = """ Write a concise summary in bullet points of the following text. 
Try to ignore equations, images, and refereces/citations:

{text}

SUMMARY IN BULLET POINTS: """
#### 

summary_prompt = PromptTemplate(template=prompt_template,
                        input_variables=["text"])
#
chain = load_summarize_chain(mistral_llm, 
                             chain_type="stuff", 
                             prompt=summary_prompt)
#
output_summary = chain.run(docs) # apply the chain on the loaded document 
#
answer_only = output_summary.split("SUMMARY IN BULLET POINTS:",1)[1] # extract only the summary, not document.
#
wrap = textwrap.fill(answer_only, 
                             width=100,
                             break_long_words=False,
                             replace_whitespace=False)
# print out the result
print(bf('This is a bullet-point summary of the paper:'))
print(wrap)

[1mThis is a bullet-point summary of the paper:[0m
 

* The paper discusses the effective field theory formulation of gravity.
* It critically assesses
the common practice of using local field redefinitions in order to simplify the dynamics.
* The
paper reviews the corrections related to local effective interactions containing up to six
derivatives.
* It identifies the parameterized post-Newtonian (PPN) order at which a given higher-
derivative term starts to contribute also at the non-linear level.
* The paper is closely related to
recent efforts to construct black hole spacetimes in the framework of effective field theory.
* The
paper uses the Einstein-Hilbert action as a starting point and organizes the expansion of the
gravitational action in terms of derivatives.
* It identifies the relevant terms in the derivative
expansion of the gravitational action.
* The paper adopts a specific choice for the set of
independent interaction terms.
* The paper groups the terms according to th