In [None]:
# Install the notebook's runtime dependencies.
# NOTE(review): `&> /dev/null` redirects the installer output away from the cell,
# so a failed install is not shown here and would only surface at import time.
!pip install langchain_community &> /dev/null
!pip install -U bitsandbytes &> /dev/null
!pip install -U accelerate &> /dev/null
!pip install pypdf &> /dev/null
!pip install sentence-transformers &> /dev/null
!pip install faiss-gpu &> /dev/null

# Import necessary libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
import transformers

# Log in to the Hugging Face Hub; notebook_login() renders an interactive
# login widget in the cell output.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
def load_model(model_name):
  """Load a causal language model in 4-bit NF4 form together with its tokenizer.

  Args:
    model_name: Model identifier passed to both ``from_pretrained`` calls.

  Returns:
    A ``(model, tokenizer)`` tuple. The tokenizer's pad token is set to its
    EOS token before it is returned.
  """
  # 4-bit NF4 weights with double quantization; compute dtype is bfloat16.
  bnb_settings = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_use_double_quant=True,
      bnb_4bit_compute_dtype=torch.bfloat16,
  )

  model = AutoModelForCausalLM.from_pretrained(
      model_name,
      quantization_config=bnb_settings,
      torch_dtype=torch.bfloat16,
      device_map="auto",
  )

  tok = AutoTokenizer.from_pretrained(model_name)
  # Reuse the EOS token as the padding token.
  tok.pad_token = tok.eos_token
  return model, tok

# Load the 4-bit quantized model and its tokenizer via load_model().
MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
quantized_model, tokenizer = load_model(MODEL_NAME)
# Put the model in evaluation mode; as the cell's last expression, the returned
# module is also displayed as the cell output.
quantized_model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps

In [None]:
# Read PDFs and cut them into overlapping chunks
def load_and_split_documents(pdf_paths):
    """Load every PDF in ``pdf_paths`` and split the result into chunks.

    Args:
        pdf_paths: Iterable of PDF paths; each one is handed to ``PyPDFLoader``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns for
        the concatenated loader output (chunk size 1000, overlap 200, lengths
        measured with ``len``).
    """
    pages = [page for pdf in pdf_paths for page in PyPDFLoader(pdf).load()]
    splitter = RecursiveCharacterTextSplitter(
        length_function=len,
        chunk_overlap=200,
        chunk_size=1000,
    )
    return splitter.split_documents(pages)

# Create embeddings and vector store
def create_vector_store(split_docs,
                        embedding_model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Embed the document chunks and index them in a FAISS vector store.

    Args:
        split_docs: Non-empty sequence of document chunks to index.
        embedding_model_name: Name of the embedding model handed to
            ``HuggingFaceEmbeddings``. Defaults to the model the notebook
            originally hard-coded.

    Returns:
        The vector store built by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``split_docs`` is empty, i.e. there is nothing to index.
    """
    # Fail early with a clear message instead of handing an empty list to FAISS.
    if not split_docs:
        raise ValueError(
            "No document chunks to index: split_docs is empty "
            "(check that the PDFs contain extractable text)."
        )
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
    return FAISS.from_documents(split_docs, embeddings)

# Set up the language model pipeline
def setup_llm_pipeline(model, tokenizer, max_new_tokens=512, temperature=0.7,
                       top_p=0.95, repetition_penalty=1.15, do_sample=True):
    """Wrap a model/tokenizer pair in a LangChain ``HuggingFacePipeline``.

    Args:
        model: Causal language model passed to ``transformers.pipeline``.
        tokenizer: Tokenizer that belongs to ``model``.
        max_new_tokens: Upper bound on generated tokens per call.
        temperature: Sampling temperature; only passed when ``do_sample`` is true.
        top_p: Nucleus-sampling threshold; only passed when ``do_sample`` is true.
        repetition_penalty: Repetition penalty passed to the pipeline.
        do_sample: Whether to sample. Passed explicitly so that ``temperature``
            and ``top_p`` do not depend on the model's own generation config
            enabling sampling.

    Returns:
        A ``HuggingFacePipeline`` wrapping the text-generation pipeline.
    """
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": repetition_penalty,
        "do_sample": do_sample,
    }
    # Sampling knobs are only meaningful when sampling is enabled; omit them
    # for greedy decoding.
    if do_sample:
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    text_generator = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **generation_kwargs,
    )
    return HuggingFacePipeline(pipeline=text_generator)

# Assemble the retrieval-augmented QA chain
def create_rag_chain(llm, vector_store):
    """Build a "stuff" ``RetrievalQA`` chain over ``vector_store``.

    Args:
        llm: Language model wrapper used to generate the answer.
        vector_store: Store whose ``as_retriever`` supplies the context; the
            retriever is created with ``search_kwargs={"k": 3}``.

    Returns:
        The chain from ``RetrievalQA.from_chain_type``, configured with the
        custom prompt and ``return_source_documents=True``.
    """
    template_text = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

    {context}

    Question: {question}
    Answer:"""

    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template_text,
    )

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        chain_type_kwargs={"prompt": qa_prompt},
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
    )

# Entry point: PDFs + model -> ready-to-query RAG chain
def setup_rag_system(pdf_paths, model, tokenizer):
    """Build the full RAG chain for the given PDFs and model.

    Runs the steps in order: load and split the PDFs, index the chunks in a
    vector store, wrap the model in an LLM pipeline, and combine both in a
    retrieval QA chain.

    Args:
        pdf_paths: Paths of the PDFs to index.
        model: Language model used for generation.
        tokenizer: Tokenizer that belongs to ``model``.

    Returns:
        The chain returned by ``create_rag_chain``.
    """
    store = create_vector_store(load_and_split_documents(pdf_paths))
    return create_rag_chain(setup_llm_pipeline(model, tokenizer), store)


In [None]:
def ask_question_without_rag(model, tokenizer, question, max_new_tokens=200):
    """Ask the model a question directly, without any retrieved context.

    Args:
        model: Causal language model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer that belongs to ``model``.
        question: Question text, inserted into a fixed one-line prompt.
        max_new_tokens: Upper bound on generated tokens (default 200).

    Returns:
        The decoded first sequence from ``generate`` with special tokens
        removed. The whole sequence is decoded, so the returned text begins
        with the prompt itself.
    """
    prompt = f"Answer the following question: {question}"
    # Send the inputs to the device the model reports instead of assuming
    # "cuda"; the model is loaded with device_map="auto".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Pass the pad token explicitly so generate() does not have to fall back
    # to the EOS token and warn about it on every call.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=pad_token_id,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

def ask_question(rag_system, question):
    """Run one query through the RAG chain.

    Args:
        rag_system: Callable chain invoked with ``{"query": question}``.
        question: Question text.

    Returns:
        A ``(answer, source_documents)`` tuple taken from the ``"result"`` and
        ``"source_documents"`` entries of the chain output.
    """
    response = rag_system({"query": question})
    answer_text = response["result"]
    cited_docs = response["source_documents"]
    return answer_text, cited_docs

In [None]:
# Mount Google Drive at /content/drive; the PDF paths used in the RAG cells
# point under "/content/drive/My Drive".
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#Fact Checking: Example 1

In [None]:
# Baseline without RAG: ask the quantized model directly, with no retrieved
# context.
question = "New York State banned the import of puma, spotted cat, and crocodilian hides with which act"
answer = ask_question_without_rag(quantized_model, tokenizer, question)
print(f"Answer: {answer}")

Answer: Answer the following question: New York State banned the import of puma, spotted cat, and crocodilian hides with which act in the year?
The correct answer is: The Wildlife Conservation Act of 1997. The Wildlife Conservation Act of 1997, which was enacted in New York State, banned the importation of hides of pumas, spotted cats, and crocodilians. This act was aimed at conserving wildlife and preventing the exploitation of endangered species. The act also prohibited the sale and distribution of these hides in the state. The Wildlife Conservation Act of 1997 was a significant step towards protecting wildlife and promoting conservation efforts in New York State.... Read More
New York State banned the import of puma, spotted cat, and crocodilian hides with which act in the year?
The correct answer is: The Wildlife Conservation Act of 1997. The Wildlife Conservation Act of 1997, which was enacted in New York State, banned the importation of hides of pumas, spotted cats, and crocodili

In [None]:
# RAG run: index the source PDF, then answer the question with retrieved context.
pdf_paths = [
    "/content/drive/My Drive/endangered.pdf"
    ]
rag_system = setup_rag_system(pdf_paths, quantized_model, tokenizer)

question = "New York State banned the import of puma, spotted cat, and crocodilian hides with which act"
answer, sources = ask_question(rag_system, question)
# Keep only the text after the last "Answer:" marker and trim whitespace.
# Splitting on 'Answer' alone left the colon behind and printed "Answer: : ...".
answer = answer.split('Answer:')[-1].strip()
print(f"Answer: {answer}")

# List the retrieved chunks that were supplied as context.
print("\nSources:")
for i, doc in enumerate(sources):
    print(f"Source {i+1}: {doc.metadata['source']} (page {doc.metadata['page']})")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Answer: : The Manson Smith Act (1970).

Sources:
Source 1: /content/drive/My Drive/endangered.pdf (page 6)
Source 2: /content/drive/My Drive/endangered.pdf (page 6)
Source 3: /content/drive/My Drive/endangered.pdf (page 7)


#Fact checking: Example 2

In [None]:
query = "The fastest growing nail is on which finger?"

# Baseline without RAG: ask the quantized model directly, with no retrieved
# context.
answer = ask_question_without_rag(quantized_model, tokenizer, query)
print(f"Answer: {answer}")

Answer: Answer the following question: The fastest growing nail is on which finger? The answer is: The pinky! (That's because the pinky is the smallest and fastest-growing nail on the human body.)... More
Answer the following question: The fastest growing nail is on which finger? The answer is: The pinky! (That's because the pinky is the smallest and fastest-growing nail on the human body.)... More
Answer the following question: The fastest growing nail is on which finger? The answer is: The pinky! (That's because the pinky is the smallest and fastest-growing nail on the human body.)... More
Answer the following question: The fastest growing nail is on which finger? The answer is: The pinky! (That's because the pinky is the smallest and fastest-growing nail on the human body.)... More
Answer the following question: The fastest growing nail is on which finger? The answer is: The pinky! (That's because the pinky is the smallest and fastest-growing nail on the


In [None]:
# RAG run: index the source PDF, then answer the question with retrieved context.
pdf_paths = [
    "/content/drive/My Drive/facts_body.pdf"
    ]
rag_system = setup_rag_system(pdf_paths, quantized_model, tokenizer)

question = "The fastest growing nail is on which finger?"
answer, sources = ask_question(rag_system, question)
# Keep only the text after the last "Answer:" marker and trim whitespace.
# Splitting on 'Answer' alone left the colon behind and printed "Answer: : ...".
answer = answer.split('Answer:')[-1].strip()

print(f"Answer: {answer}")
# List the retrieved chunks that were supplied as context.
print("\nSources:")
for i, doc in enumerate(sources):
    print(f"Source {i+1}: {doc.metadata['source']} (page {doc.metadata['page']})")



Answer: : According to the text, the fastest growing nail is on the middle finger.

Sources:
Source 1: /content/drive/My Drive/facts_body.pdf (page 1)
Source 2: /content/drive/My Drive/facts_body.pdf (page 2)
Source 3: /content/drive/My Drive/facts_body.pdf (page 5)


#Fact checking: Example 3

In [None]:
query = "Initial curtailment of botanical remedies began in the mid-1930s with the introduction of"

# Baseline without RAG: ask the quantized model directly, with no retrieved
# context.
answer = ask_question_without_rag(quantized_model, tokenizer, query)
print(f"Answer: {answer}")

Answer: Answer the following question: Initial curtailment of botanical remedies began in the mid-1930s with the introduction of synthetic medicines. This marked the beginning of the decline of traditional medicine in the West. What were the primary factors that contributed to this decline?

I. Introduction
The use of botanical remedies has been a cornerstone of traditional medicine for centuries. However, with the advent of synthetic medicines in the mid-1930s, the use of botanical remedies began to decline. This decline was a result of various factors, including the development of synthetic alternatives, changes in societal values, and the rise of modern medicine.

II. Synthetic Medicines
The introduction of synthetic medicines was a major factor in the decline of botanical remedies. Synthetic medicines were seen as more effective and efficient than botanical remedies, as they were mass-produced and had a longer shelf life. This led to a shift away from the use of botanical remedies,

In [None]:
# RAG run: index the source PDF, then answer the question with retrieved context.
pdf_paths = [
    "/content/drive/My Drive/herb.pdf"
    ]
rag_system = setup_rag_system(pdf_paths, quantized_model, tokenizer)

# Same wording as the baseline question; the original string was cut off at
# "introduction o".
question = "Initial curtailment of botanical remedies began in the mid-1930s with the introduction of"
answer, sources = ask_question(rag_system, question)
# Keep only the text after the last "Answer:" marker and trim whitespace.
# Splitting on 'Answer' alone left the colon behind and printed "Answer: : ...".
answer = answer.split('Answer:')[-1].strip()

print(f"Answer: {answer}")
# List the retrieved chunks that were supplied as context.
print("\nSources:")
for i, doc in enumerate(sources):
    print(f"Source {i+1}: {doc.metadata['source']} (page {doc.metadata['page']})")



Answer: : Sulfanilamide.

Sources:
Source 1: /content/drive/My Drive/herb.pdf (page 0)
Source 2: /content/drive/My Drive/herb.pdf (page 1)
Source 3: /content/drive/My Drive/herb.pdf (page 5)


#Self consistency: Example 1

In [None]:
query = "Is light a particle or a wave?"

# Baseline without RAG, run 1 of 3: the same query is repeated in the next two
# cells so the answers can be compared across runs.
answer = ask_question_without_rag(quantized_model, tokenizer, query)
print(f"Answer: {answer}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer: Answer the following question: Is light a particle or a wave? Explain your answer and provide evidence to support it.

Light is a wave. This is a widely accepted scientific fact, supported by a large body of evidence from various fields of physics, including optics, electromagnetism, and quantum mechanics.

One of the key pieces of evidence for light being a wave is the way it behaves when passing through a prism. When white light passes through a prism, it is refracted, or bent, and separated into its individual colors. This is because each color of light has a different wavelength and is refracted at a slightly different angle. This phenomenon is known as dispersion, and it is a characteristic of waves.

Another piece of evidence for light being a wave is the way it interferes with itself. When two light waves overlap, they can either add together (constructive interference) or cancel each other out (destructive interference). This is similar to the way that water waves or so

In [None]:
query = "Is light a particle or a wave?"

# Baseline without RAG, run 2 of 3: identical query, repeated to compare
# answers across runs.
answer = ask_question_without_rag(quantized_model, tokenizer, query)
print(f"Answer: {answer}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer: Answer the following question: Is light a particle or a wave? The answer is... it depends! Light exhibits both wave-like and particle-like behavior, depending on how it is observed. This property is known as wave-particle duality. In some experiments, light behaves like a wave, while in others, it behaves like a particle. This duality is a fundamental aspect of quantum mechanics and has been experimentally confirmed numerous times.

Here are some examples of light behaving like a wave:

1. **Diffraction**: When light passes through a narrow slit or around a corner, it bends and spreads out, just like a wave. This is known as diffraction.
2. **Interference**: When two light waves overlap, they can either add together (constructive interference) or cancel each other out (destructive interference). This is similar to how water waves or sound waves behave.
3. **Superposition**: Light can exist in multiple states simultaneously, just like a wave can have multiple frequencies or ampl

In [None]:
query = "Is light a particle or a wave?"

# Baseline without RAG, run 3 of 3: identical query, repeated to compare
# answers across runs.
answer = ask_question_without_rag(quantized_model, tokenizer, query)
print(f"Answer: {answer}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer: Answer the following question: Is light a particle or a wave? (Note: This question is often considered to be a classic example of a false dichotomy, as light can exhibit both wave-like and particle-like properties depending on the context and the experimental setup. Nevertheless, it is still a useful question to consider the implications of light being either a wave or a particle.)

If light is a wave, it would be a type of electromagnetic radiation, such as radio waves, microwaves, or X-rays. In this case, light would be a disturbance that propagates through a medium, such as air or a vacuum, and would exhibit properties such as amplitude, frequency, and wavelength. Light waves would also be capable of interference, diffraction, and refraction, which are all characteristic of wave behavior.

On the other hand, if light is a particle, it would be a type of photon, which is a massless particle that carries energy and momentum. In this case, light would be a stream of particles t

In [None]:
# RAG run: index the source PDF, then answer the question with retrieved context.
pdf_paths = [
    "/content/drive/My Drive/light.pdf"
    ]
rag_system = setup_rag_system(pdf_paths, quantized_model, tokenizer)

question = "Is light a particle or a wave?"
answer, sources = ask_question(rag_system, question)
# Keep only the text after the last "Answer:" marker and trim whitespace.
# Splitting on 'Answer' alone left the colon behind and printed "Answer: : ...".
answer = answer.split('Answer:')[-1].strip()

print(f"Answer: {answer}")
# List the retrieved chunks that were supplied as context.
print("\nSources:")
for i, doc in enumerate(sources):
    print(f"Source {i+1}: {doc.metadata['source']} (page {doc.metadata['page']})")



Answer: : Light exhibits both wave-like and particle-like behavior depending on the experiment being performed. The text mentions various phenomena such as interference, diffraction, and polarization that demonstrate the wave nature of light, while others like the photoelectric effect, Compton effect, and Raman effect require a particle-like understanding of light. According to Bohr's complementary principle, these two descriptions are mutually exclusive, and according to Heisenberg's uncertainty principle, it is impossible to observe both aspects simultaneously.

Sources:
Source 1: /content/drive/My Drive/light.pdf (page 2)
Source 2: /content/drive/My Drive/light.pdf (page 0)
Source 3: /content/drive/My Drive/light.pdf (page 12)


#Self consistency: Example 2

In [None]:
query = "Who won the Best Actor Oscar in 2010?"

# Baseline without RAG, run 1 of 2: the same query is repeated in the next
# cell so the answers can be compared across runs.
answer = ask_question_without_rag(quantized_model, tokenizer, query)
print(f"Answer: {answer}")

Answer: Answer the following question: Who won the Best Actor Oscar in 2010? A) Jeff Bridges B) Sean Penn C) Morgan Freeman D) Daniel Day-Lewis
The correct answer is A) Jeff Bridges. Jeff Bridges won the Best Actor Oscar in 2010 for his role in the film "Crazy Heart". The film is a drama about a faded country music star who gets a second chance at fame. Bridges' performance in the film earned him widespread critical acclaim and numerous awards, including the Academy Award for Best Actor. Bridges received the award at the 82nd Academy Awards, which took place on March 7, 2010, at the Kodak Theatre in Hollywood, California. The ceremony was hosted by Steve Martin and Alec Baldwin. Bridges' win marked his first Oscar victory, and he delivered a heartfelt acceptance speech, thanking his family and the film's cast and crew. The film also earned a Golden Globe nomination for Best Actor in a Motion Picture - Drama, which went to Jeff Bridges. The film's success can be attributed to the powerf

In [None]:
query = "Who won the Best Actor Oscar in 2010?"

# Baseline without RAG, run 2 of 2: identical query, repeated to compare
# answers across runs.
answer = ask_question_without_rag(quantized_model, tokenizer, query)
print(f"Answer: {answer}")

Answer: Answer the following question: Who won the Best Actor Oscar in 2010? (A) Jeff Bridges, (B) Colin Firth, (C) Jeremy Renner, (D) Javier Bardem.
The correct answer is B) Colin Firth. He won the Best Actor Oscar for his role as King George VI in "The King's Speech" at the 83rd Academy Awards held on February 27, 2011. The movie was released in 2010, so it's possible that you're thinking of it. Jeff Bridges won the Best Actor Oscar in 2010 for his role in "Crazy Heart", not Jeremy Renner or Javier Bardem. Jeremy Renner was nominated for Best Actor for his role in "The Hurt Locker" in 2010, but did not win. Javier Bardem was also nominated for Best Actor for his role in "Biutiful" in 2010, but did not win. The Best Actor Oscar in 2010 went to Jeff Bridges. I hope this clears up any confusion. Let


In [None]:
# RAG run: index the source PDF, then answer the question with retrieved context.
pdf_paths = [
    "/content/drive/My Drive/oscar.pdf"
    ]
rag_system = setup_rag_system(pdf_paths, quantized_model, tokenizer)

question = "Who won the Best Actor Oscar in 2010?"
answer, sources = ask_question(rag_system, question)
# Keep only the text after the last "Answer:" marker and trim whitespace.
# Splitting on 'Answer' alone left the colon behind and printed "Answer: : ...".
answer = answer.split('Answer:')[-1].strip()

print(f"Answer: {answer}")
# List the retrieved chunks that were supplied as context.
print("\nSources:")
for i, doc in enumerate(sources):
    print(f"Source {i+1}: {doc.metadata['source']} (page {doc.metadata['page']})")



Answer: : Jeff Bridges. He won for his role as Bad Blake in Crazy Heart.

Sources:
Source 1: /content/drive/My Drive/oscar.pdf (page 0)
Source 2: /content/drive/My Drive/oscar.pdf (page 3)
Source 3: /content/drive/My Drive/oscar.pdf (page 5)


#Self consistency: Example 3

In [None]:
query = "Who was responsible for the execution of Joan of Arc?"

# Baseline without RAG, run 1 of 2: the same query is repeated in the next
# cell so the answers can be compared across runs.
answer = ask_question_without_rag(quantized_model, tokenizer, query)
print(f"Answer: {answer}")

Answer: Answer the following question: Who was responsible for the execution of Joan of Arc? (Hint: It was not the English.)
Joan of Arc was executed by the French themselves, specifically by the court-martial that was convened to try her. The court was composed of French bishops and nobles, who were appointed by the French king, Charles VII. The trial was a sham, and Joan was denied the right to defend herself and was subjected to false testimony and coerced confessions. She was ultimately found guilty of heresy and witchcraft and was burned at the stake in the marketplace of Rouen, France, on May 30, 1431. The English, who had been her enemies and had captured her, were not responsible for her execution. They had actually been her allies in her early campaigns, but they had turned against her when she began to question their authority and when she refused to back down from her claim that she was receiving divine guidance. The French, on the other hand, were motivated by a desire to e

In [None]:
query = "Who was responsible for the execution of Joan of Arc?"

# Baseline without RAG, run 2 of 2: identical query, repeated to compare
# answers across runs.
answer = ask_question_without_rag(quantized_model, tokenizer, query)
print(f"Answer: {answer}")

Answer: Answer the following question: Who was responsible for the execution of Joan of Arc? (a) The English, (b) The French, (c) The Church, or (d) The King of France.

The correct answer is (a) The English. Joan of Arc was captured by the English in 1430 and put on trial for heresy and witchcraft. She was found guilty and executed by burning at the stake in 1431.

The English, who had been fighting against the French in the Hundred Years' War, saw Joan as a threat to their interests and sought to eliminate her. The French, who had once hailed Joan as a hero, failed to come to her aid and ultimately allowed the English to execute her.

The Church, which had initially supported Joan, eventually turned against her and played a role in her trial and execution. The King of France, Charles VII, who had once sought Joan's help in his war against the English, failed to intervene on her behalf and allowed her to be executed.

Overall, the execution of Joan of Arc


In [None]:
# RAG run: index the source PDF, then answer the question with retrieved context.
pdf_paths = [
    "/content/drive/My Drive/arc.pdf"
    ]
rag_system = setup_rag_system(pdf_paths, quantized_model, tokenizer)

question = "Who was responsible for the execution of Joan of Arc?"
answer, sources = ask_question(rag_system, question)
# Keep only the text after the last "Answer:" marker and trim whitespace.
# Splitting on 'Answer' alone left the colon behind and printed "Answer: : ...".
answer = answer.split('Answer:')[-1].strip()

print(f"Answer: {answer}")
# List the retrieved chunks that were supplied as context.
print("\nSources:")
for i, doc in enumerate(sources):
    print(f"Source {i+1}: {doc.metadata['source']} (page {doc.metadata['page']})")



Answer: : Pierre Cauchon, a bishop and inquisitor, conducted Joan of Arc's trial and ensured her condemnation and execution. However, it is important to note that he did attempt to find a way to spare her life, suggesting that while he was heavily influenced by politics and may have been motivated by a desire to curry favor with the English, he also had some degree of personal responsibility for her ultimate fate. It is worth noting that there is ongoing debate among scholars about the extent to which Cauchon's actions were driven by purely personal or professional considerations versus external pressures and influences. Nonetheless, it is clear that Cauchon played a significant role in ensuring Joan of Arc's execution. It is also worth noting that the trial itself was highly controversial and many historians view it as fundamentally unfair and biased against Joan.

Sources:
Source 1: /content/drive/My Drive/arc.pdf (page 0)
Source 2: /content/drive/My Drive/arc.pdf (page 2)
Source 3: 